1 /* Copyright (c) 2004, 2021, Oracle and/or its affiliates.
2
3 This program is free software; you can redistribute it and/or modify
4 it under the terms of the GNU General Public License, version 2.0,
5 as published by the Free Software Foundation.
6
7 This program is also distributed with certain software (including
8 but not limited to OpenSSL) that is licensed under separate terms,
9 as designated in a particular file or component or in included license
10 documentation. The authors of MySQL hereby grant you an additional
11 permission to link the program and your derivative works with the
12 separately licensed software that they have included with MySQL.
13
14 This program is distributed in the hope that it will be useful,
15 but WITHOUT ANY WARRANTY; without even the implied warranty of
16 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 GNU General Public License, version 2.0, for more details.
18
19 You should have received a copy of the GNU General Public License
20 along with this program; if not, write to the Free Software
21 Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */
22
23 /**
24 @file
25
26 @brief
27 This file defines the NDB Cluster handler: the interface between
28 MySQL and NDB Cluster
29 */
30
31 #include "ha_ndbcluster_glue.h"
32 #include "ha_ndbcluster.h"
33 #include <ndbapi/NdbApi.hpp>
34 #include <ndbapi/NdbIndexStat.hpp>
35 #include <ndbapi/NdbInterpretedCode.hpp>
36 #include "../storage/ndb/src/ndbapi/NdbQueryBuilder.hpp"
37 #include "../storage/ndb/src/ndbapi/NdbQueryOperation.hpp"
38
39 #include "ha_ndbcluster_binlog.h"
40 #include "ha_ndbcluster_push.h"
41 #include "ha_ndbcluster_cond.h"
42 #include "ha_ndbcluster_tables.h"
43 #include "ha_ndbcluster_connection.h"
44 #include "ndb_thd.h"
45 #include "ndb_table_guard.h"
46 #include "ndb_global_schema_lock.h"
47 #include "ndb_global_schema_lock_guard.h"
48 #include "abstract_query_plan.h"
49 #include "partition_info.h"
50 #include "ndb_dist_priv_util.h"
51 #include "ha_ndb_index_stat.h"
52
53 #include <mysql/plugin.h>
54 #include <ndb_version.h>
55 #include <ndb_global.h>
56 #include "ndb_mi.h"
57 #include "ndb_conflict.h"
58 #include "ndb_anyvalue.h"
59 #include "ndb_binlog_extra_row_info.h"
60 #include "ndb_event_data.h"
61 #include "ndb_schema_dist.h"
62 #include "ndb_component.h"
63 #include "ndb_util_thread.h"
64 #include "ndb_local_connection.h"
65 #include "ndb_local_schema.h"
66 #include "ndb_tdc.h"
67 #include "ndb_log.h"
68 #include "ndb_name_util.h"
69 #include "../storage/ndb/src/common/util/parse_mask.hpp"
70 #include "../storage/ndb/include/util/SparseBitmask.hpp"
71 #include "m_ctype.h"
72
73 using std::min;
74 using std::max;
75
76 // ndb interface initialization/cleanup
77 extern "C" void ndb_init_internal();
78 extern "C" void ndb_end_internal();
79
80 static const int DEFAULT_PARALLELISM= 0;
81 static const ha_rows DEFAULT_AUTO_PREFETCH= 32;
82 static const ulong ONE_YEAR_IN_SECONDS= (ulong) 3600L*24L*365L;
83
84 ulong opt_ndb_extra_logging;
85 static ulong opt_ndb_wait_connected;
86 ulong opt_ndb_wait_setup;
87 static ulong opt_ndb_cache_check_time;
88 static uint opt_ndb_cluster_connection_pool;
89 static uint opt_ndb_recv_thread_activation_threshold;
90 static char* opt_ndb_recv_thread_cpu_mask;
91 static char* opt_ndb_index_stat_option;
92 static char* opt_ndb_connectstring;
93 static uint opt_ndb_nodeid;
94
95 static MYSQL_THDVAR_UINT(
96 autoincrement_prefetch_sz, /* name */
97 PLUGIN_VAR_RQCMDARG,
98 "Specify number of autoincrement values that are prefetched.",
99 NULL, /* check func. */
100 NULL, /* update func. */
101 1, /* default */
102 1, /* min */
103 65535, /* max */
104 0 /* block */
105 );
106
107
108 static MYSQL_THDVAR_BOOL(
109 force_send, /* name */
110 PLUGIN_VAR_OPCMDARG,
111 "Force send of buffers to ndb immediately without waiting for "
112 "other threads.",
113 NULL, /* check func. */
114 NULL, /* update func. */
115 1 /* default */
116 );
117
118
119 static MYSQL_THDVAR_BOOL(
120 use_exact_count, /* name */
121 PLUGIN_VAR_OPCMDARG,
122 "Use exact records count during query planning and for fast "
123 "select count(*), disable for faster queries.",
124 NULL, /* check func. */
125 NULL, /* update func. */
126 0 /* default */
127 );
128
129
130 static MYSQL_THDVAR_BOOL(
131 use_transactions, /* name */
132 PLUGIN_VAR_OPCMDARG,
133 "Use transactions for large inserts, if enabled then large "
134 "inserts will be split into several smaller transactions",
135 NULL, /* check func. */
136 NULL, /* update func. */
137 1 /* default */
138 );
139
140
141 static MYSQL_THDVAR_BOOL(
142 use_copying_alter_table, /* name */
143 PLUGIN_VAR_OPCMDARG,
144 "Force ndbcluster to always copy tables at alter table (should "
145 "only be used if on-line alter table fails).",
146 NULL, /* check func. */
147 NULL, /* update func. */
148 0 /* default */
149 );
150
151
152 static MYSQL_THDVAR_UINT(
153 optimized_node_selection, /* name */
154 PLUGIN_VAR_OPCMDARG,
155 "Select nodes for transactions in a more optimal way.",
156 NULL, /* check func. */
157 NULL, /* update func. */
158 3, /* default */
159 0, /* min */
160 3, /* max */
161 0 /* block */
162 );
163
164
/* Session variable ndb_batch_size: byte limit used when batching
   operations toward the data nodes. */
static MYSQL_THDVAR_ULONG(
  batch_size,                        /* name */
  PLUGIN_VAR_RQCMDARG,
  "Batch size in bytes.",
  NULL,                              /* check func. */
  NULL,                              /* update func. */
  32768,                             /* default */
  0,                                 /* min */
  ONE_YEAR_IN_SECONDS,               /* max */
  /* NOTE(review): the max bound reuses ONE_YEAR_IN_SECONDS (31536000);
     presumably only its numeric value is intended here, not a time
     semantic — confirm before changing. */
  0                                  /* block */
);
176
177
178 static MYSQL_THDVAR_ULONG(
179 optimization_delay, /* name */
180 PLUGIN_VAR_RQCMDARG,
181 "For optimize table, specifies the delay in milliseconds "
182 "for each batch of rows sent.",
183 NULL, /* check func. */
184 NULL, /* update func. */
185 10, /* default */
186 0, /* min */
187 100000, /* max */
188 0 /* block */
189 );
190
191 #if NDB_VERSION_D < NDB_MAKE_VERSION(7,2,0)
192 #define DEFAULT_NDB_INDEX_STAT_ENABLE FALSE
193 #else
194 #define DEFAULT_NDB_INDEX_STAT_ENABLE TRUE
195 #endif
196
197 static MYSQL_THDVAR_BOOL(
198 index_stat_enable, /* name */
199 PLUGIN_VAR_OPCMDARG,
200 "Use ndb index statistics in query optimization.",
201 NULL, /* check func. */
202 NULL, /* update func. */
203 DEFAULT_NDB_INDEX_STAT_ENABLE /* default */
204 );
205
206
207 static MYSQL_THDVAR_BOOL(
208 table_no_logging, /* name */
209 PLUGIN_VAR_NOCMDARG,
210 "",
211 NULL, /* check func. */
212 NULL, /* update func. */
213 FALSE /* default */
214 );
215
216
217 static MYSQL_THDVAR_BOOL(
218 table_temporary, /* name */
219 PLUGIN_VAR_NOCMDARG,
220 "",
221 NULL, /* check func. */
222 NULL, /* update func. */
223 FALSE /* default */
224 );
225
226 static MYSQL_THDVAR_UINT(
227 blob_read_batch_bytes, /* name */
228 PLUGIN_VAR_RQCMDARG,
229 "Specifies the bytesize large Blob reads "
230 "should be batched into. 0 == No limit.",
231 NULL, /* check func */
232 NULL, /* update func */
233 65536, /* default */
234 0, /* min */
235 UINT_MAX, /* max */
236 0 /* block */
237 );
238
239 static MYSQL_THDVAR_UINT(
240 blob_write_batch_bytes, /* name */
241 PLUGIN_VAR_RQCMDARG,
242 "Specifies the bytesize large Blob writes "
243 "should be batched into. 0 == No limit.",
244 NULL, /* check func */
245 NULL, /* update func */
246 65536, /* default */
247 0, /* min */
248 UINT_MAX, /* max */
249 0 /* block */
250 );
251
252 static MYSQL_THDVAR_UINT(
253 deferred_constraints, /* name */
254 PLUGIN_VAR_RQCMDARG,
255 "Specified that constraints should be checked deferred (when supported)",
256 NULL, /* check func */
257 NULL, /* update func */
258 0, /* default */
259 0, /* min */
260 1, /* max */
261 0 /* block */
262 );
263
264 static MYSQL_THDVAR_BOOL(
265 show_foreign_key_mock_tables, /* name */
266 PLUGIN_VAR_OPCMDARG,
267 "Show the mock tables which is used to support foreign_key_checks= 0. "
268 "Extra info warnings are shown when creating and dropping the tables. "
269 "The real table name is show in SHOW CREATE TABLE",
270 NULL, /* check func. */
271 NULL, /* update func. */
272 0 /* default */
273 );
274
275 #if NDB_VERSION_D < NDB_MAKE_VERSION(7,2,0)
276 #define DEFAULT_NDB_JOIN_PUSHDOWN FALSE
277 #else
278 #define DEFAULT_NDB_JOIN_PUSHDOWN TRUE
279 #endif
280
281 static MYSQL_THDVAR_BOOL(
282 join_pushdown, /* name */
283 PLUGIN_VAR_OPCMDARG,
284 "Enable pushing down of join to datanodes",
285 NULL, /* check func. */
286 NULL, /* update func. */
287 DEFAULT_NDB_JOIN_PUSHDOWN /* default */
288 );
289
290 static MYSQL_THDVAR_BOOL(
291 log_exclusive_reads, /* name */
292 PLUGIN_VAR_OPCMDARG,
293 "Log primary key reads with exclusive locks "
294 "to allow conflict resolution based on read conflicts",
295 NULL, /* check func. */
296 NULL, /* update func. */
297 0 /* default */
298 );
299
300
301 /*
302 Required in index_stat.cc but available only from here
303 thanks to use of top level anonymous structs.
304 */
ndb_index_stat_get_enable(THD * thd)305 bool ndb_index_stat_get_enable(THD *thd)
306 {
307 const bool value = THDVAR(thd, index_stat_enable);
308 return value;
309 }
310
ndb_show_foreign_key_mock_tables(THD * thd)311 bool ndb_show_foreign_key_mock_tables(THD* thd)
312 {
313 const bool value = THDVAR(thd, show_foreign_key_mock_tables);
314 return value;
315 }
316
ndb_log_exclusive_reads(THD * thd)317 bool ndb_log_exclusive_reads(THD *thd)
318 {
319 const bool value = THDVAR(thd, log_exclusive_reads);
320 return value;
321 }
322
323 static int ndbcluster_end(handlerton *hton, ha_panic_function flag);
324 static bool ndbcluster_show_status(handlerton *hton, THD*,
325 stat_print_fn *,
326 enum ha_stat_type);
327
328 static int ndbcluster_get_tablespace(THD* thd,
329 LEX_CSTRING db_name,
330 LEX_CSTRING table_name,
331 LEX_CSTRING *tablespace_name);
332 static int ndbcluster_alter_tablespace(handlerton *hton,
333 THD* thd,
334 st_alter_tablespace *info);
335 static int ndbcluster_fill_files_table(handlerton *hton,
336 THD *thd,
337 TABLE_LIST *tables,
338 Item *cond);
339
340 #if MYSQL_VERSION_ID >= 50501
341 /**
342 Used to fill in INFORMATION_SCHEMA* tables.
343
344 @param hton handle to the handlerton structure
345 @param thd the thread/connection descriptor
346 @param[in,out] tables the information schema table that is filled up
347 @param cond used for conditional pushdown to storage engine
348 @param schema_table_idx the table id that distinguishes the type of table
349
350 @return Operation status
351 */
352 static int
ndbcluster_fill_is_table(handlerton * hton,THD * thd,TABLE_LIST * tables,Item * cond,enum enum_schema_tables schema_table_idx)353 ndbcluster_fill_is_table(handlerton *hton, THD *thd, TABLE_LIST *tables,
354 Item *cond, enum enum_schema_tables schema_table_idx)
355 {
356 if (schema_table_idx == SCH_FILES)
357 return ndbcluster_fill_files_table(hton, thd, tables, cond);
358 return 0;
359 }
360 #endif
361
/*
  handlerton 'create' callback: allocate a new ha_ndbcluster
  handler instance for the given table share on the supplied
  MEM_ROOT (placement new; the MEM_ROOT owns the memory).
*/
static handler *ndbcluster_create_handler(handlerton *hton,
                                          TABLE_SHARE *table,
                                          MEM_ROOT *mem_root)
{
  return new (mem_root) ha_ndbcluster(hton, table);
}
368
369 static uint
ndbcluster_partition_flags()370 ndbcluster_partition_flags()
371 {
372 return (HA_CAN_UPDATE_PARTITION_KEY |
373 HA_CAN_PARTITION_UNIQUE | HA_USE_AUTO_PARTITION);
374 }
375
alter_flags(uint flags) const376 uint ha_ndbcluster::alter_flags(uint flags) const
377 {
378 const uint f=
379 HA_PARTITION_FUNCTION_SUPPORTED |
380 0;
381
382 if (flags & Alter_info::ALTER_DROP_PARTITION)
383 return 0;
384
385 return f;
386 }
387
388 #define NDB_AUTO_INCREMENT_RETRIES 100
389 #define BATCH_FLUSH_SIZE (32768)
390
391 #define ERR_PRINT(err) \
392 DBUG_PRINT("error", ("%d message: %s", err.code, err.message))
393
394 #define ERR_RETURN(err) \
395 { \
396 const NdbError& tmp= err; \
397 DBUG_RETURN(ndb_to_mysql_error(&tmp)); \
398 }
399
400 #define ERR_BREAK(err, code) \
401 { \
402 const NdbError& tmp= err; \
403 code= ndb_to_mysql_error(&tmp); \
404 break; \
405 }
406
407 #define ERR_SET(err, code) \
408 { \
409 const NdbError& tmp= err; \
410 code= ndb_to_mysql_error(&tmp); \
411 }
412
413 static int ndbcluster_inited= 0;
414
415 /*
416 Indicator and CONDVAR used to delay client and slave
417 connections until Ndb has Binlog setup
418 (bug#46955)
419 */
420 int ndb_setup_complete= 0;
421 native_cond_t COND_ndb_setup_complete; // Signal with ndbcluster_mutex
422
423 extern Ndb* g_ndb;
424
425 /// Handler synchronization
426 native_mutex_t ndbcluster_mutex;
427
428 /// Table lock handling
429 HASH ndbcluster_open_tables;
430 HASH ndbcluster_dropped_tables;
431
432 static uchar *ndbcluster_get_key(NDB_SHARE *share, size_t *length,
433 my_bool);
434
435 static void modify_shared_stats(NDB_SHARE *share,
436 Ndb_local_table_statistics *local_stat);
437
438 static int ndb_get_table_statistics(THD *thd, ha_ndbcluster*, bool, Ndb*,
439 const NdbRecord *, struct Ndb_statistics *,
440 uint part_id= ~(uint)0);
441
442 static ulong multi_range_fixed_size(int num_ranges);
443
444 static ulong multi_range_max_entry(NDB_INDEX_TYPE keytype, ulong reclength);
445
446 THD *injector_thd= 0;
447
448 /* Status variables shown with 'show status like 'Ndb%' */
449
450 struct st_ndb_status g_ndb_status;
451
452 const char *g_ndb_status_index_stat_status = "";
453 long g_ndb_status_index_stat_cache_query = 0;
454 long g_ndb_status_index_stat_cache_clean = 0;
455
456 long long g_event_data_count = 0;
457 long long g_event_nondata_count = 0;
458 long long g_event_bytes_count = 0;
459
460 static long long g_slave_api_client_stats[Ndb::NumClientStatistics];
461
462 static long long g_server_api_client_stats[Ndb::NumClientStatistics];
463
464 void
update_slave_api_stats(Ndb * ndb)465 update_slave_api_stats(Ndb* ndb)
466 {
467 for (Uint32 i=0; i < Ndb::NumClientStatistics; i++)
468 g_slave_api_client_stats[i] = ndb->getClientStat(i);
469 }
470
471 st_ndb_slave_state g_ndb_slave_state;
472
/*
  Detect a restart of the slave SQL thread and, when one is seen,
  reset the ndb slave state and reload the maximum replicated epoch
  from the ndb_apply_status table in the cluster.

  @param thd  the (possibly slave) thread descriptor

  @return 0 always; a failure to read the epoch only produces a
          warning in the error log.
*/
static int check_slave_state(THD* thd)
{
  DBUG_ENTER("check_slave_state");

#ifdef HAVE_NDB_BINLOG
  /* Nothing to do for ordinary client threads */
  if (!thd->slave_thread)
    DBUG_RETURN(0);

  /* A changed run id means the slave SQL thread was restarted */
  const Uint32 runId = ndb_mi_get_slave_run_id();
  DBUG_PRINT("info", ("Slave SQL thread run id is %u",
                      runId));
  if (unlikely(runId != g_ndb_slave_state.sql_run_id))
  {
    DBUG_PRINT("info", ("Slave run id changed from %u, "
                        "treating as Slave restart",
                        g_ndb_slave_state.sql_run_id));
    g_ndb_slave_state.sql_run_id = runId;

    g_ndb_slave_state.atStartSlave();

    /* Always try to load the Max Replicated Epoch info
     * first.
     * Could be made optional if it's a problem
     */
    {
      /*
        Load highest replicated epoch from a local
        MySQLD from the cluster.
      */
      DBUG_PRINT("info", ("Loading applied epoch information from %s",
                          NDB_APPLY_TABLE));
      NdbError ndb_error;
      Uint64 highestAppliedEpoch = 0;
      do
      {
        Ndb* ndb= check_ndb_in_thd(thd);
        NDBDICT* dict= ndb->getDictionary();
        NdbTransaction* trans= NULL;
        ndb->setDatabaseName(NDB_REP_DB);
        Ndb_table_guard ndbtab_g(dict, NDB_APPLY_TABLE);

        const NDBTAB* ndbtab= ndbtab_g.get_table();
        if (unlikely(ndbtab == NULL))
        {
          ndb_error = dict->getNdbError();
          break;
        }

        trans= ndb->startTransaction();
        if (unlikely(trans == NULL))
        {
          ndb_error = ndb->getNdbError();
          break;
        }

        do
        {
          /* Full table scan of ndb_apply_status */
          NdbScanOperation* sop = trans->getNdbScanOperation(ndbtab);
          if (unlikely(sop == NULL))
          {
            ndb_error = trans->getNdbError();
            break;
          }

          /* Fixed column positions: 0 = server_id, 1 = epoch */
          const Uint32 server_id_col_num = 0;
          const Uint32 epoch_col_num = 1;
          NdbRecAttr* server_id_ra = 0;
          NdbRecAttr* epoch_ra = 0;

          /* Committed read is sufficient; only a snapshot is needed */
          if (unlikely((sop->readTuples(NdbOperation::LM_CommittedRead) != 0) ||
                       ((server_id_ra = sop->getValue(server_id_col_num)) == NULL) ||
                       ((epoch_ra = sop->getValue(epoch_col_num)) == NULL)))
          {
            ndb_error = sop->getNdbError();
            break;
          }

          if (trans->execute(NdbTransaction::Commit))
          {
            ndb_error = trans->getNdbError();
            break;
          }

          /* Scan all rows, keeping the max epoch applied by this
             server (or by any server id we are configured to ignore) */
          int rc = 0;
          while (0 == (rc= sop->nextResult(true)))
          {
            Uint32 serverid = server_id_ra->u_32_value();
            Uint64 epoch = epoch_ra->u_64_value();

            if ((serverid == ::server_id) ||
                (ndb_mi_get_ignore_server_id(serverid)))
            {
              highestAppliedEpoch = MAX(epoch, highestAppliedEpoch);
            }
          }

          /* nextResult() == 1 means clean end-of-scan; anything else
             is an error */
          if (rc != 1)
          {
            ndb_error = sop->getNdbError();
            break;
          }
        } while (0);

        trans->close();
      } while(0);

      if (ndb_error.code != 0)
      {
        sql_print_warning("NDB Slave : Could not determine maximum replicated epoch from %s.%s "
                          "at Slave start, error %u %s",
                          NDB_REP_DB,
                          NDB_APPLY_TABLE,
                          ndb_error.code, ndb_error.message);
      }

      /*
        Set Global status variable to the Highest Applied Epoch from
        the Cluster DB.
        If none was found, this will be zero.
      */
      g_ndb_slave_state.max_rep_epoch = highestAppliedEpoch;
      sql_print_information("NDB Slave : MaxReplicatedEpoch set to %llu (%u/%u) at Slave start",
                            g_ndb_slave_state.max_rep_epoch,
                            (Uint32)(g_ndb_slave_state.max_rep_epoch >> 32),
                            (Uint32)(g_ndb_slave_state.max_rep_epoch & 0xffffffff));
    } // Load highest replicated epoch
  } // New Slave SQL thread run id
#endif

  DBUG_RETURN(0);
}
604
605
/*
  Refresh an st_ndb_status struct (shown by SHOW STATUS) with
  current values from the cluster connection and, when available,
  the per-thread counters.

  @param thd_ndb  per-thread ndb context; may be NULL, in which case
                  only the connection-level fields are updated
  @param ns       destination status struct
  @param c        cluster connection the values are read from

  @return 0 always
*/
static int update_status_variables(Thd_ndb *thd_ndb,
                                   st_ndb_status *ns,
                                   Ndb_cluster_connection *c)
{
  ns->connected_port= c->get_connected_port();
  ns->connected_host= c->get_connected_host();
  if (ns->cluster_node_id != (int) c->node_id())
  {
    ns->cluster_node_id= c->node_id();
    /* Log only for the global status of the default connection,
       not for per-session snapshots or extra pool connections */
    if (&g_ndb_status == ns && g_ndb_cluster_connection == c)
      sql_print_information("NDB: NodeID is %lu, management server '%s:%lu'",
                            ns->cluster_node_id, ns->connected_host,
                            ns->connected_port);
  }
  ns->number_of_replicas= 0;
  {
    /* get_no_ready() can return a negative value on error; clamp to 0 */
    int n= c->get_no_ready();
    ns->number_of_ready_data_nodes= n > 0 ? n : 0;
  }
  ns->number_of_data_nodes= c->no_db_nodes();
  ns->connect_count= c->get_connect_count();
  ns->last_commit_epoch_server= ndb_get_latest_trans_gci();
  if (thd_ndb)
  {
    /* Copy the per-thread counters into the status struct */
    ns->execute_count= thd_ndb->m_execute_count;
    ns->scan_count= thd_ndb->m_scan_count;
    ns->pruned_scan_count= thd_ndb->m_pruned_scan_count;
    ns->sorted_scan_count= thd_ndb->m_sorted_scan_count;
    ns->pushed_queries_defined= thd_ndb->m_pushed_queries_defined;
    ns->pushed_queries_dropped= thd_ndb->m_pushed_queries_dropped;
    ns->pushed_queries_executed= thd_ndb->m_pushed_queries_executed;
    ns->pushed_reads= thd_ndb->m_pushed_reads;
    ns->last_commit_epoch_session = thd_ndb->m_last_commit_epoch_session;
    for (int i= 0; i < MAX_NDB_NODES; i++)
    {
      ns->transaction_no_hint_count[i]= thd_ndb->m_transaction_no_hint_count[i];
      ns->transaction_hint_count[i]= thd_ndb->m_transaction_hint_count[i];
    }
    for (int i=0; i < Ndb::NumClientStatistics; i++)
    {
      ns->api_client_stats[i] = thd_ndb->ndb->getClientStat(i);
    }
    ns->schema_locks_count= thd_ndb->schema_locks_count;
  }
  return 0;
}
652
653 /* Helper macro for definitions of NdbApi status variables */
654
655 #define NDBAPI_COUNTERS(NAME_SUFFIX, ARRAY_LOCATION) \
656 {"api_wait_exec_complete_count" NAME_SUFFIX, \
657 (char*) ARRAY_LOCATION[ Ndb::WaitExecCompleteCount ], \
658 SHOW_LONGLONG, SHOW_SCOPE_GLOBAL}, \
659 {"api_wait_scan_result_count" NAME_SUFFIX, \
660 (char*) ARRAY_LOCATION[ Ndb::WaitScanResultCount ], \
661 SHOW_LONGLONG, SHOW_SCOPE_GLOBAL}, \
662 {"api_wait_meta_request_count" NAME_SUFFIX, \
663 (char*) ARRAY_LOCATION[ Ndb::WaitMetaRequestCount ], \
664 SHOW_LONGLONG, SHOW_SCOPE_GLOBAL}, \
665 {"api_wait_nanos_count" NAME_SUFFIX, \
666 (char*) ARRAY_LOCATION[ Ndb::WaitNanosCount ], \
667 SHOW_LONGLONG, SHOW_SCOPE_GLOBAL}, \
668 {"api_bytes_sent_count" NAME_SUFFIX, \
669 (char*) ARRAY_LOCATION[ Ndb::BytesSentCount ], \
670 SHOW_LONGLONG, SHOW_SCOPE_GLOBAL}, \
671 {"api_bytes_received_count" NAME_SUFFIX, \
672 (char*) ARRAY_LOCATION[ Ndb::BytesRecvdCount ], \
673 SHOW_LONGLONG, SHOW_SCOPE_GLOBAL}, \
674 {"api_trans_start_count" NAME_SUFFIX, \
675 (char*) ARRAY_LOCATION[ Ndb::TransStartCount ], \
676 SHOW_LONGLONG, SHOW_SCOPE_GLOBAL}, \
677 {"api_trans_commit_count" NAME_SUFFIX, \
678 (char*) ARRAY_LOCATION[ Ndb::TransCommitCount ], \
679 SHOW_LONGLONG, SHOW_SCOPE_GLOBAL}, \
680 {"api_trans_abort_count" NAME_SUFFIX, \
681 (char*) ARRAY_LOCATION[ Ndb::TransAbortCount ], \
682 SHOW_LONGLONG, SHOW_SCOPE_GLOBAL}, \
683 {"api_trans_close_count" NAME_SUFFIX, \
684 (char*) ARRAY_LOCATION[ Ndb::TransCloseCount ], \
685 SHOW_LONGLONG, SHOW_SCOPE_GLOBAL}, \
686 {"api_pk_op_count" NAME_SUFFIX, \
687 (char*) ARRAY_LOCATION[ Ndb::PkOpCount ], \
688 SHOW_LONGLONG, SHOW_SCOPE_GLOBAL}, \
689 {"api_uk_op_count" NAME_SUFFIX, \
690 (char*) ARRAY_LOCATION[ Ndb::UkOpCount ], \
691 SHOW_LONGLONG, SHOW_SCOPE_GLOBAL}, \
692 {"api_table_scan_count" NAME_SUFFIX, \
693 (char*) ARRAY_LOCATION[ Ndb::TableScanCount ], \
694 SHOW_LONGLONG, SHOW_SCOPE_GLOBAL}, \
695 {"api_range_scan_count" NAME_SUFFIX, \
696 (char*) ARRAY_LOCATION[ Ndb::RangeScanCount ], \
697 SHOW_LONGLONG, SHOW_SCOPE_GLOBAL}, \
698 {"api_pruned_scan_count" NAME_SUFFIX, \
699 (char*) ARRAY_LOCATION[ Ndb::PrunedScanCount ], \
700 SHOW_LONGLONG, SHOW_SCOPE_GLOBAL}, \
701 {"api_scan_batch_count" NAME_SUFFIX, \
702 (char*) ARRAY_LOCATION[ Ndb::ScanBatchCount ], \
703 SHOW_LONGLONG, SHOW_SCOPE_GLOBAL}, \
704 {"api_read_row_count" NAME_SUFFIX, \
705 (char*) ARRAY_LOCATION[ Ndb::ReadRowCount ], \
706 SHOW_LONGLONG, SHOW_SCOPE_GLOBAL}, \
707 {"api_trans_local_read_row_count" NAME_SUFFIX, \
708 (char*) ARRAY_LOCATION[ Ndb::TransLocalReadRowCount ], \
709 SHOW_LONGLONG, SHOW_SCOPE_GLOBAL}, \
710 {"api_adaptive_send_forced_count" NAME_SUFFIX, \
711 (char *) ARRAY_LOCATION[ Ndb::ForcedSendsCount ], \
712 SHOW_LONGLONG, SHOW_SCOPE_GLOBAL}, \
713 {"api_adaptive_send_unforced_count" NAME_SUFFIX, \
714 (char *) ARRAY_LOCATION[ Ndb::UnforcedSendsCount ], \
715 SHOW_LONGLONG, SHOW_SCOPE_GLOBAL}, \
716 {"api_adaptive_send_deferred_count" NAME_SUFFIX, \
717 (char *) ARRAY_LOCATION[ Ndb::DeferredSendsCount ], \
718 SHOW_LONGLONG, SHOW_SCOPE_GLOBAL}
719
720 SHOW_VAR ndb_status_variables_dynamic[]= {
721 {"cluster_node_id", (char*) &g_ndb_status.cluster_node_id, SHOW_LONG, SHOW_SCOPE_GLOBAL},
722 {"config_from_host", (char*) &g_ndb_status.connected_host, SHOW_CHAR_PTR, SHOW_SCOPE_GLOBAL},
723 {"config_from_port", (char*) &g_ndb_status.connected_port, SHOW_LONG, SHOW_SCOPE_GLOBAL},
724 //{"number_of_replicas", (char*) &g_ndb_status.number_of_replicas, SHOW_LONG, SHOW_SCOPE_GLOBAL},
725 {"number_of_data_nodes",(char*) &g_ndb_status.number_of_data_nodes, SHOW_LONG, SHOW_SCOPE_GLOBAL},
726 {"number_of_ready_data_nodes",
727 (char*) &g_ndb_status.number_of_ready_data_nodes, SHOW_LONG, SHOW_SCOPE_GLOBAL},
728 {"connect_count", (char*) &g_ndb_status.connect_count, SHOW_LONG, SHOW_SCOPE_GLOBAL},
729 {"execute_count", (char*) &g_ndb_status.execute_count, SHOW_LONG, SHOW_SCOPE_GLOBAL},
730 {"scan_count", (char*) &g_ndb_status.scan_count, SHOW_LONG, SHOW_SCOPE_GLOBAL},
731 {"pruned_scan_count", (char*) &g_ndb_status.pruned_scan_count, SHOW_LONG, SHOW_SCOPE_GLOBAL},
732 {"schema_locks_count", (char*) &g_ndb_status.schema_locks_count, SHOW_LONG, SHOW_SCOPE_GLOBAL},
733 NDBAPI_COUNTERS("_session", &g_ndb_status.api_client_stats),
734 {"sorted_scan_count", (char*) &g_ndb_status.sorted_scan_count, SHOW_LONG, SHOW_SCOPE_GLOBAL},
735 {"pushed_queries_defined", (char*) &g_ndb_status.pushed_queries_defined,
736 SHOW_LONG, SHOW_SCOPE_GLOBAL},
737 {"pushed_queries_dropped", (char*) &g_ndb_status.pushed_queries_dropped,
738 SHOW_LONG, SHOW_SCOPE_GLOBAL},
739 {"pushed_queries_executed", (char*) &g_ndb_status.pushed_queries_executed,
740 SHOW_LONG, SHOW_SCOPE_GLOBAL},
741 {"pushed_reads", (char*) &g_ndb_status.pushed_reads, SHOW_LONG, SHOW_SCOPE_GLOBAL},
742 {"last_commit_epoch_server",
743 (char*) &g_ndb_status.last_commit_epoch_server,
744 SHOW_LONGLONG, SHOW_SCOPE_GLOBAL},
745 {"last_commit_epoch_session",
746 (char*) &g_ndb_status.last_commit_epoch_session,
747 SHOW_LONGLONG, SHOW_SCOPE_GLOBAL},
748 {NullS, NullS, SHOW_LONG, SHOW_SCOPE_GLOBAL}
749 };
750
751
752 SHOW_VAR ndb_status_injector_variables[]= {
753 {"api_event_data_count_injector", (char*) &g_event_data_count, SHOW_LONGLONG, SHOW_SCOPE_GLOBAL},
754 {"api_event_nondata_count_injector", (char*) &g_event_nondata_count, SHOW_LONGLONG, SHOW_SCOPE_GLOBAL},
755 {"api_event_bytes_count_injector", (char*) &g_event_bytes_count, SHOW_LONGLONG, SHOW_SCOPE_GLOBAL},
756 {NullS, NullS, SHOW_LONG, SHOW_SCOPE_GLOBAL}
757 };
758
759 SHOW_VAR ndb_status_slave_variables[]= {
760 NDBAPI_COUNTERS("_slave", &g_slave_api_client_stats),
761 {"slave_max_replicated_epoch", (char*) &g_ndb_slave_state.max_rep_epoch, SHOW_LONGLONG, SHOW_SCOPE_GLOBAL},
762 {NullS, NullS, SHOW_LONG, SHOW_SCOPE_GLOBAL}
763 };
764
765 SHOW_VAR ndb_status_server_client_stat_variables[]= {
766 NDBAPI_COUNTERS("", &g_server_api_client_stats),
767 {"api_event_data_count",
768 (char*) &g_server_api_client_stats[ Ndb::DataEventsRecvdCount ],
769 SHOW_LONGLONG, SHOW_SCOPE_GLOBAL},
770 {"api_event_nondata_count",
771 (char*) &g_server_api_client_stats[ Ndb::NonDataEventsRecvdCount ],
772 SHOW_LONGLONG, SHOW_SCOPE_GLOBAL},
773 {"api_event_bytes_count",
774 (char*) &g_server_api_client_stats[ Ndb::EventBytesRecvdCount ],
775 SHOW_LONGLONG, SHOW_SCOPE_GLOBAL},
776 {NullS, NullS, SHOW_LONG, SHOW_SCOPE_GLOBAL}
777 };
778
show_ndb_server_api_stats(THD * thd,SHOW_VAR * var,char * buff)779 static int show_ndb_server_api_stats(THD *thd, SHOW_VAR *var, char *buff)
780 {
781 /* This function is called when SHOW STATUS / INFO_SCHEMA wants
782 * to see one of our status vars
783 * We use this opportunity to :
784 * 1) Update the globals with current values
785 * 2) Return an array of var definitions, pointing to
786 * the updated globals
787 */
788 ndb_get_connection_stats((Uint64*) &g_server_api_client_stats[0]);
789
790 var->type= SHOW_ARRAY;
791 var->value= (char*) ndb_status_server_client_stat_variables;
792 var->scope= SHOW_SCOPE_GLOBAL;
793
794 return 0;
795 }
796
797 SHOW_VAR ndb_status_index_stat_variables[]= {
798 {"status", (char*) &g_ndb_status_index_stat_status, SHOW_CHAR_PTR, SHOW_SCOPE_GLOBAL},
799 {"cache_query", (char*) &g_ndb_status_index_stat_cache_query, SHOW_LONG, SHOW_SCOPE_GLOBAL},
800 {"cache_clean", (char*) &g_ndb_status_index_stat_cache_clean, SHOW_LONG, SHOW_SCOPE_GLOBAL},
801 {NullS, NullS, SHOW_LONG, SHOW_SCOPE_GLOBAL}
802 };
803
804
805 /*
806 Error handling functions
807 */
808
809 /* Note for merge: old mapping table, moved to storage/ndb/ndberror.c */
810
ndb_to_mysql_error(const NdbError * ndberr)811 int ndb_to_mysql_error(const NdbError *ndberr)
812 {
813 /* read the mysql mapped error code */
814 int error= ndberr->mysql_code;
815
816 switch (error)
817 {
818 /* errors for which we do not add warnings, just return mapped error code
819 */
820 case HA_ERR_NO_SUCH_TABLE:
821 case HA_ERR_KEY_NOT_FOUND:
822 return error;
823
824 /* Mapping missing, go with the ndb error code */
825 case -1:
826 case 0:
827 /* Never map to errors below HA_ERR_FIRST */
828 if (ndberr->code < HA_ERR_FIRST)
829 error= HA_ERR_INTERNAL_ERROR;
830 else
831 error= ndberr->code;
832 break;
833 /* Mapping exists, go with the mapped code */
834 default:
835 break;
836 }
837
838 {
839 /*
840 Push the NDB error message as warning
841 - Used to be able to use SHOW WARNINGS to get more info
842 on what the error is
843 - Used by replication to see if the error was temporary
844 */
845 if (ndberr->status == NdbError::TemporaryError)
846 push_warning_printf(current_thd, Sql_condition::SL_WARNING,
847 ER_GET_TEMPORARY_ERRMSG, ER(ER_GET_TEMPORARY_ERRMSG),
848 ndberr->code, ndberr->message, "NDB");
849 else
850 push_warning_printf(current_thd, Sql_condition::SL_WARNING,
851 ER_GET_ERRMSG, ER(ER_GET_ERRMSG),
852 ndberr->code, ndberr->message, "NDB");
853 }
854 return error;
855 }
856
857 ulong opt_ndb_slave_conflict_role;
858
859 #ifdef HAVE_NDB_BINLOG
860
861 static int
862 handle_conflict_op_error(NdbTransaction* trans,
863 const NdbError& err,
864 const NdbOperation* op);
865
866 static int
867 handle_row_conflict(NDB_CONFLICT_FN_SHARE* cfn_share,
868 const char* tab_name,
869 bool table_has_blobs,
870 const char* handling_type,
871 const NdbRecord* key_rec,
872 const NdbRecord* data_rec,
873 const uchar* old_row,
874 const uchar* new_row,
875 enum_conflicting_op_type op_type,
876 enum_conflict_cause conflict_cause,
877 const NdbError& conflict_error,
878 NdbTransaction* conflict_trans,
879 const MY_BITMAP *write_set,
880 Uint64 transaction_id);
881 #endif
882
883 static const Uint32 error_op_after_refresh_op = 920;
884
/*
  Inspect the outcome of all completed operations from 'first' to
  'last' after an execute, before commit.

  Operations without conflict detection attached: NoError,
  ConstraintViolation and NoDataFound are accepted (the latter two
  counted in *ignore_count); anything else is returned as an error.
  Operations with conflict detection (getCustomData() != NULL) are
  handed to handle_conflict_op_error(), which may define extra
  operations on the transaction; those are executed and checked
  here as well.

  @param thd_ndb       per-thread ndb context (m_force_send setting)
  @param trans         the transaction the operations belong to
  @param first, last   inclusive range of completed operations;
                       first == NULL means nothing to check
  @param ignore_count  out: number of ignored (accepted) errors,
                       may be NULL

  @return 0 on success, otherwise an ndb/mysql error code
*/
static inline
int
check_completed_operations_pre_commit(Thd_ndb *thd_ndb, NdbTransaction *trans,
                                      const NdbOperation *first,
                                      const NdbOperation *last,
                                      uint *ignore_count)
{
  uint ignores= 0;
  DBUG_ENTER("check_completed_operations_pre_commit");

  if (unlikely(first == 0))
  {
    assert(last == 0);
    DBUG_RETURN(0);
  }

  /*
    Check that all errors are "accepted" errors
    or exceptions to report
  */
#ifdef HAVE_NDB_BINLOG
  /* Remember the tail of the defined-op list so that operations
     added by conflict handling below can be detected */
  const NdbOperation* lastUserOp = trans->getLastDefinedOperation();
#endif
  while (true)
  {
    const NdbError &err= first->getNdbError();
    const bool op_has_conflict_detection = (first->getCustomData() != NULL);
    if (!op_has_conflict_detection)
    {
      assert(err.code != (int) error_op_after_refresh_op);

      /* 'Normal path' - ignore key (not) present, others are errors */
      if (err.classification != NdbError::NoError &&
          err.classification != NdbError::ConstraintViolation &&
          err.classification != NdbError::NoDataFound)
      {
        /* Non ignored error, report it */
        DBUG_PRINT("info", ("err.code == %u", err.code));
        DBUG_RETURN(err.code);
      }
    }
#ifdef HAVE_NDB_BINLOG
    else
    {
      /*
        Op with conflict detection, use special error handling method
      */

      if (err.classification != NdbError::NoError)
      {
        int res = handle_conflict_op_error(trans,
                                           err,
                                           first);
        if (res != 0)
          DBUG_RETURN(res);
      }
    } // if (!op_has_conflict_detection)
#endif
    if (err.classification != NdbError::NoError)
      ignores++;

    if (first == last)
      break;

    first= trans->getNextCompletedOperation(first);
  }
  if (ignore_count)
    *ignore_count= ignores;
#ifdef HAVE_NDB_BINLOG
  /*
    Conflict detection related error handling above may have defined
    new operations on the transaction.  If so, execute them now
  */
  if (trans->getLastDefinedOperation() != lastUserOp)
  {
    const NdbOperation* last_conflict_op = trans->getLastDefinedOperation();

    NdbError nonMaskedError;
    assert(nonMaskedError.code == 0);

    if (trans->execute(NdbTransaction::NoCommit,
                       NdbOperation::AO_IgnoreError,
                       thd_ndb->m_force_send))
    {
      /* Transaction execute failed, even with IgnoreError... */
      nonMaskedError = trans->getNdbError();
      assert(nonMaskedError.code != 0);
    }
    else if (trans->getNdbError().code)
    {
      /* Check the result codes of the operations we added */
      const NdbOperation* conflict_op = NULL;
      do
      {
        conflict_op = trans->getNextCompletedOperation(conflict_op);
        assert(conflict_op != NULL);
        /* We will ignore 920 which represents a refreshOp or other op
         * arriving after a refreshOp
         */
        const NdbError& err = conflict_op->getNdbError();
        if ((err.code != 0) &&
            (err.code != (int) error_op_after_refresh_op))
        {
          /* Found a real error, break out and handle it */
          nonMaskedError = err;
          break;
        }
      } while (conflict_op != last_conflict_op);
    }

    /* Handle errors with extra conflict handling operations */
    if (nonMaskedError.code != 0)
    {
      if (nonMaskedError.status == NdbError::TemporaryError)
      {
        /* Slave will roll back and retry entire transaction. */
        ERR_RETURN(nonMaskedError);
      }
      else
      {
        char msg[FN_REFLEN];
        my_snprintf(msg, sizeof(msg), "Executing extra operations for "
                    "conflict handling hit Ndb error %d '%s'",
                    nonMaskedError.code, nonMaskedError.message);
        push_warning_printf(current_thd, Sql_condition::SL_ERROR,
                            ER_EXCEPTIONS_WRITE_ERROR,
                            ER(ER_EXCEPTIONS_WRITE_ERROR), msg);
        /* Slave will stop replication. */
        DBUG_RETURN(ER_EXCEPTIONS_WRITE_ERROR);
      }
    }
  }
#endif
  DBUG_RETURN(0);
}
1020
1021 static inline
1022 int
check_completed_operations(Thd_ndb * thd_ndb,NdbTransaction * trans,const NdbOperation * first,const NdbOperation * last,uint * ignore_count)1023 check_completed_operations(Thd_ndb *thd_ndb, NdbTransaction *trans,
1024 const NdbOperation *first,
1025 const NdbOperation *last,
1026 uint *ignore_count)
1027 {
1028 uint ignores= 0;
1029 DBUG_ENTER("check_completed_operations");
1030
1031 if (unlikely(first == 0))
1032 {
1033 assert(last == 0);
1034 DBUG_RETURN(0);
1035 }
1036
1037 /*
1038 Check that all errors are "accepted" errors
1039 */
1040 while (true)
1041 {
1042 const NdbError &err= first->getNdbError();
1043 if (err.classification != NdbError::NoError &&
1044 err.classification != NdbError::ConstraintViolation &&
1045 err.classification != NdbError::NoDataFound)
1046 {
1047 #ifdef HAVE_NDB_BINLOG
1048 /* All conflict detection etc should be done before commit */
1049 assert((err.code != (int) error_conflict_fn_violation) &&
1050 (err.code != (int) error_op_after_refresh_op));
1051 #endif
1052 DBUG_RETURN(err.code);
1053 }
1054 if (err.classification != NdbError::NoError)
1055 ignores++;
1056
1057 if (first == last)
1058 break;
1059
1060 first= trans->getNextCompletedOperation(first);
1061 }
1062 if (ignore_count)
1063 *ignore_count= ignores;
1064 DBUG_RETURN(0);
1065 }
1066
/**
  Release NdbApi resources held by already-completed operations and
  queries on @a trans, so memory does not accumulate over repeated
  execute() calls within one transaction.
*/
void
ha_ndbcluster::release_completed_operations(NdbTransaction *trans)
{
  /**
   * mysqld reads/write blobs fully,
   * which means that it does not keep blobs
   * open/active over execute, which means
   * that it should be safe to release anything completed here
   *
   * i.e don't check for blobs, but just go ahead and release
   */
  trans->releaseCompletedOperations();
  trans->releaseCompletedQueries();
}
1081
1082
1083 static inline
1084 int
execute_no_commit(Thd_ndb * thd_ndb,NdbTransaction * trans,bool ignore_no_key,uint * ignore_count=0)1085 execute_no_commit(Thd_ndb *thd_ndb, NdbTransaction *trans,
1086 bool ignore_no_key,
1087 uint *ignore_count = 0)
1088 {
1089 DBUG_ENTER("execute_no_commit");
1090 ha_ndbcluster::release_completed_operations(trans);
1091 const NdbOperation *first= trans->getFirstDefinedOperation();
1092 const NdbOperation *last= trans->getLastDefinedOperation();
1093 thd_ndb->m_execute_count++;
1094 thd_ndb->m_unsent_bytes= 0;
1095 DBUG_PRINT("info", ("execute_count: %u", thd_ndb->m_execute_count));
1096 int rc= 0;
1097 do
1098 {
1099 if (trans->execute(NdbTransaction::NoCommit,
1100 NdbOperation::AO_IgnoreError,
1101 thd_ndb->m_force_send))
1102 {
1103 rc= -1;
1104 break;
1105 }
1106 if (!ignore_no_key || trans->getNdbError().code == 0)
1107 {
1108 rc= trans->getNdbError().code;
1109 break;
1110 }
1111
1112 rc = check_completed_operations_pre_commit(thd_ndb, trans,
1113 first, last,
1114 ignore_count);
1115 } while (0);
1116
1117 if (unlikely(thd_ndb->is_slave_thread() &&
1118 rc != 0))
1119 {
1120 g_ndb_slave_state.atTransactionAbort();
1121 }
1122
1123 DBUG_PRINT("info", ("execute_no_commit rc is %d", rc));
1124 DBUG_RETURN(rc);
1125 }
1126
1127
1128 static inline
1129 int
execute_commit(Thd_ndb * thd_ndb,NdbTransaction * trans,int force_send,int ignore_error,uint * ignore_count=0)1130 execute_commit(Thd_ndb *thd_ndb, NdbTransaction *trans,
1131 int force_send, int ignore_error, uint *ignore_count = 0)
1132 {
1133 DBUG_ENTER("execute_commit");
1134 NdbOperation::AbortOption ao= NdbOperation::AO_IgnoreError;
1135 if (thd_ndb->m_unsent_bytes && !ignore_error)
1136 {
1137 /*
1138 We have unsent bytes and cannot ignore error. Calling execute
1139 with NdbOperation::AO_IgnoreError will result in possible commit
1140 of a transaction although there is an error.
1141 */
1142 ao= NdbOperation::AbortOnError;
1143 }
1144 const NdbOperation *first= trans->getFirstDefinedOperation();
1145 const NdbOperation *last= trans->getLastDefinedOperation();
1146 thd_ndb->m_execute_count++;
1147 thd_ndb->m_unsent_bytes= 0;
1148 DBUG_PRINT("info", ("execute_count: %u", thd_ndb->m_execute_count));
1149 int rc= 0;
1150 do
1151 {
1152 if (trans->execute(NdbTransaction::Commit, ao, force_send))
1153 {
1154 rc= -1;
1155 break;
1156 }
1157
1158 if (!ignore_error || trans->getNdbError().code == 0)
1159 {
1160 rc= trans->getNdbError().code;
1161 break;
1162 }
1163
1164 rc= check_completed_operations(thd_ndb, trans, first, last,
1165 ignore_count);
1166 } while (0);
1167
1168 if (likely(rc == 0))
1169 {
1170 /* Committed ok, update session GCI, if it's available
1171 * (Not available for reads, empty transactions etc...)
1172 */
1173 Uint64 reportedGCI;
1174 if (trans->getGCI(&reportedGCI) == 0 &&
1175 reportedGCI != 0)
1176 {
1177 assert(reportedGCI >= thd_ndb->m_last_commit_epoch_session);
1178 thd_ndb->m_last_commit_epoch_session = reportedGCI;
1179 }
1180 }
1181
1182 if (thd_ndb->is_slave_thread())
1183 {
1184 if (likely(rc == 0))
1185 {
1186 /* Success */
1187 g_ndb_slave_state.atTransactionCommit(thd_ndb->m_last_commit_epoch_session);
1188 }
1189 else
1190 {
1191 g_ndb_slave_state.atTransactionAbort();
1192 }
1193 }
1194
1195 DBUG_PRINT("info", ("execute_commit rc is %d", rc));
1196 DBUG_RETURN(rc);
1197 }
1198
1199 static inline
execute_no_commit_ie(Thd_ndb * thd_ndb,NdbTransaction * trans)1200 int execute_no_commit_ie(Thd_ndb *thd_ndb, NdbTransaction *trans)
1201 {
1202 DBUG_ENTER("execute_no_commit_ie");
1203 ha_ndbcluster::release_completed_operations(trans);
1204 int res= trans->execute(NdbTransaction::NoCommit,
1205 NdbOperation::AO_IgnoreError,
1206 thd_ndb->m_force_send);
1207 thd_ndb->m_unsent_bytes= 0;
1208 thd_ndb->m_execute_count++;
1209 DBUG_PRINT("info", ("execute_count: %u", thd_ndb->m_execute_count));
1210 DBUG_RETURN(res);
1211 }
1212
/*
  Place holder for ha_ndbcluster thread specific data
*/
typedef struct st_thd_ndb_share {
  const void *key;   // hash key; the pointer value itself is the key (see thd_ndb_share_get_key)
  struct Ndb_local_table_statistics stat;   // per-thread local statistics for this table
} THD_NDB_SHARE;
1220 static
thd_ndb_share_get_key(THD_NDB_SHARE * thd_ndb_share,size_t * length,my_bool not_used MY_ATTRIBUTE ((unused)))1221 uchar *thd_ndb_share_get_key(THD_NDB_SHARE *thd_ndb_share, size_t *length,
1222 my_bool not_used MY_ATTRIBUTE((unused)))
1223 {
1224 *length= sizeof(thd_ndb_share->key);
1225 return (uchar*) &thd_ndb_share->key;
1226 }
1227
/**
  Construct the per-session NDB context: attach to the cluster
  connection, create this session's Ndb object, zero all statement/
  statistics counters, and set up the open-tables hash and the batch
  memory root used for buffering row data between executes.
*/
Thd_ndb::Thd_ndb(THD* thd) :
  m_thd(thd),
  m_slave_thread(thd->slave_thread),
  m_skip_binlog_setup_in_find_files(false),
  schema_locks_count(0),
  m_last_commit_epoch_session(0)
{
  // Attach to the shared cluster connection and remember its connect
  // generation, so a later reconnect can be detected
  connection= ndb_get_cluster_connection();
  m_connect_count= connection->get_connect_count();
  ndb= new Ndb(connection, "");
  lock_count= 0;
  start_stmt_count= 0;
  save_point_count= 0;
  count= 0;
  trans= NULL;
  m_handler= NULL;
  m_error= FALSE;
  options= 0;
  // Hash of per-table thread-local data, keyed via thd_ndb_share_get_key
  (void) my_hash_init(&open_tables, table_alias_charset, 5, 0, 0,
                      (my_hash_get_key)thd_ndb_share_get_key, 0, 0,
                      PSI_INSTRUMENT_ME);
  m_unsent_bytes= 0;
  m_execute_count= 0;
  m_scan_count= 0;
  m_pruned_scan_count= 0;
  m_sorted_scan_count= 0;
  m_pushed_queries_defined= 0;
  m_pushed_queries_dropped= 0;
  m_pushed_queries_executed= 0;
  m_pushed_reads= 0;
  memset(m_transaction_no_hint_count, 0, sizeof(m_transaction_no_hint_count));
  memset(m_transaction_hint_count, 0, sizeof(m_transaction_hint_count));
  global_schema_lock_trans= NULL;
  global_schema_lock_count= 0;
  global_schema_lock_error= 0;
  // Memory root for batched row buffers; freed wholesale per batch
  init_alloc_root(PSI_INSTRUMENT_ME,
                  &m_batch_mem_root, BATCH_FLUSH_SIZE/4, 0);
}
1266
~Thd_ndb()1267 Thd_ndb::~Thd_ndb()
1268 {
1269 if (opt_ndb_extra_logging > 1)
1270 {
1271 /*
1272 print some stats about the connection at disconnect
1273 */
1274 for (int i= 0; i < MAX_NDB_NODES; i++)
1275 {
1276 if (m_transaction_hint_count[i] > 0 ||
1277 m_transaction_no_hint_count[i] > 0)
1278 {
1279 sql_print_information("tid %u: node[%u] "
1280 "transaction_hint=%u, transaction_no_hint=%u",
1281 m_thd->thread_id(), i,
1282 m_transaction_hint_count[i],
1283 m_transaction_no_hint_count[i]);
1284 }
1285 }
1286 }
1287 if (ndb)
1288 {
1289 delete ndb;
1290 ndb= NULL;
1291 }
1292 changed_tables.empty();
1293 my_hash_free(&open_tables);
1294 free_root(&m_batch_mem_root, MYF(0));
1295 }
1296
1297
get_ndb(THD * thd) const1298 Ndb *ha_ndbcluster::get_ndb(THD *thd) const
1299 {
1300 return thd_get_thd_ndb(thd)->ndb;
1301 }
1302
/*
 * manage uncommitted insert/deletes during transaction to get records correct
 */
1306
/**
  Fill in 'records per key' estimates for this table's indexes.

  Unique and primary-key indexes get an exact 1.0 for the complete key.
  Ordered indexes are looked up in the NDB index statistics subsystem
  (when enabled); there is no fallback estimate if stats are missing.
*/
void ha_ndbcluster::set_rec_per_key()
{
  DBUG_ENTER("ha_ndbcluster::set_rec_per_key");
  /*
    Set up the 'rec_per_key[]' for keys which we have good knowledge
    about the distribution. 'rec_per_key[]' is init'ed to '0' by
    open_binary_frm(), which is interpreted as 'unknown' by optimizer.
    -> Not setting 'rec_per_key[]' will force the optimizer to use
    its own heuristic to estimate 'records pr. key'.
  */
  for (uint i=0 ; i < table_share->keys ; i++)
  {
    bool is_unique_index= false;
    KEY* key_info= table->key_info + i;
    switch (get_index_type(i))
    {
    case UNIQUE_INDEX:
    case PRIMARY_KEY_INDEX:
    {
      // Index is unique when all 'key_parts' are specified,
      // else distribution is unknown and not specified here.
      is_unique_index= true;
      break;
    }
    case UNIQUE_ORDERED_INDEX:
    case PRIMARY_KEY_ORDERED_INDEX:
      is_unique_index= true;
      // intentional fall thru to logic for ordered index
    case ORDERED_INDEX:
      // 'Records pr. key' are unknown for non-unique indexes.
      // (May change when we get better index statistics.)
    {
      THD *thd= current_thd;
      // Stats must be enabled both globally and in this session
      const bool index_stat_enable= THDVAR(NULL, index_stat_enable) &&
                                    THDVAR(thd, index_stat_enable);
      if (index_stat_enable)
      {
        int err= ndb_index_stat_set_rpk(i);
        if (err != 0 &&
            /* no stats is not unexpected error */
            err != NdbIndexStat::NoIndexStats &&
            /* warning was printed at first error */
            err != NdbIndexStat::MyHasError &&
            /* stats thread aborted request */
            err != NdbIndexStat::MyAbortReq)
        {
          push_warning_printf(thd, Sql_condition::SL_WARNING,
                              ER_CANT_GET_STAT, /* pun? */
                              "index stats (RPK) for key %s:"
                              " unexpected error %d",
                              key_info->name, err);
        }
      }
      // no fallback method...
      break;
    }
    default:
      assert(false);
    }
    // set rows per key to 1 for complete key given for unique/primary index
    if (is_unique_index)
    {
      key_info->set_records_per_key(key_info->user_defined_key_parts-1, 1.0f);
    }
  }
  DBUG_VOID_RETURN;
}
1374
records(ha_rows * num_rows)1375 int ha_ndbcluster::records(ha_rows* num_rows)
1376 {
1377 DBUG_ENTER("ha_ndbcluster::records");
1378 DBUG_PRINT("info", ("id=%d, no_uncommitted_rows_count=%d",
1379 m_table->getTableId(),
1380 m_table_info->no_uncommitted_rows_count));
1381
1382 int error = update_stats(table->in_use, 1);
1383 if (error != 0)
1384 {
1385 *num_rows = HA_POS_ERROR;
1386 DBUG_RETURN(error);
1387 }
1388
1389 *num_rows = stats.records;
1390 DBUG_RETURN(0);
1391 }
1392
no_uncommitted_rows_execute_failure()1393 void ha_ndbcluster::no_uncommitted_rows_execute_failure()
1394 {
1395 DBUG_ENTER("ha_ndbcluster::no_uncommitted_rows_execute_failure");
1396 get_thd_ndb(current_thd)->m_error= TRUE;
1397 DBUG_VOID_RETURN;
1398 }
1399
no_uncommitted_rows_update(int c)1400 void ha_ndbcluster::no_uncommitted_rows_update(int c)
1401 {
1402 DBUG_ENTER("ha_ndbcluster::no_uncommitted_rows_update");
1403 struct Ndb_local_table_statistics *local_info= m_table_info;
1404 local_info->no_uncommitted_rows_count+= c;
1405 DBUG_PRINT("info", ("id=%d, no_uncommitted_rows_count=%d",
1406 m_table->getTableId(),
1407 local_info->no_uncommitted_rows_count));
1408 DBUG_VOID_RETURN;
1409 }
1410
1411
/**
  Translate the transaction's NDB error into a MySQL handler error code.

  Schema errors additionally invalidate the cached table definition.
  For duplicate-key errors, tries to identify which unique index was
  violated so m_dupkey can be reported to the server layer.
*/
int ha_ndbcluster::ndb_err(NdbTransaction *trans)
{
  THD *thd= current_thd;
  int res;
  NdbError err= trans->getNdbError();
  DBUG_ENTER("ndb_err");

  switch (err.classification) {
  case NdbError::SchemaError:
  {
    // TODO perhaps we need to do more here, invalidate also in the cache
    m_table->setStatusInvalid();
    /* Close other open handlers not used by any thread */
    ndb_tdc_close_cached_table(thd, m_dbname, m_tabname);
    break;
  }
  default:
    break;
  }
  res= ndb_to_mysql_error(&err);
  DBUG_PRINT("info", ("transformed ndbcluster error %d to mysql error %d",
                      err.code, res));
  if (res == HA_ERR_FOUND_DUPP_KEY)
  {
    char *error_data= err.details;
    uint dupkey= MAX_KEY;

    // Scan unique indexes for one whose NDB object id matches err.details.
    // NOTE(review): err.details appears to carry the violated index' object
    // id encoded in the pointer value itself - confirm against NdbError docs.
    for (uint i= 0; i < MAX_KEY; i++)
    {
      if (m_index[i].type == UNIQUE_INDEX ||
          m_index[i].type == UNIQUE_ORDERED_INDEX)
      {
        const NDBINDEX *unique_index=
          (const NDBINDEX *) m_index[i].unique_index;
        if (unique_index && UintPtr(unique_index->getObjectId()) == UintPtr(error_data))
        {
          dupkey= i;
          break;
        }
      }
    }
    if (m_rows_to_insert == 1)
    {
      /*
        We can only distinguish between primary and non-primary
        violations here, so we need to return MAX_KEY for non-primary
        to signal that key is unknown
      */
      m_dupkey= err.code == 630 ? table_share->primary_key : dupkey;
    }
    else
    {
      /* We are batching inserts, offending key is not available */
      m_dupkey= (uint) -1;
    }
  }
  DBUG_RETURN(res);
}
1470
1471
/**
  Override the default get_error_message in order to add the
  error message from NDB.
*/
get_error_message(int error,String * buf)1477 bool ha_ndbcluster::get_error_message(int error,
1478 String *buf)
1479 {
1480 DBUG_ENTER("ha_ndbcluster::get_error_message");
1481 DBUG_PRINT("enter", ("error: %d", error));
1482
1483 Ndb *ndb= check_ndb_in_thd(current_thd);
1484 if (!ndb)
1485 DBUG_RETURN(FALSE);
1486
1487 const NdbError err= ndb->getNdbError(error);
1488 bool temporary= err.status==NdbError::TemporaryError;
1489 buf->set(err.message, (uint32)strlen(err.message), &my_charset_bin);
1490 DBUG_PRINT("exit", ("message: %s, temporary: %d", buf->ptr(), temporary));
1491 DBUG_RETURN(temporary);
1492 }
1493
1494
1495 /*
1496 field_used_length() returns the number of bytes actually used to
1497 store the data of the field. So for a varstring it includes both
1498 length byte(s) and string data, and anything after data_length()
1499 bytes are unused.
1500 */
1501 static
field_used_length(const Field * field)1502 uint32 field_used_length(const Field* field)
1503 {
1504 if (field->type() == MYSQL_TYPE_VARCHAR)
1505 {
1506 const Field_varstring* f = static_cast<const Field_varstring*>(field);
1507 return f->length_bytes + const_cast<Field_varstring*>(f)->data_length();
1508 // ^ no 'data_length() const'
1509 }
1510 return field->pack_length();
1511 }
1512
1513
1514 /**
1515 Check if MySQL field type forces var part in ndb storage
1516 */
field_type_forces_var_part(enum_field_types type)1517 static bool field_type_forces_var_part(enum_field_types type)
1518 {
1519 switch (type) {
1520 case MYSQL_TYPE_VAR_STRING:
1521 case MYSQL_TYPE_VARCHAR:
1522 return TRUE;
1523 case MYSQL_TYPE_TINY_BLOB:
1524 case MYSQL_TYPE_BLOB:
1525 case MYSQL_TYPE_MEDIUM_BLOB:
1526 case MYSQL_TYPE_LONG_BLOB:
1527 case MYSQL_TYPE_JSON:
1528 case MYSQL_TYPE_GEOMETRY:
1529 return FALSE;
1530 default:
1531 return FALSE;
1532 }
1533 }
1534
1535 /*
1536 Return a generic buffer that will remain valid until after next execute.
1537
1538 The memory is freed by the first call to add_row_check_if_batch_full_size()
1539 following any execute() call. The intention is that the memory is associated
1540 with one batch of operations during batched slave updates.
1541
1542 Note in particular that using get_buffer() / copy_row_to_buffer() separately
  from add_row_check_if_batch_full_size() could make memory usage grow without
1544 limit, and that this sequence:
1545
1546 execute()
1547 get_buffer() / copy_row_to_buffer()
1548 add_row_check_if_batch_full_size()
1549 ...
1550 execute()
1551
1552 will free the memory already at add_row_check_if_batch_full_size() time, it
1553 will not remain valid until the second execute().
1554 */
1555 uchar *
get_buffer(Thd_ndb * thd_ndb,uint size)1556 ha_ndbcluster::get_buffer(Thd_ndb *thd_ndb, uint size)
1557 {
1558 return (uchar*)alloc_root(&(thd_ndb->m_batch_mem_root), size);
1559 }
1560
1561 uchar *
copy_row_to_buffer(Thd_ndb * thd_ndb,const uchar * record)1562 ha_ndbcluster::copy_row_to_buffer(Thd_ndb *thd_ndb, const uchar *record)
1563 {
1564 uchar *row= get_buffer(thd_ndb, table->s->reclength);
1565 if (unlikely(!row))
1566 return NULL;
1567 memcpy(row, record, table->s->reclength);
1568 return row;
1569 }
1570
1571 /**
1572 * findBlobError
1573 * This method attempts to find an error in the hierarchy of runtime
1574 * NDBAPI objects from Blob up to transaction.
1575 * It will return -1 if no error is found, 0 if an error is found.
1576 */
findBlobError(NdbError & error,NdbBlob * pBlob)1577 int findBlobError(NdbError& error, NdbBlob* pBlob)
1578 {
1579 error= pBlob->getNdbError();
1580 if (error.code != 0)
1581 return 0;
1582
1583 const NdbOperation* pOp= pBlob->getNdbOperation();
1584 error= pOp->getNdbError();
1585 if (error.code != 0)
1586 return 0;
1587
1588 NdbTransaction* pTrans= pOp->getNdbTransaction();
1589 error= pTrans->getNdbError();
1590 if (error.code != 0)
1591 return 0;
1592
1593 /* No error on any of the objects */
1594 return -1;
1595 }
1596
1597
1598 /*
1599 This routine calculates the length of the blob/text after applying mysql limits
1600 on blob/text sizes. If the blob contains multi-byte characters, the length is
1601 reduced till the end of the last well-formed char, so that data is not truncated
1602 in the middle of a multi-byte char.
1603 */
calc_ndb_blob_len(const CHARSET_INFO * cs,uchar * blob_ptr,uint64 maxlen)1604 uint64 calc_ndb_blob_len(const CHARSET_INFO *cs, uchar *blob_ptr, uint64 maxlen)
1605 {
1606 int errors = 0;
1607
1608 const char *begin = (const char*) blob_ptr;
1609 const char *end = (const char*) (blob_ptr+maxlen);
1610
1611 // avoid truncation in the middle of a multi-byte character by
1612 // stopping at end of last well-formed character before max length
1613 uint32 numchars = cs->cset->numchars(cs, begin, end);
1614 uint64 len64 = cs->cset->well_formed_len(cs, begin, end, numchars, &errors);
1615 assert(len64 <= maxlen);
1616
1617 return len64;
1618 }
1619
/**
  NdbBlob active hook, invoked once per blob column as blob handles
  become active. The first calls only accumulate the total buffer size
  needed for the row; when the last expected blob of the row arrives,
  the shared buffer is (re)allocated and all blob data is read into it.

  @param ndb_blob  the blob handle that just became active
  @param arg       the owning ha_ndbcluster instance
  @return 0 on success, -1 on allocation failure, else an NDB error path
*/
int g_get_ndb_blobs_value(NdbBlob *ndb_blob, void *arg)
{
  ha_ndbcluster *ha= (ha_ndbcluster *)arg;
  DBUG_ENTER("g_get_ndb_blobs_value");
  DBUG_PRINT("info", ("destination row: %p", ha->m_blob_destination_record));

  if (ha->m_blob_counter == 0) /* Reset total size at start of row */
    ha->m_blobs_row_total_size= 0;

  /* Count the total length needed for blob data. */
  int isNull;
  if (ndb_blob->getNull(isNull) != 0)
    ERR_RETURN(ndb_blob->getNdbError());
  if (isNull == 0) {
    Uint64 len64= 0;
    if (ndb_blob->getLength(len64) != 0)
      ERR_RETURN(ndb_blob->getNdbError());
    /* Align to Uint64. */
    ha->m_blobs_row_total_size+= (len64 + 7) & ~((Uint64)7);
    if (ha->m_blobs_row_total_size > 0xffffffff)
    {
      assert(FALSE);
      DBUG_RETURN(-1);
    }
    DBUG_PRINT("info", ("Blob number %d needs size %llu, total buffer reqt. now %llu",
                        ha->m_blob_counter,
                        len64,
                        ha->m_blobs_row_total_size));
  }
  ha->m_blob_counter++;

  /*
    Wait until all blobs in this row are active, so we can allocate
    and use a common buffer containing all.
  */
  if (ha->m_blob_counter < ha->m_blob_expected_count_per_row)
    DBUG_RETURN(0);

  /* Reset blob counter for next row (scan scenario) */
  ha->m_blob_counter= 0;

  /* Re-allocate bigger blob buffer for this row if necessary. */
  if (ha->m_blobs_row_total_size > ha->m_blobs_buffer_size)
  {
    my_free(ha->m_blobs_buffer, MYF(MY_ALLOW_ZERO_PTR));
    DBUG_PRINT("info", ("allocate blobs buffer size %u",
                        (uint32)(ha->m_blobs_row_total_size)));
    /* Windows compiler complains about my_malloc on non-size_t
     * validate mapping from Uint64 to size_t
     */
    if(((size_t)ha->m_blobs_row_total_size) != ha->m_blobs_row_total_size)
    {
      ha->m_blobs_buffer= NULL;
      ha->m_blobs_buffer_size= 0;
      DBUG_RETURN(-1);
    }

    ha->m_blobs_buffer=
      (uchar*) my_malloc(PSI_INSTRUMENT_ME,
                         (size_t) ha->m_blobs_row_total_size, MYF(MY_WME));
    if (ha->m_blobs_buffer == NULL)
    {
      ha->m_blobs_buffer_size= 0;
      DBUG_RETURN(-1);
    }
    ha->m_blobs_buffer_size= ha->m_blobs_row_total_size;
  }

  /*
    Now read all blob data.
    If we know the destination mysqld row, we also set the blob null bit and
    pointer/length (if not, it will be done instead in unpack_record()).
  */
  uint32 offset= 0;   // running offset of each blob inside the shared buffer
  for (uint i= 0; i < ha->table->s->fields; i++)
  {
    Field *field= ha->table->field[i];
    if (! (field->flags & BLOB_FLAG))
      continue;
    NdbValue value= ha->m_value[i];
    if (value.blob == NULL)
    {
      DBUG_PRINT("info",("[%u] skipped", i));
      continue;
    }
    Field_blob *field_blob= (Field_blob *)field;
    NdbBlob *ndb_blob= value.blob;
    int isNull;
    if (ndb_blob->getNull(isNull) != 0)
      ERR_RETURN(ndb_blob->getNdbError());
    if (isNull == 0) {
      Uint64 len64= 0;
      if (ndb_blob->getLength(len64) != 0)
        ERR_RETURN(ndb_blob->getNdbError());
      assert(len64 < 0xffffffff);
      uchar *buf= ha->m_blobs_buffer + offset;
      uint32 len= (uint32)(ha->m_blobs_buffer_size - offset);
      if (ndb_blob->readData(buf, len) != 0)
      {
        NdbError err;
        if (findBlobError(err, ndb_blob) == 0)
        {
          ERR_RETURN(err);
        }
        else
        {
          /* Should always have some error code set */
          assert(err.code != 0);
          ERR_RETURN(err);
        }
      }
      DBUG_PRINT("info", ("[%u] offset: %u buf: 0x%lx len=%u",
                          i, offset, (long) buf, len));
      assert(len == len64);
      if (ha->m_blob_destination_record)
      {
        my_ptrdiff_t ptrdiff=
          ha->m_blob_destination_record - ha->table->record[0];
        field_blob->move_field_offset(ptrdiff);

        // Clamp to mysqld's max blob length, trimming at a multi-byte
        // character boundary (see calc_ndb_blob_len)
        if(len > field_blob->max_data_length())
        {
          len = calc_ndb_blob_len(field_blob->charset(),
                                  buf, field_blob->max_data_length());

          // push a warning
          push_warning_printf(current_thd, Sql_condition::SL_WARNING,
                      WARN_DATA_TRUNCATED,
                      "Truncated value from TEXT field \'%s\'", field_blob->field_name);
        }

        field_blob->set_ptr(len, buf);
        field_blob->set_notnull();
        field_blob->move_field_offset(-ptrdiff);
      }
      offset+= Uint32((len64 + 7) & ~((Uint64)7));
    }
    else if (ha->m_blob_destination_record)
    {
      /* Have to set length even in this case. */
      my_ptrdiff_t ptrdiff=
        ha->m_blob_destination_record - ha->table->record[0];
      uchar *buf= ha->m_blobs_buffer + offset;
      field_blob->move_field_offset(ptrdiff);
      field_blob->set_ptr((uint32)0, buf);
      field_blob->set_null();
      field_blob->move_field_offset(-ptrdiff);
      DBUG_PRINT("info", ("[%u] isNull=%d", i, isNull));
    }
  }

  if (!ha->m_active_cursor)
  {
    /* Non-scan, Blob reads have been issued
     * execute them and then close the Blob
     * handles
     */
    for (uint i= 0; i < ha->table->s->fields; i++)
    {
      Field *field= ha->table->field[i];
      if (! (field->flags & BLOB_FLAG))
        continue;
      NdbValue value= ha->m_value[i];
      if (value.blob == NULL)
      {
        DBUG_PRINT("info",("[%u] skipped", i));
        continue;
      }
      NdbBlob *ndb_blob= value.blob;

      assert(ndb_blob->getState() == NdbBlob::Active);

      /* Call close() with execPendingBlobOps == true
       * For LM_CommittedRead access, this will enqueue
       * an unlock operation, which the Blob framework
       * code invoking this callback will execute before
       * returning control to the caller of execute()
       */
      if (ndb_blob->close(true) != 0)
      {
        ERR_RETURN(ndb_blob->getNdbError());
      }
    }
  }

  DBUG_RETURN(0);
}
1807
1808 /*
1809 Request reading of blob values.
1810
1811 If dst_record is specified, the blob null bit, pointer, and length will be
1812 set in that record. Otherwise they must be set later by calling
1813 unpack_record().
1814 */
1815 int
get_blob_values(const NdbOperation * ndb_op,uchar * dst_record,const MY_BITMAP * bitmap)1816 ha_ndbcluster::get_blob_values(const NdbOperation *ndb_op, uchar *dst_record,
1817 const MY_BITMAP *bitmap)
1818 {
1819 uint i;
1820 DBUG_ENTER("ha_ndbcluster::get_blob_values");
1821
1822 m_blob_counter= 0;
1823 m_blob_expected_count_per_row= 0;
1824 m_blob_destination_record= dst_record;
1825 m_blobs_row_total_size= 0;
1826 ndb_op->getNdbTransaction()->
1827 setMaxPendingBlobReadBytes(THDVAR(current_thd, blob_read_batch_bytes));
1828
1829 for (i= 0; i < table_share->fields; i++)
1830 {
1831 Field *field= table->field[i];
1832 if (!(field->flags & BLOB_FLAG))
1833 continue;
1834
1835 DBUG_PRINT("info", ("fieldnr=%d", i));
1836 NdbBlob *ndb_blob;
1837 if (bitmap_is_set(bitmap, i))
1838 {
1839 if ((ndb_blob= ndb_op->getBlobHandle(i)) == NULL ||
1840 ndb_blob->setActiveHook(g_get_ndb_blobs_value, this) != 0)
1841 DBUG_RETURN(1);
1842 m_blob_expected_count_per_row++;
1843 }
1844 else
1845 ndb_blob= NULL;
1846
1847 m_value[i].blob= ndb_blob;
1848 }
1849
1850 DBUG_RETURN(0);
1851 }
1852
/**
  Write blob column values for this operation.

  Iterates the table's blob columns (all of them when @a bitmap is NULL)
  and sets each handle to NULL or to the mysqld row's blob data. When
  batching, the data is first copied into a batch-lifetime buffer since
  NdbBlob keeps the pointer until execute() time.

  @param row_offset  offset of the source row relative to record[0]
  @param set_count   out: number of blob values set
  @return 0 on success, HA_ERR_OUT_OF_MEM or an NDB error code on failure
*/
int
ha_ndbcluster::set_blob_values(const NdbOperation *ndb_op,
                               my_ptrdiff_t row_offset, const MY_BITMAP *bitmap,
                               uint *set_count, bool batch)
{
  uint field_no;
  uint *blob_index, *blob_index_end;
  int res= 0;
  DBUG_ENTER("ha_ndbcluster::set_blob_values");

  *set_count= 0;

  if (table_share->blob_fields == 0)
    DBUG_RETURN(0);

  ndb_op->getNdbTransaction()->
    setMaxPendingBlobWriteBytes(THDVAR(current_thd, blob_write_batch_bytes));
  blob_index= table_share->blob_field;
  blob_index_end= blob_index + table_share->blob_fields;
  do
  {
    // NOTE: 'continue' below jumps to the '++blob_index' in the loop
    // condition, so every iteration advances exactly once
    field_no= *blob_index;
    /* A NULL bitmap sets all blobs. */
    if (bitmap && !bitmap_is_set(bitmap, field_no))
      continue;
    Field *field= table->field[field_no];

    NdbBlob *ndb_blob= ndb_op->getBlobHandle(field_no);
    if (ndb_blob == NULL)
      ERR_RETURN(ndb_op->getNdbError());
    if (field->is_real_null(row_offset))
    {
      DBUG_PRINT("info", ("Setting Blob %d to NULL", field_no));
      if (ndb_blob->setNull() != 0)
        ERR_RETURN(ndb_op->getNdbError());
    }
    else
    {
      Field_blob *field_blob= (Field_blob *)field;

      // Get length and pointer to data
      const uchar *field_ptr= field->ptr + row_offset;
      uint32 blob_len= field_blob->get_length(field_ptr);
      uchar* blob_ptr= NULL;
      field_blob->get_ptr(&blob_ptr);

      // Looks like NULL ptr signals length 0 blob
      if (blob_ptr == NULL) {
        assert(blob_len == 0);
        blob_ptr= (uchar*)"";
      }

      DBUG_PRINT("value", ("set blob ptr: 0x%lx len: %u",
                           (long) blob_ptr, blob_len));
      DBUG_DUMP("value", blob_ptr, MIN(blob_len, 26));

      /*
        NdbBlob requires the data pointer to remain valid until execute() time.
        So when batching, we need to copy the value to a temporary buffer.
      */
      if (batch && blob_len > 0)
      {
        uchar *tmp_buf= get_buffer(m_thd_ndb, blob_len);
        if (!tmp_buf)
          DBUG_RETURN(HA_ERR_OUT_OF_MEM);
        memcpy(tmp_buf, blob_ptr, blob_len);
        blob_ptr= tmp_buf;
      }
      res= ndb_blob->setValue((char*)blob_ptr, blob_len);
      if (res != 0)
        ERR_RETURN(ndb_op->getNdbError());
    }

    ++(*set_count);
  } while (++blob_index != blob_index_end);

  DBUG_RETURN(res);
}
1931
1932
1933 /**
1934 Check if any set or get of blob value in current query.
1935 */
1936
uses_blob_value(const MY_BITMAP * bitmap) const1937 bool ha_ndbcluster::uses_blob_value(const MY_BITMAP *bitmap) const
1938 {
1939 uint *blob_index, *blob_index_end;
1940 if (table_share->blob_fields == 0)
1941 return FALSE;
1942
1943 blob_index= table_share->blob_field;
1944 blob_index_end= blob_index + table_share->blob_fields;
1945 do
1946 {
1947 if (bitmap_is_set(bitmap, table->field[*blob_index]->field_index))
1948 return TRUE;
1949 } while (++blob_index != blob_index_end);
1950 return FALSE;
1951 }
1952
release_blobs_buffer()1953 void ha_ndbcluster::release_blobs_buffer()
1954 {
1955 DBUG_ENTER("releaseBlobsBuffer");
1956 if (m_blobs_buffer_size > 0)
1957 {
1958 DBUG_PRINT("info", ("Deleting blobs buffer, size %llu", m_blobs_buffer_size));
1959 my_free(m_blobs_buffer, MYF(MY_ALLOW_ZERO_PTR));
1960 m_blobs_buffer= 0;
1961 m_blobs_row_total_size= 0;
1962 m_blobs_buffer_size= 0;
1963 }
1964 DBUG_VOID_RETURN;
1965 }
1966
1967
1968 /*
1969 Does type support a default value?
1970 */
1971 static bool
type_supports_default_value(enum_field_types mysql_type)1972 type_supports_default_value(enum_field_types mysql_type)
1973 {
1974 bool ret = (mysql_type != MYSQL_TYPE_BLOB &&
1975 mysql_type != MYSQL_TYPE_TINY_BLOB &&
1976 mysql_type != MYSQL_TYPE_MEDIUM_BLOB &&
1977 mysql_type != MYSQL_TYPE_LONG_BLOB &&
1978 mysql_type != MYSQL_TYPE_JSON &&
1979 mysql_type != MYSQL_TYPE_GEOMETRY);
1980
1981 return ret;
1982 }
1983
1984 /**
1985 Check that Ndb data dictionary has the same default values
1986 as MySQLD for the current table.
1987 Called as part of a DBUG check as part of table open
1988
1989 Returns
1990 0 - Defaults are ok
1991 -1 - Some default(s) are bad
1992 */
int ha_ndbcluster::check_default_values(const NDBTAB* ndbtab)
{
  /* Debug only method for checking table defaults aligned
     between MySQLD and Ndb
  */
  bool defaults_aligned= true;

  if (ndbtab->hasDefaultValues())
  {
    /* Ndb supports native defaults for non-pk columns */
    my_bitmap_map *old_map= tmp_use_all_columns(table, table->read_set);

    for (uint f=0; f < table_share->fields; f++)
    {
      Field* field= table->field[f]; // Use Field struct from MySQLD table rep
      const NdbDictionary::Column* ndbCol= ndbtab->getColumn(field->field_index);

      if ((! (field->flags & (PRI_KEY_FLAG |
                              NO_DEFAULT_VALUE_FLAG))) &&
          type_supports_default_value(field->real_type()))
      {
        /* We expect Ndb to have a native default for this
         * column
         */
        my_ptrdiff_t src_offset= table_share->default_values -
          field->table->record[0];

        /* Move field by offset to refer to default value */
        field->move_field_offset(src_offset);

        const uchar* ndb_default= (const uchar*) ndbCol->getDefaultValue();

        if (ndb_default == NULL)
          /* MySQLD default must also be NULL */
          defaults_aligned= field->is_null();
        else
        {
          if (field->type() != MYSQL_TYPE_BIT)
          {
            defaults_aligned= (0 == field->cmp(ndb_default));
          }
          else
          {
            longlong value= (static_cast<Field_bit*>(field))->val_int();
            /* Map to NdbApi format - two Uint32s */
            Uint32 out[2];
            out[0] = 0;
            out[1] = 0;
            // Spread the 64 bit-value bits over the two Uint32 words
            for (int b=0; b < 64; b++)
            {
              out[b >> 5] |= (value & 1) << (b & 31);

              value= value >> 1;
            }
            Uint32 defaultLen = field_used_length(field);
            /* NOTE(review): '(defaultLen + 3) & ~0x7' rounds DOWN for
               lengths <= 4 (e.g. 1 -> 0), so short BIT defaults compare
               0 bytes. Rounding up to 8 would be '(defaultLen + 7) & ~0x7'
               - confirm intent against the NDB dictionary format. */
            defaultLen = ((defaultLen + 3) & ~(Uint32)0x7);
            defaults_aligned= (0 == memcmp(ndb_default,
                                           out,
                                           defaultLen));
          }
        }

        field->move_field_offset(-src_offset);

        if (unlikely(!defaults_aligned))
        {
          sql_print_error("NDB Internal error: Default values differ "
                          "for column %u, ndb_default: %d",
                          field->field_index, ndb_default != NULL);
        }
      }
      else
      {
        /* We don't expect Ndb to have a native default for this column */
        if (unlikely(ndbCol->getDefaultValue() != NULL))
        {
          /* Didn't expect that */
          sql_print_error("NDB Internal error: Column %u has native "
                          "default, but shouldn't. Flags=%u, type=%u",
                          field->field_index, field->flags,
                          field->real_type());
          defaults_aligned= false;
        }
      }
      if (unlikely(!defaults_aligned))
      {
        // Dump field
        sql_print_error("field[ name: '%s', type: %u, real_type: %u, "
                        "flags: 0x%x, is_null: %d]",
                        field->field_name, field->type(), field->real_type(),
                        field->flags, field->is_null());
        // Dump ndbCol
        sql_print_error("ndbCol[name: '%s', type: %u, column_no: %d, "
                        "nullable: %d]",
                        ndbCol->getName(), ndbCol->getType(),
                        ndbCol->getColumnNo(), ndbCol->getNullable());
        break;
      }
    }
    tmp_restore_column_map(table->read_set, old_map);
  }

  return (defaults_aligned? 0: -1);
}
2097
/*
  Open the NDB table metadata for this handler instance.

  Fetches the table definition from the NDB dictionary, verifies that
  the frm stored in NDB matches the frm on disk (unless the share is in
  the NSS_ALTERED state), then sets up the NdbRecord instances, index
  handles and foreign key data needed for row access.

  Returns 0 on success, HA_ERR_TABLE_DEF_CHANGED if the frm differs,
  or another handler error code.
*/
int ha_ndbcluster::get_metadata(THD *thd, const char *path)
{
  Ndb *ndb= get_thd_ndb(thd)->ndb;
  NDBDICT *dict= ndb->getDictionary();
  const NDBTAB *tab;
  int error;
  DBUG_ENTER("get_metadata");
  DBUG_PRINT("enter", ("m_tabname: %s, path: %s", m_tabname, path));

  assert(m_table == NULL);
  assert(m_table_info == NULL);

  uchar *data= NULL, *pack_data= NULL;
  size_t length, pack_length;

  /*
    Compare FrmData in NDB with frm file from disk.
  */
  error= 0;
  if (readfrm(path, &data, &length) ||
      packfrm(data, length, &pack_data, &pack_length))
  {
    /* Either buffer may be partially allocated here */
    my_free(data, MYF(MY_ALLOW_ZERO_PTR));
    my_free(pack_data, MYF(MY_ALLOW_ZERO_PTR));
    DBUG_RETURN(1);
  }

  ndb->setDatabaseName(m_dbname);
  Ndb_table_guard ndbtab_g(dict, m_tabname);
  if (!(tab= ndbtab_g.get_table()))
    ERR_RETURN(dict->getNdbError());

  /* NSS_ALTERED: the frm is known to be changing, skip the comparison */
  if (get_ndb_share_state(m_share) != NSS_ALTERED
      && cmp_frm(tab, pack_data, pack_length))
  {
    DBUG_PRINT("error",
               ("metadata, pack_length: %lu getFrmLength: %d memcmp: %d",
                (ulong) pack_length, tab->getFrmLength(),
                memcmp(pack_data, tab->getFrmData(), pack_length)));
    DBUG_DUMP("pack_data", (uchar*) pack_data, pack_length);
    DBUG_DUMP("frm", (uchar*) tab->getFrmData(), tab->getFrmLength());
    error= HA_ERR_TABLE_DEF_CHANGED;
  }
  my_free((char*)data, MYF(0));
  my_free((char*)pack_data, MYF(0));

  /* Now check that any Ndb native defaults are aligned
     with MySQLD defaults
  */
  /* NOTE: side effect inside assert() - intentionally runs only in
     debug builds, matching check_default_values() being debug-only */
  assert(check_default_values(tab) == 0);

  if (error)
    goto err;

  DBUG_PRINT("info", ("fetched table %s", tab->getName()));
  m_table= tab;

  /* Bitmap used by this handler for temporary column-set manipulation */
  if (bitmap_init(&m_bitmap, m_bitmap_buf, table_share->fields, 0))
  {
    error= HA_ERR_OUT_OF_MEM;
    goto err;
  }
  if (table_share->primary_key == MAX_KEY)
  {
    /* Hidden primary key. */
    if ((error= add_hidden_pk_ndb_record(dict)) != 0)
      goto err;
  }

  if ((error= add_table_ndb_record(dict)) != 0)
    goto err;

  /*
    Approx. write size in bytes over transporter
  */
  m_bytes_per_write= 12 + tab->getRowSizeInBytes() + 4 * tab->getNoOfColumns();

  /* Open indexes */
  if ((error= open_indexes(thd, ndb, table, FALSE)) != 0)
    goto err;

  /* Read foreign keys where this table is child or parent */
  if ((error= get_fk_data(thd, ndb)) != 0)
    goto err;

  /*
    Backward compatibility for tables created without tablespace
    in .frm => read tablespace setting from engine
  */
  if (table_share->mysql_version < 50120 &&
      !table_share->tablespace /* safety */)
  {
    Uint32 id;
    if (tab->getTablespace(&id))
    {
      NdbDictionary::Tablespace ts= dict->getTablespace(id);
      NdbError ndberr= dict->getNdbError();
      if (ndberr.classification == NdbError::NoError)
      {
        const char *tablespace= ts.getName();
        const size_t tablespace_len= strlen(tablespace);
        if (tablespace_len != 0)
        {
          DBUG_PRINT("info", ("Found tablespace '%s'", tablespace));
          /* Copy into the share's mem_root so the name outlives the
             dictionary object */
          table_share->tablespace= strmake_root(&table_share->mem_root,
                                                tablespace,
                                                tablespace_len);
        }
      }
    }
  }

  /* Hand the guard's table reference over to this handler (m_table) */
  ndbtab_g.release();

  DBUG_RETURN(0);

err:
  /* Drop the cached table definition so a later open refetches it */
  ndbtab_g.invalidate();
  m_table= NULL;
  DBUG_RETURN(error);
}
2219
fix_unique_index_attr_order(NDB_INDEX_DATA & data,const NDBINDEX * index,KEY * key_info)2220 static int fix_unique_index_attr_order(NDB_INDEX_DATA &data,
2221 const NDBINDEX *index,
2222 KEY *key_info)
2223 {
2224 DBUG_ENTER("fix_unique_index_attr_order");
2225 unsigned sz= index->getNoOfIndexColumns();
2226
2227 if (data.unique_index_attrid_map)
2228 my_free((char*)data.unique_index_attrid_map, MYF(0));
2229 data.unique_index_attrid_map= (uchar*)my_malloc(PSI_INSTRUMENT_ME, sz,MYF(MY_WME));
2230 if (data.unique_index_attrid_map == 0)
2231 {
2232 sql_print_error("fix_unique_index_attr_order: my_malloc(%u) failure",
2233 (unsigned int)sz);
2234 DBUG_RETURN(HA_ERR_OUT_OF_MEM);
2235 }
2236
2237 KEY_PART_INFO* key_part= key_info->key_part;
2238 KEY_PART_INFO* end= key_part+key_info->user_defined_key_parts;
2239 assert(key_info->user_defined_key_parts == sz);
2240 for (unsigned i= 0; key_part != end; key_part++, i++)
2241 {
2242 const char *field_name= key_part->field->field_name;
2243 #ifndef NDEBUG
2244 data.unique_index_attrid_map[i]= 255;
2245 #endif
2246 for (unsigned j= 0; j < sz; j++)
2247 {
2248 const NDBCOL *c= index->getColumn(j);
2249 if (strcmp(field_name, c->getName()) == 0)
2250 {
2251 data.unique_index_attrid_map[i]= j;
2252 break;
2253 }
2254 }
2255 assert(data.unique_index_attrid_map[i] != 255);
2256 }
2257 DBUG_RETURN(0);
2258 }
2259
2260 /*
2261 Create all the indexes for a table.
2262 If any index should fail to be created,
2263 the error is returned immediately
2264 */
create_indexes(THD * thd,Ndb * ndb,TABLE * tab) const2265 int ha_ndbcluster::create_indexes(THD *thd, Ndb *ndb, TABLE *tab) const
2266 {
2267 uint i;
2268 int error= 0;
2269 const char *index_name;
2270 KEY* key_info= tab->key_info;
2271 const char **key_name= tab->s->keynames.type_names;
2272 DBUG_ENTER("ha_ndbcluster::create_indexes");
2273
2274 for (i= 0; i < tab->s->keys; i++, key_info++, key_name++)
2275 {
2276 index_name= *key_name;
2277 NDB_INDEX_TYPE idx_type= get_index_type_from_table(i);
2278 error= create_index(thd, index_name, key_info, idx_type, i);
2279 if (error)
2280 {
2281 DBUG_PRINT("error", ("Failed to create index %u", i));
2282 break;
2283 }
2284 }
2285
2286 DBUG_RETURN(error);
2287 }
2288
ndb_init_index(NDB_INDEX_DATA & data)2289 static void ndb_init_index(NDB_INDEX_DATA &data)
2290 {
2291 data.type= UNDEFINED_INDEX;
2292 data.status= UNDEFINED;
2293 data.unique_index= NULL;
2294 data.index= NULL;
2295 data.unique_index_attrid_map= NULL;
2296 data.ndb_record_key= NULL;
2297 data.ndb_unique_record_key= NULL;
2298 data.ndb_unique_record_row= NULL;
2299 }
2300
ndb_clear_index(NDBDICT * dict,NDB_INDEX_DATA & data)2301 static void ndb_clear_index(NDBDICT *dict, NDB_INDEX_DATA &data)
2302 {
2303 if (data.unique_index_attrid_map)
2304 {
2305 my_free((char*)data.unique_index_attrid_map, MYF(0));
2306 }
2307 if (data.ndb_unique_record_key)
2308 dict->releaseRecord(data.ndb_unique_record_key);
2309 if (data.ndb_unique_record_row)
2310 dict->releaseRecord(data.ndb_unique_record_row);
2311 if (data.ndb_record_key)
2312 dict->releaseRecord(data.ndb_record_key);
2313 ndb_init_index(data);
2314 }
2315
/*
  Copy 'from' into 'to' (at most to_length bytes including the NUL
  terminator), replacing every occurrence of the character 'protect'
  with the escape sequence "@00<code>", where <code> is the decimal
  character code. Used to make names safe for the NDB dictionary.
*/
static
void ndb_protect_char(const char* from, char* to, uint to_length, char protect)
{
  uint fpos= 0, tpos= 0;

  /* Guard: to_length is unsigned, so 'to_length - 1' below would wrap
     around to a huge bound for a zero-sized buffer */
  if (to_length == 0)
    return;

  while(from[fpos] != '\0' && tpos < to_length - 1)
  {
    if (from[fpos] == protect)
    {
      to[tpos++]= '@';
      if(tpos < to_length - 5)
      {
        /*
          Bounded write (was sprintf): a negative 'char' would be
          sign-extended and print as a 10-digit %u, overflowing the
          fixed 5-byte reservation of the old unbounded call.
        */
        int len= snprintf(to + tpos, to_length - tpos, "00%u", (uint) protect);
        if (len > 0)
        {
          /* Clamp in case the escape was truncated */
          tpos= (tpos + (uint)len < to_length) ? tpos + (uint)len
                                               : to_length - 1;
        }
      }
    }
    else
    {
      to[tpos++]= from[fpos];
    }
    fpos++;
  }
  to[tpos]= '\0';
}
2341
2342 /*
2343 Associate a direct reference to an index handle
2344 with an index (for faster access)
2345 */
int ha_ndbcluster::add_index_handle(THD *thd, NDBDICT *dict, KEY *key_info,
                                    const char *key_name, uint index_no)
{
  char index_name[FN_LEN + 1];
  int error= 0;

  NDB_INDEX_TYPE idx_type= get_index_type_from_table(index_no);
  m_index[index_no].type= idx_type;
  DBUG_ENTER("ha_ndbcluster::add_index_handle");
  DBUG_PRINT("enter", ("table %s", m_tabname));

  /* NDB object names cannot contain '/': escape it as @00<code> */
  ndb_protect_char(key_name, index_name, sizeof(index_name) - 1, '/');
  /* All types except the pure hash indexes have an ordered index in NDB */
  if (idx_type != PRIMARY_KEY_INDEX && idx_type != UNIQUE_INDEX)
  {
    DBUG_PRINT("info", ("Get handle to index %s", index_name));
    const NDBINDEX *index;
    /* Loop currently executes once; kept as a retry-style construct */
    do
    {
      index= dict->getIndexGlobal(index_name, *m_table);
      if (!index)
        ERR_RETURN(dict->getNdbError());
      DBUG_PRINT("info", ("index: 0x%lx id: %d version: %d.%d status: %d",
                          (long) index,
                          index->getObjectId(),
                          index->getObjectVersion() & 0xFFFFFF,
                          index->getObjectVersion() >> 24,
                          index->getObjectStatus()));
      assert(index->getObjectStatus() ==
             NdbDictionary::Object::Retrieved);
      break;
    } while (1);
    m_index[index_no].index= index;
  }
  /* Unique keys additionally have a "$unique" hash index object in NDB */
  if (idx_type == UNIQUE_ORDERED_INDEX || idx_type == UNIQUE_INDEX)
  {
    char unique_index_name[FN_LEN + 1];
    static const char* unique_suffix= "$unique";
    m_has_unique_index= TRUE;
    strxnmov(unique_index_name, FN_LEN, index_name, unique_suffix, NullS);
    DBUG_PRINT("info", ("Get handle to unique_index %s", unique_index_name));
    const NDBINDEX *index;
    /* Same single-pass retry-style loop as above */
    do
    {
      index= dict->getIndexGlobal(unique_index_name, *m_table);
      if (!index)
        ERR_RETURN(dict->getNdbError());
      DBUG_PRINT("info", ("index: 0x%lx id: %d version: %d.%d status: %d",
                          (long) index,
                          index->getObjectId(),
                          index->getObjectVersion() & 0xFFFFFF,
                          index->getObjectVersion() >> 24,
                          index->getObjectStatus()));
      assert(index->getObjectStatus() ==
             NdbDictionary::Object::Retrieved);
      break;
    } while (1);
    m_index[index_no].unique_index= index;
    /* Map mysqld key part order onto NDB index column order */
    error= fix_unique_index_attr_order(m_index[index_no], index, key_info);
  }

  if (!error)
    error= add_index_ndb_record(dict, key_info, index_no);

  if (!error)
    m_index[index_no].status= ACTIVE;

  DBUG_RETURN(error);
}
2414
2415 /*
2416 We use this function to convert null bit masks, as found in class Field,
2417 to bit numbers, as used in NdbRecord.
2418 */
2419 static uint
null_bit_mask_to_bit_number(uchar bit_mask)2420 null_bit_mask_to_bit_number(uchar bit_mask)
2421 {
2422 switch (bit_mask)
2423 {
2424 case 0x1: return 0;
2425 case 0x2: return 1;
2426 case 0x4: return 2;
2427 case 0x8: return 3;
2428 case 0x10: return 4;
2429 case 0x20: return 5;
2430 case 0x40: return 6;
2431 case 0x80: return 7;
2432 default:
2433 assert(false);
2434 return 0;
2435 }
2436 }
2437
2438 static void
ndb_set_record_specification(uint field_no,NdbDictionary::RecordSpecification * spec,const TABLE * table,const NdbDictionary::Table * ndb_table)2439 ndb_set_record_specification(uint field_no,
2440 NdbDictionary::RecordSpecification *spec,
2441 const TABLE *table,
2442 const NdbDictionary::Table *ndb_table)
2443 {
2444 spec->column= ndb_table->getColumn(field_no);
2445 spec->offset= Uint32(table->field[field_no]->ptr - table->record[0]);
2446 if (table->field[field_no]->real_maybe_null())
2447 {
2448 spec->nullbit_byte_offset=
2449 Uint32(table->field[field_no]->null_offset());
2450 spec->nullbit_bit_in_byte=
2451 null_bit_mask_to_bit_number(table->field[field_no]->null_bit);
2452 }
2453 else if (table->field[field_no]->type() == MYSQL_TYPE_BIT)
2454 {
2455 /* We need to store the position of the overflow bits. */
2456 const Field_bit* field_bit= static_cast<Field_bit*>(table->field[field_no]);
2457 spec->nullbit_byte_offset=
2458 Uint32(field_bit->bit_ptr - table->record[0]);
2459 spec->nullbit_bit_in_byte= field_bit->bit_ofs;
2460 }
2461 else
2462 {
2463 spec->nullbit_byte_offset= 0;
2464 spec->nullbit_bit_in_byte= 0;
2465 }
2466 spec->column_flags= 0;
2467 if (table->field[field_no]->type() == MYSQL_TYPE_STRING &&
2468 table->field[field_no]->pack_length() == 0)
2469 {
2470 /*
2471 This is CHAR(0), which we represent as
2472 a nullable BIT(1) column where we ignore the data bit
2473 */
2474 spec->column_flags |=
2475 NdbDictionary::RecordSpecification::BitColMapsNullBitOnly;
2476 }
2477 }
2478
2479 int
add_table_ndb_record(NDBDICT * dict)2480 ha_ndbcluster::add_table_ndb_record(NDBDICT *dict)
2481 {
2482 DBUG_ENTER("ha_ndbcluster::add_table_ndb_record()");
2483 NdbDictionary::RecordSpecification spec[NDB_MAX_ATTRIBUTES_IN_TABLE + 2];
2484 NdbRecord *rec;
2485 uint i;
2486
2487 for (i= 0; i < table_share->fields; i++)
2488 {
2489 ndb_set_record_specification(i, &spec[i], table, m_table);
2490 }
2491
2492 rec= dict->createRecord(m_table, spec, i, sizeof(spec[0]),
2493 NdbDictionary::RecMysqldBitfield |
2494 NdbDictionary::RecPerColumnFlags);
2495 if (! rec)
2496 ERR_RETURN(dict->getNdbError());
2497 m_ndb_record= rec;
2498
2499 DBUG_RETURN(0);
2500 }
2501
2502 /* Create NdbRecord for setting hidden primary key from Uint64. */
2503 int
add_hidden_pk_ndb_record(NDBDICT * dict)2504 ha_ndbcluster::add_hidden_pk_ndb_record(NDBDICT *dict)
2505 {
2506 DBUG_ENTER("ha_ndbcluster::add_hidden_pk_ndb_record");
2507 NdbDictionary::RecordSpecification spec[1];
2508 NdbRecord *rec;
2509
2510 spec[0].column= m_table->getColumn(table_share->fields);
2511 spec[0].offset= 0;
2512 spec[0].nullbit_byte_offset= 0;
2513 spec[0].nullbit_bit_in_byte= 0;
2514
2515 rec= dict->createRecord(m_table, spec, 1, sizeof(spec[0]));
2516 if (! rec)
2517 ERR_RETURN(dict->getNdbError());
2518 m_ndb_hidden_key_record= rec;
2519
2520 DBUG_RETURN(0);
2521 }
2522
/*
  Create the NdbRecord instances for index number index_no:
   - ndb_record_key:        key access via the ordered index, key parts
                            packed consecutively in a key buffer
   - ndb_unique_record_key: key access via the unique index (or the
                            primary key), same packed layout
   - ndb_unique_record_row: same index, but with offsets taken from
                            the mysqld row format (row-based access)
  Returns 0 on success, or a mapped NDB error code.
*/
int
ha_ndbcluster::add_index_ndb_record(NDBDICT *dict, KEY *key_info, uint index_no)
{
  DBUG_ENTER("ha_ndbcluster::add_index_ndb_record");
  NdbDictionary::RecordSpecification spec[NDB_MAX_ATTRIBUTES_IN_TABLE + 2];
  NdbRecord *rec;

  /* First build a spec with key parts packed consecutively */
  Uint32 offset= 0;
  for (uint i= 0; i < key_info->user_defined_key_parts; i++)
  {
    KEY_PART_INFO *kp= &key_info->key_part[i];

    spec[i].column= m_table->getColumn(kp->fieldnr - 1);
    if (! spec[i].column)
      ERR_RETURN(dict->getNdbError());
    if (kp->null_bit)
    {
      /* Nullable column. */
      spec[i].offset= offset + 1; // First byte is NULL flag
      spec[i].nullbit_byte_offset= offset;
      spec[i].nullbit_bit_in_byte= 0;
    }
    else
    {
      /* Not nullable column. */
      spec[i].offset= offset;
      spec[i].nullbit_byte_offset= 0;
      spec[i].nullbit_bit_in_byte= 0;
    }
    offset+= kp->store_length;
  }

  if (m_index[index_no].index)
  {
    /*
      Enable MysqldShrinkVarchar flag so that the two-byte length used by
      mysqld for short varchar keys is correctly converted into a one-byte
      length used by Ndb kernel.
    */
    rec= dict->createRecord(m_index[index_no].index, m_table,
                            spec, key_info->user_defined_key_parts, sizeof(spec[0]),
                            ( NdbDictionary::RecMysqldShrinkVarchar |
                              NdbDictionary::RecMysqldBitfield ));
    if (! rec)
      ERR_RETURN(dict->getNdbError());
    m_index[index_no].ndb_record_key= rec;
  }
  else
    m_index[index_no].ndb_record_key= NULL;

  if (m_index[index_no].unique_index)
  {
    /* Key record for the unique (hash) index */
    rec= dict->createRecord(m_index[index_no].unique_index, m_table,
                            spec, key_info->user_defined_key_parts, sizeof(spec[0]),
                            ( NdbDictionary::RecMysqldShrinkVarchar |
                              NdbDictionary::RecMysqldBitfield ));
    if (! rec)
      ERR_RETURN(dict->getNdbError());
    m_index[index_no].ndb_unique_record_key= rec;
  }
  else if (index_no == table_share->primary_key)
  {
    /* The primary key is special, there is no explicit NDB index associated. */
    rec= dict->createRecord(m_table,
                            spec, key_info->user_defined_key_parts, sizeof(spec[0]),
                            ( NdbDictionary::RecMysqldShrinkVarchar |
                              NdbDictionary::RecMysqldBitfield ));
    if (! rec)
      ERR_RETURN(dict->getNdbError());
    m_index[index_no].ndb_unique_record_key= rec;
  }
  else
    m_index[index_no].ndb_unique_record_key= NULL;

  /* Now do the same, but this time with offsets from Field, for row access. */
  for (uint i= 0; i < key_info->user_defined_key_parts; i++)
  {
    const KEY_PART_INFO *kp= &key_info->key_part[i];

    spec[i].offset= kp->offset;
    if (kp->null_bit)
    {
      /* Nullable column. */
      spec[i].nullbit_byte_offset= kp->null_offset;
      spec[i].nullbit_bit_in_byte= null_bit_mask_to_bit_number(kp->null_bit);
    }
    else
    {
      /* Not nullable column. */
      spec[i].nullbit_byte_offset= 0;
      spec[i].nullbit_bit_in_byte= 0;
    }
  }

  if (m_index[index_no].unique_index)
  {
    rec= dict->createRecord(m_index[index_no].unique_index, m_table,
                            spec, key_info->user_defined_key_parts, sizeof(spec[0]),
                            NdbDictionary::RecMysqldBitfield);
    if (! rec)
      ERR_RETURN(dict->getNdbError());
    m_index[index_no].ndb_unique_record_row= rec;
  }
  else if (index_no == table_share->primary_key)
  {
    rec= dict->createRecord(m_table,
                            spec, key_info->user_defined_key_parts, sizeof(spec[0]),
                            NdbDictionary::RecMysqldBitfield);
    if (! rec)
      ERR_RETURN(dict->getNdbError());
    m_index[index_no].ndb_unique_record_row= rec;
  }
  else
    m_index[index_no].ndb_unique_record_row= NULL;

  DBUG_RETURN(0);
}
2640
2641 /*
2642 Associate index handles for each index of a table
2643 */
/*
  Open handles to all indexes of the table and create their NdbRecord
  instances. If ignore_error is set, a failing index is left with NULL
  handles and opening continues; otherwise the first error aborts and
  every handle acquired so far is released.
*/
int ha_ndbcluster::open_indexes(THD *thd, Ndb *ndb, TABLE *tab,
                                bool ignore_error)
{
  uint i;
  int error= 0;
  NDBDICT *dict= ndb->getDictionary();
  KEY* key_info= tab->key_info;
  const char **key_name= tab->s->keynames.type_names;
  DBUG_ENTER("ha_ndbcluster::open_indexes");
  m_has_unique_index= FALSE;
  btree_keys.clear_all();
  for (i= 0; i < tab->s->keys; i++, key_info++, key_name++)
  {
    if ((error= add_index_handle(thd, dict, key_info, *key_name, i)))
    {
      if (ignore_error)
        m_index[i].index= m_index[i].unique_index= NULL;
      else
        break;
    }
    m_index[i].null_in_unique_index= FALSE;
    if (check_index_fields_not_null(key_info))
      m_index[i].null_in_unique_index= TRUE;

    /* Indexes that support range reads can serve ordered (btree) scans */
    if (error == 0 && MY_TEST(index_flags(i, 0, 0) & HA_READ_RANGE))
      btree_keys.set_bit(i);
  }

  if (error && !ignore_error)
  {
    /* Unwind: release every index handle acquired before the failure */
    while (i > 0)
    {
      i--;
      if (m_index[i].index)
      {
        dict->removeIndexGlobal(*m_index[i].index, 1);
        m_index[i].index= NULL;
      }
      if (m_index[i].unique_index)
      {
        dict->removeIndexGlobal(*m_index[i].unique_index, 1);
        m_index[i].unique_index= NULL;
      }
    }
  }

  /* 4243 (Index not found) is the only error tolerated here */
  assert(error == 0 || error == 4243);

  DBUG_RETURN(error);
}
2694
2695 /*
2696 Renumber indexes in index list by shifting out
2697 indexes that are to be dropped
2698 */
renumber_indexes(Ndb * ndb,TABLE * tab)2699 void ha_ndbcluster::renumber_indexes(Ndb *ndb, TABLE *tab)
2700 {
2701 uint i;
2702 const char *index_name;
2703 KEY* key_info= tab->key_info;
2704 const char **key_name= tab->s->keynames.type_names;
2705 DBUG_ENTER("ha_ndbcluster::renumber_indexes");
2706
2707 for (i= 0; i < tab->s->keys; i++, key_info++, key_name++)
2708 {
2709 index_name= *key_name;
2710 NDB_INDEX_TYPE idx_type= get_index_type_from_table(i);
2711 m_index[i].type= idx_type;
2712 if (m_index[i].status == TO_BE_DROPPED)
2713 {
2714 DBUG_PRINT("info", ("Shifting index %s(%i) out of the list",
2715 index_name, i));
2716 NDB_INDEX_DATA tmp;
2717 uint j= i + 1;
2718 // Shift index out of list
2719 while(j != MAX_KEY && m_index[j].status != UNDEFINED)
2720 {
2721 tmp= m_index[j - 1];
2722 m_index[j - 1]= m_index[j];
2723 m_index[j]= tmp;
2724 j++;
2725 }
2726 }
2727 }
2728
2729 DBUG_VOID_RETURN;
2730 }
2731
2732 /*
2733 Drop all indexes that are marked for deletion
2734 */
int ha_ndbcluster::drop_indexes(Ndb *ndb, TABLE *tab)
{
  uint i;
  int error= 0;
  const char *index_name;
  KEY* key_info= tab->key_info;
  NDBDICT *dict= ndb->getDictionary();
  DBUG_ENTER("ha_ndbcluster::drop_indexes");

  for (i= 0; i < tab->s->keys; i++, key_info++)
  {
    NDB_INDEX_TYPE idx_type= get_index_type_from_table(i);
    m_index[i].type= idx_type;
    if (m_index[i].status == TO_BE_DROPPED)
    {
      /* A key may own both an ordered and a unique index in NDB */
      const NdbDictionary::Index *index= m_index[i].index;
      const NdbDictionary::Index *unique_index= m_index[i].unique_index;

      if (index)
      {
        index_name= index->getName();
        DBUG_PRINT("info", ("Dropping index %u: %s", i, index_name));
        // Drop ordered index from ndb
        if (dict->dropIndexGlobal(*index) == 0)
        {
          dict->removeIndexGlobal(*index, 1);
          m_index[i].index= NULL;
        }
        else
        {
          error= ndb_to_mysql_error(&dict->getNdbError());
          m_dupkey= i; // for HA_ERR_DROP_INDEX_FK
        }
      }
      if (!error && unique_index)
      {
        index_name= unique_index->getName();
        DBUG_PRINT("info", ("Dropping unique index %u: %s", i, index_name));
        // Drop unique index from ndb
        if (dict->dropIndexGlobal(*unique_index) == 0)
        {
          dict->removeIndexGlobal(*unique_index, 1);
          m_index[i].unique_index= NULL;
        }
        else
        {
          error=ndb_to_mysql_error(&dict->getNdbError());
          m_dupkey= i; // for HA_ERR_DROP_INDEX_FK
        }
      }
      /* On failure return immediately; the slot keeps its remaining
         handles so a later retry/release can still find them */
      if (error)
        DBUG_RETURN(error);
      ndb_clear_index(dict, m_index[i]);
      continue;
    }
  }

  DBUG_RETURN(error);
}
2794
2795 /**
2796 Decode the type of an index from information
2797 provided in table object.
2798 */
get_index_type_from_table(uint inx) const2799 NDB_INDEX_TYPE ha_ndbcluster::get_index_type_from_table(uint inx) const
2800 {
2801 return get_index_type_from_key(inx, table_share->key_info,
2802 inx == table_share->primary_key);
2803 }
2804
get_index_type_from_key(uint inx,KEY * key_info,bool primary) const2805 NDB_INDEX_TYPE ha_ndbcluster::get_index_type_from_key(uint inx,
2806 KEY *key_info,
2807 bool primary) const
2808 {
2809 bool is_hash_index= (key_info[inx].algorithm ==
2810 HA_KEY_ALG_HASH);
2811 if (primary)
2812 return is_hash_index ? PRIMARY_KEY_INDEX : PRIMARY_KEY_ORDERED_INDEX;
2813
2814 return ((key_info[inx].flags & HA_NOSAME) ?
2815 (is_hash_index ? UNIQUE_INDEX : UNIQUE_ORDERED_INDEX) :
2816 ORDERED_INDEX);
2817 }
2818
check_index_fields_not_null(KEY * key_info) const2819 bool ha_ndbcluster::check_index_fields_not_null(KEY* key_info) const
2820 {
2821 KEY_PART_INFO* key_part= key_info->key_part;
2822 KEY_PART_INFO* end= key_part+key_info->user_defined_key_parts;
2823 DBUG_ENTER("ha_ndbcluster::check_index_fields_not_null");
2824
2825 for (; key_part != end; key_part++)
2826 {
2827 Field* field= key_part->field;
2828 if (field->maybe_null())
2829 DBUG_RETURN(TRUE);
2830 }
2831
2832 DBUG_RETURN(FALSE);
2833 }
2834
/*
  Release all NDB dictionary objects held by this handler: NdbRecord
  instances, the table handle, index handles and foreign key data.
  Cached dictionary objects are additionally invalidated when the table
  object is stale or when running FLUSH TABLES.
*/
void ha_ndbcluster::release_metadata(THD *thd, Ndb *ndb)
{
  uint i;

  DBUG_ENTER("release_metadata");
  DBUG_PRINT("enter", ("m_tabname: %s", m_tabname));

  NDBDICT *dict= ndb->getDictionary();
  int invalidate_indexes= 0;
  if (thd && thd->lex && thd->lex->sql_command == SQLCOM_FLUSH)
  {
    /* FLUSH TABLES: force the dictionary cache entries to be dropped */
    invalidate_indexes = 1;
  }
  if (m_table != NULL)
  {
    /* Records must be released before the table handle they refer to */
    if (m_ndb_record != NULL)
    {
      dict->releaseRecord(m_ndb_record);
      m_ndb_record= NULL;
    }
    if (m_ndb_hidden_key_record != NULL)
    {
      dict->releaseRecord(m_ndb_hidden_key_record);
      m_ndb_hidden_key_record= NULL;
    }
    if (m_table->getObjectStatus() == NdbDictionary::Object::Invalid)
      invalidate_indexes= 1;
    dict->removeTableGlobal(*m_table, invalidate_indexes);
  }
  // TODO investigate
  assert(m_table_info == NULL);
  m_table_info= NULL;

  // Release index list
  for (i= 0; i < MAX_KEY; i++)
  {
    if (m_index[i].unique_index)
    {
      assert(m_table != NULL);
      dict->removeIndexGlobal(*m_index[i].unique_index, invalidate_indexes);
    }
    if (m_index[i].index)
    {
      assert(m_table != NULL);
      dict->removeIndexGlobal(*m_index[i].index, invalidate_indexes);
    }
    ndb_clear_index(dict, m_index[i]);
  }

  // Release FK data
  release_fk_data(thd);

  m_table= NULL;
  DBUG_VOID_RETURN;
}
2890
2891
2892 /*
2893 Map from thr_lock_type to NdbOperation::LockMode
2894 */
2895 static inline
get_ndb_lock_mode(enum thr_lock_type type)2896 NdbOperation::LockMode get_ndb_lock_mode(enum thr_lock_type type)
2897 {
2898 if (type >= TL_WRITE_ALLOW_WRITE)
2899 return NdbOperation::LM_Exclusive;
2900 if (type == TL_READ_WITH_SHARED_LOCKS)
2901 return NdbOperation::LM_Read;
2902 return NdbOperation::LM_CommittedRead;
2903 }
2904
2905
/*
  Handler flags per index type.
  NOTE: entry order must match the NDB_INDEX_TYPE enum, since
  index_flags() indexes this array directly by index type.
*/
static const ulong index_type_flags[]=
{
  /* UNDEFINED_INDEX */
  0,

  /* PRIMARY_KEY_INDEX */
  HA_ONLY_WHOLE_INDEX,

  /* PRIMARY_KEY_ORDERED_INDEX */
  /*
    Enable HA_KEYREAD_ONLY when "sorted" indexes are supported,
    thus ORDER BY clauses can be optimized by reading directly
    through the index.
  */
  // HA_KEYREAD_ONLY |
  HA_READ_NEXT |
  HA_READ_PREV |
  HA_READ_RANGE |
  HA_READ_ORDER,

  /* UNIQUE_INDEX */
  HA_ONLY_WHOLE_INDEX,

  /* UNIQUE_ORDERED_INDEX */
  HA_READ_NEXT |
  HA_READ_PREV |
  HA_READ_RANGE |
  HA_READ_ORDER,

  /* ORDERED_INDEX */
  HA_READ_NEXT |
  HA_READ_PREV |
  HA_READ_RANGE |
  HA_READ_ORDER
};

/* Number of entries in index_type_flags, for bounds checking */
static const int index_flags_size= sizeof(index_type_flags)/sizeof(ulong);
2943
get_index_type(uint idx_no) const2944 inline NDB_INDEX_TYPE ha_ndbcluster::get_index_type(uint idx_no) const
2945 {
2946 assert(idx_no < MAX_KEY);
2947 return m_index[idx_no].type;
2948 }
2949
has_null_in_unique_index(uint idx_no) const2950 inline bool ha_ndbcluster::has_null_in_unique_index(uint idx_no) const
2951 {
2952 assert(idx_no < MAX_KEY);
2953 return m_index[idx_no].null_in_unique_index;
2954 }
2955
2956
2957 /**
2958 Get the flags for an index.
2959
2960 @return
2961 flags depending on the type of the index.
2962 */
2963
index_flags(uint idx_no,uint part,bool all_parts) const2964 inline ulong ha_ndbcluster::index_flags(uint idx_no, uint part,
2965 bool all_parts) const
2966 {
2967 DBUG_ENTER("ha_ndbcluster::index_flags");
2968 DBUG_PRINT("enter", ("idx_no: %u", idx_no));
2969 assert(get_index_type_from_table(idx_no) < index_flags_size);
2970 DBUG_RETURN(index_type_flags[get_index_type_from_table(idx_no)] |
2971 HA_KEY_SCAN_NOT_ROR);
2972 }
2973
2974 bool
primary_key_is_clustered() const2975 ha_ndbcluster::primary_key_is_clustered() const
2976 {
2977
2978 if (table->s->primary_key == MAX_KEY)
2979 return false;
2980
2981 /*
2982 NOTE 1: our ordered indexes are not really clustered
2983 but since accesing data when scanning index is free
2984 it's a good approximation
2985
2986 NOTE 2: We really should consider DD attributes here too
2987 (for which there is IO to read data when scanning index)
2988 but that will need to be handled later...
2989 */
2990 const ndb_index_type idx_type =
2991 get_index_type_from_table(table->s->primary_key);
2992 return (idx_type == PRIMARY_KEY_ORDERED_INDEX ||
2993 idx_type == UNIQUE_ORDERED_INDEX ||
2994 idx_type == ORDERED_INDEX);
2995 }
2996
check_index_fields_in_write_set(uint keyno)2997 bool ha_ndbcluster::check_index_fields_in_write_set(uint keyno)
2998 {
2999 KEY* key_info= table->key_info + keyno;
3000 KEY_PART_INFO* key_part= key_info->key_part;
3001 KEY_PART_INFO* end= key_part+key_info->user_defined_key_parts;
3002 uint i;
3003 DBUG_ENTER("check_index_fields_in_write_set");
3004
3005 for (i= 0; key_part != end; key_part++, i++)
3006 {
3007 Field* field= key_part->field;
3008 if (!bitmap_is_set(table->write_set, field->field_index))
3009 {
3010 DBUG_RETURN(false);
3011 }
3012 }
3013
3014 DBUG_RETURN(true);
3015 }
3016
3017
3018 /**
3019 Read one record from NDB using primary key.
3020 */
3021
int ha_ndbcluster::pk_read(const uchar *key, uint key_len, uchar *buf,
                           uint32 *part_id)
{
  NdbConnection *trans= m_thd_ndb->trans;
  int res;
  DBUG_ENTER("pk_read");
  DBUG_PRINT("enter", ("key_len: %u read_set=%x",
                       key_len, table->read_set->bitmap[0]));
  DBUG_DUMP("key", key, key_len);
  assert(trans);

  NdbOperation::LockMode lm= get_ndb_lock_mode(m_lock.type);

  if (check_if_pushable(NdbQueryOperationDef::PrimaryKeyAccess,
                        table->s->primary_key))
  {
    // Is parent of pushed join
    assert(lm == NdbOperation::LM_CommittedRead);
    /* part_id is only meaningful with user-defined partitioning */
    const int error= pk_unique_index_read_key_pushed(table->s->primary_key, key,
                                                     (m_user_defined_partitioning ?
                                                      part_id : NULL));
    if (unlikely(error))
      DBUG_RETURN(error);

    assert(m_active_query!=NULL);
    if ((res = execute_no_commit_ie(m_thd_ndb, trans)) != 0 ||
        m_active_query->getNdbError().code)
    {
      table->status= STATUS_NOT_FOUND;
      DBUG_RETURN(ndb_err(trans));
    }

    int result= fetch_next_pushed();
    if (result == NdbQuery::NextResult_gotRow)
    {
      DBUG_RETURN(0);
    }
    else if (result == NdbQuery::NextResult_scanComplete)
    {
      /* Query completed without a row: the key does not exist */
      DBUG_RETURN(HA_ERR_KEY_NOT_FOUND);
    }
    else
    {
      DBUG_RETURN(ndb_err(trans));
    }
  }
  else
  {
    if (m_pushed_join_operation == PUSHED_ROOT)
    {
      /* A pushed join was prepared but cannot be used for this read:
         account for it as dropped */
      m_thd_ndb->m_pushed_queries_dropped++;
    }

    const NdbOperation *op;
    if (!(op= pk_unique_index_read_key(table->s->primary_key, key, buf, lm,
                                       (m_user_defined_partitioning ?
                                        part_id :
                                        NULL))))
      ERR_RETURN(trans->getNdbError());

    if ((res = execute_no_commit_ie(m_thd_ndb, trans)) != 0 ||
        op->getNdbError().code)
    {
      table->status= STATUS_NOT_FOUND;
      DBUG_RETURN(ndb_err(trans));
    }
    table->status= 0;
    DBUG_RETURN(0);
  }
}
3092
/**
  Update primary key or part id by doing delete insert.

  NDB cannot change a row's primary key (or move it to another
  user-defined partition) in place, so the update is mapped onto a
  delete of the old row followed by a re-insert of the complete new row.

  @param thd       Current session.
  @param old_data  Old row image; must contain all PK columns (read_set).
  @param new_data  New row image to re-insert.

  @return 0 on success, handler error code otherwise.
*/

int ha_ndbcluster::ndb_pk_update_row(THD *thd,
                                     const uchar *old_data, uchar *new_data)
{
  NdbTransaction *trans= m_thd_ndb->trans;
  int error;
  DBUG_ENTER("ndb_pk_update_row");
  assert(trans);

  DBUG_PRINT("info", ("primary key update or partition change, "
                      "doing delete+insert"));

#ifndef NDEBUG
  /*
   * 'old_data' contains columns as specified in 'read_set'.
   * All PK columns must be included for ::ndb_delete_row()
   */
  assert(bitmap_is_subset(m_pk_bitmap_p, table->read_set));
  /*
   * As a complete 'new_data' row is reinserted after the delete,
   * all columns must be contained in the read+write union.
   */
  bitmap_copy(&m_bitmap, table->read_set);
  bitmap_union(&m_bitmap, table->write_set);
  assert(bitmap_is_set_all(&m_bitmap));
#endif

  // Delete old row
  error= ndb_delete_row(old_data, TRUE);
  if (error)
  {
    DBUG_PRINT("info", ("delete failed"));
    DBUG_RETURN(error);
  }

  // Insert new row
  DBUG_PRINT("info", ("delete succeded"));
  // Batch the insert with an open scan cursor if one is active
  bool batched_update= (m_active_cursor != 0);
  /*
    If we are updating a primary key with auto_increment
    then we need to update the auto_increment counter
  */
  if (table->found_next_number_field &&
      bitmap_is_set(table->write_set,
                    table->found_next_number_field->field_index) &&
      (error= set_auto_inc(thd, table->found_next_number_field)))
  {
    DBUG_RETURN(error);
  }

  /*
    We are mapping a MySQLD PK changing update to an NdbApi delete
    and insert.
    The original PK changing update may not have written new values
    to all columns, so the write set may be partial.
    We set the write set to be all columns so that all values are
    copied from the old row to the new row.
  */
  my_bitmap_map *old_map=
    tmp_use_all_columns(table, table->write_set);
  error= ndb_write_row(new_data, TRUE, batched_update);
  tmp_restore_column_map(table->write_set, old_map);

  if (error)
  {
    DBUG_PRINT("info", ("insert failed"));
    // Only roll back if the transaction was actually started in NDB
    if (trans->commitStatus() == NdbConnection::Started)
    {
      if (thd->slave_thread)
        g_ndb_slave_state.atTransactionAbort();
      m_thd_ndb->m_unsent_bytes= 0;
      m_thd_ndb->m_execute_count++;
      DBUG_PRINT("info", ("execute_count: %u", m_thd_ndb->m_execute_count));
      // Roll back so the already-executed delete is not left applied
      trans->execute(NdbTransaction::Rollback);
#ifdef FIXED_OLD_DATA_TO_ACTUALLY_CONTAIN_GOOD_DATA
      int undo_res;
      // Undo delete_row(old_data)
      undo_res= ndb_write_row((uchar *)old_data, TRUE, batched_update);
      if (undo_res)
        push_warning(table->in_use,
                     Sql_condition::SL_WARNING,
                     undo_res,
                     "NDB failed undoing delete at primary key update");
#endif
    }
    DBUG_RETURN(error);
  }
  DBUG_PRINT("info", ("delete+insert succeeded"));

  DBUG_RETURN(0);
}
3187
/**
  Check that all operations between first and last all
  have gotten the errcode
  If checking for HA_ERR_KEY_NOT_FOUND then update m_dupkey
  for all succeeding operations

  @param trans    Transaction owning the completed operations.
  @param first    First operation to inspect.
  @param last     Last operation to inspect (inclusive).
  @param errcode  MySQL error code every operation is expected to have.

  @return TRUE when every operation failed with 'errcode';
          FALSE when some operation succeeded (a conflicting row exists,
          m_dupkey is updated) or failed with a different error.
*/
bool ha_ndbcluster::check_all_operations_for_error(NdbTransaction *trans,
                                                   const NdbOperation *first,
                                                   const NdbOperation *last,
                                                   uint errcode)
{
  const NdbOperation *op= first;
  DBUG_ENTER("ha_ndbcluster::check_all_operations_for_error");

  while(op)
  {
    NdbError err= op->getNdbError();
    if (err.status != NdbError::Success)
    {
      // Operation failed: it must have failed with the expected code
      if (ndb_to_mysql_error(&err) != (int) errcode)
        DBUG_RETURN(FALSE);
      if (op == last) break;
      op= trans->getNextCompletedOperation(op);
    }
    else
    {
      // Operation succeeded: we found a duplicate
      if (op->getType() == NdbOperation::UniqueIndexAccess)
      {
        if (errcode == HA_ERR_KEY_NOT_FOUND)
        {
          NdbIndexOperation *iop= (NdbIndexOperation *) op;
          const NDBINDEX *index= iop->getIndex();
          // Find the key_no of the index
          for(uint i= 0; i<table->s->keys; i++)
          {
            if (m_index[i].unique_index == index)
            {
              m_dupkey= i;
              break;
            }
          }
        }
      }
      else
      {
        // Must have been primary key access
        assert(op->getType() == NdbOperation::PrimaryKeyAccess);
        if (errcode == HA_ERR_KEY_NOT_FOUND)
          m_dupkey= table->s->primary_key;
      }
      DBUG_RETURN(FALSE);
    }
  }
  DBUG_RETURN(TRUE);
}
3244
3245
3246 /**
3247 * Check if record contains any null valued columns that are part of a key
3248 */
3249 static
3250 int
check_null_in_record(const KEY * key_info,const uchar * record)3251 check_null_in_record(const KEY* key_info, const uchar *record)
3252 {
3253 KEY_PART_INFO *curr_part, *end_part;
3254 curr_part= key_info->key_part;
3255 end_part= curr_part + key_info->user_defined_key_parts;
3256
3257 while (curr_part != end_part)
3258 {
3259 if (curr_part->null_bit &&
3260 (record[curr_part->null_offset] & curr_part->null_bit))
3261 return 1;
3262 curr_part++;
3263 }
3264 return 0;
3265 /*
3266 We could instead pre-compute a bitmask in table_share with one bit for
3267 every null-bit in the key, and so check this just by OR'ing the bitmask
3268 with the null bitmap in the record.
3269 But not sure it's worth it.
3270 */
3271 }
3272
/* Empty mask and dummy row, for reading no attributes using NdbRecord. */
/* Static storage duration guarantees the mask is zero-initialized. */
static unsigned char empty_mask[(NDB_MAX_ATTRIBUTES_IN_TABLE+7)/8];
static char dummy_row[1];
3277
/**
  Peek to check if any rows already exist with conflicting
  primary key or unique index values

  One read is defined per potentially conflicting key: the primary key
  (unless this is an update), plus every unique index whose columns
  overlap the write_set. All reads are executed in a single batch; a
  read that succeeds means a conflicting row exists.

  @param record    Row in MySQL format holding the key values to check.
  @param write_op  Kind of write triggering the peek.

  @return 0 when a conflicting row was found (m_dupkey identifies the
          violated key), HA_ERR_KEY_NOT_FOUND (via ndb_err) when all
          peeks missed, other error code on failure.
*/

int ha_ndbcluster::peek_indexed_rows(const uchar *record,
                                     NDB_WRITE_OP write_op)
{
  NdbTransaction *trans;
  const NdbOperation *op;
  const NdbOperation *first, *last;
  NdbOperation::OperationOptions options;
  NdbOperation::OperationOptions *poptions=NULL;
  options.optionsPresent = 0;
  uint i;
  int res, error;
  DBUG_ENTER("peek_indexed_rows");
  if (unlikely(!(trans= get_transaction(error))))
  {
    DBUG_RETURN(error);
  }
  const NdbOperation::LockMode lm = get_ndb_lock_mode(m_lock.type);
  first= NULL;
  if (write_op != NDB_UPDATE && table->s->primary_key != MAX_KEY)
  {
    /*
     * Fetch any row with colliding primary key
     */
    const NdbRecord *key_rec=
      m_index[table->s->primary_key].ndb_unique_record_row;

    if (m_user_defined_partitioning)
    {
      uint32 part_id;
      int error;  // NOTE(review): shadows the outer 'error'
      longlong func_value;
      my_bitmap_map *old_map= dbug_tmp_use_all_columns(table, table->read_set);
      error= m_part_info->get_partition_id(m_part_info, &part_id, &func_value);
      dbug_tmp_restore_column_map(table->read_set, old_map);
      if (error)
      {
        m_part_info->err_value= func_value;
        DBUG_RETURN(error);
      }
      // Pin the read to the user-defined partition of 'record'
      options.optionsPresent |= NdbOperation::OperationOptions::OO_PARTITION_ID;
      options.partitionId=part_id;
      poptions=&options;
    }

    // Read no attributes (empty_mask); existence is all that matters
    if (!(op= trans->readTuple(key_rec, (const char *)record,
                               m_ndb_record, dummy_row, lm, empty_mask,
                               poptions,
                               sizeof(NdbOperation::OperationOptions))))
      ERR_RETURN(trans->getNdbError());

    first= op;
  }
  /*
   * Fetch any rows with colliding unique indexes
   */
  KEY* key_info;
  for (i= 0, key_info= table->key_info; i < table->s->keys; i++, key_info++)
  {
    if (i != table_share->primary_key &&
        key_info->flags & HA_NOSAME &&
        bitmap_is_overlapping(table->write_set, m_key_fields[i]))
    {
      /*
        A unique index is defined on table and it's being updated
        We cannot look up a NULL field value in a unique index. But since
        keys with NULLs are not indexed, such rows cannot conflict anyway, so
        we just skip the index in this case.
      */
      if (check_null_in_record(key_info, record))
      {
        DBUG_PRINT("info", ("skipping check for key with NULL"));
        continue;
      }
      // For updates, only keys actually being written can conflict
      if (write_op != NDB_INSERT && !check_index_fields_in_write_set(i))
      {
        DBUG_PRINT("info", ("skipping check for key %u not in write_set", i));
        continue;
      }

      const NdbOperation *iop;
      const NdbRecord *key_rec= m_index[i].ndb_unique_record_row;
      if (!(iop= trans->readTuple(key_rec, (const char *)record,
                                  m_ndb_record, dummy_row,
                                  lm, empty_mask)))
        ERR_RETURN(trans->getNdbError());

      if (!first)
        first= iop;
    }
  }
  last= trans->getLastDefinedOperation();
  if (first)
    res= execute_no_commit_ie(m_thd_ndb, trans);
  else
  {
    // Table has no keys
    table->status= STATUS_NOT_FOUND;
    DBUG_RETURN(HA_ERR_KEY_NOT_FOUND);
  }
  const NdbError ndberr= trans->getNdbError();
  error= ndberr.mysql_code;
  /*
    When every peek failed with "key not found" (or some other error
    occurred), there is no duplicate: report via ndb_err().
    Otherwise at least one peek succeeded => duplicate, return 0.
  */
  if ((error != 0 && error != HA_ERR_KEY_NOT_FOUND) ||
      check_all_operations_for_error(trans, first, last,
                                     HA_ERR_KEY_NOT_FOUND))
  {
    table->status= STATUS_NOT_FOUND;
    DBUG_RETURN(ndb_err(trans));
  }
  else
  {
    DBUG_PRINT("info", ("m_dupkey %d", m_dupkey));
  }
  DBUG_RETURN(0);
}
3397
3398
/**
  Read one record from NDB using unique secondary index.

  When the access is the root of a pushed join the lookup is executed
  through the pushed NdbQuery; otherwise a plain NdbOperation keyed
  read is used.

  @param key      Key value in mysqld key format.
  @param key_len  Length of 'key' in bytes.
  @param buf      Output buffer for the row in MySQL format.

  @return 0 on success, HA_ERR_KEY_NOT_FOUND when no row matches,
          other handler error code otherwise.
*/

int ha_ndbcluster::unique_index_read(const uchar *key,
                                     uint key_len, uchar *buf)
{
  NdbTransaction *trans= m_thd_ndb->trans;
  DBUG_ENTER("ha_ndbcluster::unique_index_read");
  DBUG_PRINT("enter", ("key_len: %u, index: %u", key_len, active_index));
  DBUG_DUMP("key", key, key_len);
  assert(trans);

  NdbOperation::LockMode lm= get_ndb_lock_mode(m_lock.type);

  if (check_if_pushable(NdbQueryOperationDef::UniqueIndexAccess,
                        active_index))
  {
    // Is parent of pushed join; pushed joins require committed read
    assert(lm == NdbOperation::LM_CommittedRead);
    const int error= pk_unique_index_read_key_pushed(active_index, key, NULL);
    if (unlikely(error))
      DBUG_RETURN(error);

    assert(m_active_query!=NULL);
    if (execute_no_commit_ie(m_thd_ndb, trans) != 0 ||
        m_active_query->getNdbError().code)
    {
      table->status= STATUS_GARBAGE;
      DBUG_RETURN(ndb_err(trans));
    }

    int result= fetch_next_pushed();
    if (result == NdbQuery::NextResult_gotRow)
    {
      DBUG_RETURN(0);
    }
    else if (result == NdbQuery::NextResult_scanComplete)
    {
      DBUG_RETURN(HA_ERR_KEY_NOT_FOUND);
    }
    else
    {
      DBUG_RETURN(ndb_err(trans));
    }
  }
  else
  {
    // A pushed join was prepared for this access but cannot be used
    if (m_pushed_join_operation == PUSHED_ROOT)
    {
      m_thd_ndb->m_pushed_queries_dropped++;
    }

    const NdbOperation *op;

    if (!(op= pk_unique_index_read_key(active_index, key, buf, lm, NULL)))
      ERR_RETURN(trans->getNdbError());

    if (execute_no_commit_ie(m_thd_ndb, trans) != 0 ||
        op->getNdbError().code)
    {
      // Distinguish a clean miss from a failed/aborted read
      int err= ndb_err(trans);
      if(err==HA_ERR_KEY_NOT_FOUND)
        table->status= STATUS_NOT_FOUND;
      else
        table->status= STATUS_GARBAGE;

      DBUG_RETURN(err);
    }

    table->status= 0;
    DBUG_RETURN(0);
  }
}
3472
/**
  Take (keep) a lock on the current tuple of a scan, if the previous
  row was scanned with a lock that should be held (and was not
  explicitly released with unlock_row()).

  @param scanOp  Scan whose current tuple may need locking.
  @param trans   Transaction to define the lock operation in.

  @return 0 on success, error code from the NDB transaction otherwise.
*/
int
ha_ndbcluster::scan_handle_lock_tuple(NdbScanOperation *scanOp,
                                      NdbTransaction *trans)
{
  DBUG_ENTER("ha_ndbcluster::scan_handle_lock_tuple");
  if (m_lock_tuple)
  {
    /*
      Lock level m_lock.type either TL_WRITE_ALLOW_WRITE
      (SELECT FOR UPDATE) or TL_READ_WITH_SHARED_LOCKS (SELECT
      LOCK WITH SHARE MODE) and row was not explictly unlocked
      with unlock_row() call
    */
    DBUG_PRINT("info", ("Keeping lock on scanned row"));

    // Read no attributes (empty_mask); only the lock matters
    if (!(scanOp->lockCurrentTuple(trans, m_ndb_record,
                                   dummy_row, empty_mask)))
    {
      m_lock_tuple= false;
      ERR_RETURN(trans->getNdbError());
    }

    /* Perform 'empty update' to mark the read in the binlog, iff required */
    /*
     * Lock_mode = exclusive
     * Session_state = marking_exclusive_reads
     * THEN
     * issue updateCurrentTuple with AnyValue explicitly set
     */
    if ((m_lock.type >= TL_WRITE_ALLOW_WRITE) &&
        ndb_log_exclusive_reads(current_thd))
    {
      if (scan_log_exclusive_read(scanOp, trans))
      {
        m_lock_tuple= false;
        ERR_RETURN(trans->getNdbError());
      }
    }

    // Account the pending lock op in the unsent batch.
    // NOTE(review): the constant 12's derivation is not visible here —
    // presumably the approximate signal size of the lock op; confirm.
    m_thd_ndb->m_unsent_bytes+=12;
    m_lock_tuple= false;
  }
  DBUG_RETURN(0);
}
3517
/**
  Advance an open scan cursor to the next row.

  @param cursor  Open scan operation to advance.

  @return 0 when a row is available (referenced via _m_next_row),
          1 at end of scan, other value on error (mapped via ndb_err).
*/
inline int ha_ndbcluster::fetch_next(NdbScanOperation* cursor)
{
  DBUG_ENTER("fetch_next");
  int local_check;
  int error;
  NdbTransaction *trans= m_thd_ndb->trans;

  assert(trans);
  if ((error= scan_handle_lock_tuple(cursor, trans)) != 0)
    DBUG_RETURN(error);

  // Only plain (non-locking) reads may transparently fetch more
  // batches from the data nodes inside nextResult()
  bool contact_ndb= m_lock.type < TL_WRITE_ALLOW_WRITE &&
                    m_lock.type != TL_READ_WITH_SHARED_LOCKS;
  do {
    DBUG_PRINT("info", ("Call nextResult, contact_ndb: %d", contact_ndb));
    /*
      We can only handle one tuple with blobs at a time.
    */
    if (m_thd_ndb->m_unsent_bytes && m_blobs_pending)
    {
      if (execute_no_commit(m_thd_ndb, trans, m_ignore_no_key) != 0)
        DBUG_RETURN(ndb_err(trans));
    }

    /* Should be no unexamined completed operations
       nextResult() on Blobs generates Blob part read ops,
       so we will free them here
    */
    release_completed_operations(trans);

    if ((local_check= cursor->nextResult(&_m_next_row,
                                         contact_ndb,
                                         m_thd_ndb->m_force_send)) == 0)
    {
      /*
        Explicitly lock tuple if "select for update" or
        "select lock in share mode"
      */
      m_lock_tuple= (m_lock.type == TL_WRITE_ALLOW_WRITE
                     ||
                     m_lock.type == TL_READ_WITH_SHARED_LOCKS);
      DBUG_RETURN(0);
    }
    else if (local_check == 1 || local_check == 2)
    {
      // 1: No more records
      // 2: No more cached records

      /*
        Before fetching more rows and releasing lock(s),
        all pending update or delete operations should
        be sent to NDB
      */
      DBUG_PRINT("info", ("thd_ndb->m_unsent_bytes: %ld",
                          (long) m_thd_ndb->m_unsent_bytes));
      if (m_thd_ndb->m_unsent_bytes)
      {
        if ((error = flush_bulk_insert()) != 0)
          DBUG_RETURN(error);
      }
      // Loop again only when more rows can be fetched from the nodes
      contact_ndb= (local_check == 2);
    }
    else
    {
      DBUG_RETURN(ndb_err(trans));
    }
  } while (local_check == 2);

  DBUG_RETURN(1);
}
3588
/**
  Fetch the next row from the pushed operation this handler instance
  participates in, and unpack it into table->record[0].

  @return NdbQuery::NextResult_gotRow when a row was fetched,
          NdbQuery::NextResult_scanComplete at end of result set,
          or a handler error code on failure (note the mixed return
          domains: outcome enum on success paths, error code on error).
*/
int ha_ndbcluster::fetch_next_pushed()
{
  DBUG_ENTER("fetch_next_pushed (from pushed operation)");

  assert(m_pushed_operation);
  NdbQuery::NextResultOutcome result= m_pushed_operation->nextResult(true, m_thd_ndb->m_force_send);

  /**
   * Only prepare result & status from this operation in pushed join.
   * Consecutive rows are prepared through ::index_read_pushed() and
   * ::index_next_pushed() which unpack and set correct status for each row.
   */
  if (result == NdbQuery::NextResult_gotRow)
  {
    assert(m_next_row!=NULL);
    DBUG_PRINT("info", ("One more record found"));
    table->status= 0;
    unpack_record(table->record[0], m_next_row);
//  m_thd_ndb->m_pushed_reads++;
//  DBUG_RETURN(0)
  }
  else if (result == NdbQuery::NextResult_scanComplete)
  {
    assert(m_next_row==NULL);
    DBUG_PRINT("info", ("No more records"));
    table->status= STATUS_NOT_FOUND;
//  m_thd_ndb->m_pushed_reads++;
//  DBUG_RETURN(HA_ERR_END_OF_FILE);
  }
  else
  {
    DBUG_PRINT("info", ("Error from 'nextResult()'"));
    table->status= STATUS_GARBAGE;
//  assert(false);
    DBUG_RETURN(ndb_err(m_thd_ndb->trans));
  }
  DBUG_RETURN(result);
}
3627
/**
  Get the first record from an indexed table access being a child
  operation in a pushed join. Fetch will be from prefetched
  cached records which are materialized into the bound buffer
  areas as result of this call.

  @param buf          Output buffer for the row in MySQL format.
  @param key          Key value (used only by the unpushed fallback).
  @param keypart_map  Key part map (used only by the unpushed fallback).

  @return 0, with table->status distinguishing "row found" (0) from
          "no row" (STATUS_NOT_FOUND); fallback path returns the
          index_read_map() error code.
          NOTE(review): the commented-out HA_ERR_END_OF_FILE return
          suggests callers rely on table->status here — confirm.
*/

int
ha_ndbcluster::index_read_pushed(uchar *buf, const uchar *key,
                                 key_part_map keypart_map)
{
  DBUG_ENTER("index_read_pushed");

  // Handler might have decided to not execute the pushed joins which has been prepared
  // In this case we do an unpushed index_read based on 'Plain old' NdbOperations
  if (unlikely(!check_is_pushed()))
  {
    int res= index_read_map(buf, key, keypart_map, HA_READ_KEY_EXACT);
    if (!res && table->vfield)
      res= update_generated_read_fields(buf, table);
    DBUG_RETURN(res);
  }

  // Might need to re-establish first result row (wrt. its parents which may have been navigated)
  NdbQuery::NextResultOutcome result= m_pushed_operation->firstResult();

  // Result from pushed operation will be referred by 'm_next_row' if non-NULL
  if (result == NdbQuery::NextResult_gotRow)
  {
    assert(m_next_row!=NULL);
    unpack_record(buf, m_next_row);
    table->status= 0;
    m_thd_ndb->m_pushed_reads++;
  }
  else
  {
    assert(result!=NdbQuery::NextResult_gotRow);
    table->status= STATUS_NOT_FOUND;
    DBUG_PRINT("info", ("No record found"));
//  m_thd_ndb->m_pushed_reads++;
//  DBUG_RETURN(HA_ERR_END_OF_FILE);
  }
  DBUG_RETURN(0);
}
3672
3673
3674 /**
3675 Get the next record from an indexes table access being a child
3676 operation in a pushed join. Fetch will be from prefetched
3677 cached records which are materialized into the bound buffer
3678 areas as result of this call.
3679 */
index_next_pushed(uchar * buf)3680 int ha_ndbcluster::index_next_pushed(uchar *buf)
3681 {
3682 DBUG_ENTER("index_next_pushed");
3683
3684 // Handler might have decided to not execute the pushed joins which has been prepared
3685 // In this case we do an unpushed index_read based on 'Plain old' NdbOperations
3686 if (unlikely(!check_is_pushed()))
3687 {
3688 int res= index_next(buf);
3689 if (!res && table->vfield)
3690 res= update_generated_read_fields(buf, table);
3691 DBUG_RETURN(res);
3692 }
3693
3694 assert(m_pushed_join_operation>PUSHED_ROOT); // Child of a pushed join
3695 assert(m_active_query==NULL);
3696
3697 int res = fetch_next_pushed();
3698 if (res == NdbQuery::NextResult_gotRow)
3699 {
3700 DBUG_RETURN(0);
3701 }
3702 else if (res == NdbQuery::NextResult_scanComplete)
3703 {
3704 DBUG_RETURN(HA_ERR_END_OF_FILE);
3705 }
3706 else
3707 {
3708 DBUG_RETURN(ndb_err(m_thd_ndb->trans));
3709 }
3710 }
3711
3712
/**
  Get the next record of a started scan. Try to fetch
  it locally from NdbApi cached records if possible,
  otherwise ask NDB for more.

  @param buf  Output buffer for the row in MySQL format.

  @return 0 when a row was fetched, HA_ERR_END_OF_FILE at end of
          scan (or when no scan is active), other error code otherwise.

  @note
    If this is a update/delete make sure to not contact
    NDB before any pending ops have been sent to NDB.
*/

inline int ha_ndbcluster::next_result(uchar *buf)
{
  int res;
  DBUG_ENTER("next_result");

  if (m_active_cursor)
  {
    // Plain (non-pushed) scan cursor
    if ((res= fetch_next(m_active_cursor)) == 0)
    {
      DBUG_PRINT("info", ("One more record found"));

      unpack_record(buf, m_next_row);
      table->status= 0;
      DBUG_RETURN(0);
    }
    else if (res == 1)
    {
      // No more records
      table->status= STATUS_NOT_FOUND;

      DBUG_PRINT("info", ("No more records"));
      DBUG_RETURN(HA_ERR_END_OF_FILE);
    }
    else
    {
      DBUG_RETURN(ndb_err(m_thd_ndb->trans));
    }
  }
  else if (m_active_query)
  {
    // Root of a pushed join: fetch through the pushed query instead
    res= fetch_next_pushed();
    if (res == NdbQuery::NextResult_gotRow)
    {
      DBUG_RETURN(0);
    }
    else if (res == NdbQuery::NextResult_scanComplete)
    {
      DBUG_RETURN(HA_ERR_END_OF_FILE);
    }
    else
    {
      DBUG_RETURN(ndb_err(m_thd_ndb->trans));
    }
  }
  else
    DBUG_RETURN(HA_ERR_END_OF_FILE);
}
3770
/**
  Define an 'empty update' (updateTuple writing no attributes) so that
  an exclusive keyed read is marked in the binlog.

  @param key_rec        NdbRecord describing the key layout.
  @param key            Key value in mysqld key format.
  @param buf            Row buffer passed as the update's attribute row.
  @param ppartition_id  Explicit partition id for user-defined
                        partitioning, or NULL.

  @return 0 on success; -1 on failure (a warning is pushed and the
          caller reports the error from the transaction object).
*/
int
ha_ndbcluster::log_exclusive_read(const NdbRecord *key_rec,
                                  const uchar *key,
                                  uchar *buf,
                                  Uint32 *ppartition_id)
{
  DBUG_ENTER("log_exclusive_read");
  NdbOperation::OperationOptions opts;
  opts.optionsPresent=
    NdbOperation::OperationOptions::OO_ABORTOPTION |
    NdbOperation::OperationOptions::OO_ANYVALUE;

  /* If the key does not exist, that is ok */
  opts.abortOption= NdbOperation::AO_IgnoreError;

  /*
    Mark the AnyValue as a read operation, so that the update
    is processed
  */
  opts.anyValue= 0;
  ndbcluster_anyvalue_set_read_op(opts.anyValue);

  if (ppartition_id != NULL)
  {
    assert(m_user_defined_partitioning);
    opts.optionsPresent|= NdbOperation::OperationOptions::OO_PARTITION_ID;
    opts.partitionId= *ppartition_id;
  }

  // Write no attributes (empty_mask): the op exists only for logging
  const NdbOperation* markingOp=
    m_thd_ndb->trans->updateTuple(key_rec,
                                  (const char*) key,
                                  m_ndb_record,
                                  (char*)buf,
                                  empty_mask,
                                  &opts,
                                  opts.size());
  if (!markingOp)
  {
    char msg[FN_REFLEN];
    my_snprintf(msg, sizeof(msg), "Error logging exclusive reads, failed creating markingOp, %u, %s\n",
                m_thd_ndb->trans->getNdbError().code,
                m_thd_ndb->trans->getNdbError().message);
    push_warning_printf(current_thd, Sql_condition::SL_WARNING,
                        ER_EXCEPTIONS_WRITE_ERROR,
                        ER(ER_EXCEPTIONS_WRITE_ERROR), msg);
    /*
      By returning -1 the caller (pk_unique_index_read_key) will return
      NULL and error on transaction object will be returned.
    */
    DBUG_RETURN(-1);
  }

  DBUG_RETURN(0);
}
3826
/**
  Define an 'empty update' against the current tuple of a scan so
  that an exclusive read during the scan is marked in the binlog.

  @param cursor  Scan whose current tuple is being read exclusively.
  @param trans   Transaction to define the marking operation in.

  @return 0 on success; -1 on failure (a warning is pushed and the
          caller reports the error from the transaction object).
*/
int
ha_ndbcluster::scan_log_exclusive_read(NdbScanOperation *cursor,
                                       NdbTransaction *trans)
{
  DBUG_ENTER("ha_ndbcluster::scan_log_exclusive_read");
  NdbOperation::OperationOptions opts;
  opts.optionsPresent= NdbOperation::OperationOptions::OO_ANYVALUE;

  /*
    Mark the AnyValue as a read operation, so that the update
    is processed
  */
  opts.anyValue= 0;
  ndbcluster_anyvalue_set_read_op(opts.anyValue);

  // Write no attributes (empty_mask): the op exists only for logging
  const NdbOperation* markingOp=
    cursor->updateCurrentTuple(trans, m_ndb_record,
                               dummy_row, empty_mask,
                               &opts,
                               sizeof(NdbOperation::OperationOptions));
  if (markingOp == NULL)
  {
    char msg[FN_REFLEN];
    my_snprintf(msg, sizeof(msg), "Error logging exclusive reads during scan, failed creating markingOp, %u, %s\n",
                m_thd_ndb->trans->getNdbError().code,
                m_thd_ndb->trans->getNdbError().message);
    push_warning_printf(current_thd, Sql_condition::SL_WARNING,
                        ER_EXCEPTIONS_WRITE_ERROR,
                        ER(ER_EXCEPTIONS_WRITE_ERROR), msg);
    DBUG_RETURN(-1);
  }

  DBUG_RETURN(0);
}
3861
3862 /**
3863 Do a primary key or unique key index read operation.
3864 The key value is taken from a buffer in mysqld key format.
3865 */
3866 const NdbOperation *
pk_unique_index_read_key(uint idx,const uchar * key,uchar * buf,NdbOperation::LockMode lm,Uint32 * ppartition_id)3867 ha_ndbcluster::pk_unique_index_read_key(uint idx, const uchar *key, uchar *buf,
3868 NdbOperation::LockMode lm,
3869 Uint32 *ppartition_id)
3870 {
3871 DBUG_ENTER("pk_unique_index_read_key");
3872 const NdbOperation *op;
3873 const NdbRecord *key_rec;
3874 NdbOperation::OperationOptions options;
3875 NdbOperation::OperationOptions *poptions = NULL;
3876 options.optionsPresent= 0;
3877 NdbOperation::GetValueSpec gets[2];
3878 ndb_index_type idx_type=
3879 (idx != MAX_KEY)?
3880 get_index_type(idx)
3881 : UNDEFINED_INDEX;
3882
3883 assert(m_thd_ndb->trans);
3884
3885 DBUG_PRINT("info", ("pk_unique_index_read_key of table %s", table->s->table_name.str));
3886
3887 if (idx != MAX_KEY)
3888 key_rec= m_index[idx].ndb_unique_record_key;
3889 else
3890 key_rec= m_ndb_hidden_key_record;
3891
3892 /* Initialize the null bitmap, setting unused null bits to 1. */
3893 memset(buf, 0xff, table->s->null_bytes);
3894
3895 if (table_share->primary_key == MAX_KEY)
3896 {
3897 get_hidden_fields_keyop(&options, gets);
3898 poptions= &options;
3899 }
3900 get_read_set(false, idx);
3901
3902 if (ppartition_id != NULL)
3903 {
3904 assert(m_user_defined_partitioning);
3905 options.optionsPresent|= NdbOperation::OperationOptions::OO_PARTITION_ID;
3906 options.partitionId= *ppartition_id;
3907 poptions= &options;
3908 }
3909
3910 op= m_thd_ndb->trans->readTuple(key_rec, (const char *)key, m_ndb_record,
3911 (char *)buf, lm,
3912 (uchar *)(table->read_set->bitmap), poptions,
3913 sizeof(NdbOperation::OperationOptions));
3914
3915 if (uses_blob_value(table->read_set) &&
3916 get_blob_values(op, buf, table->read_set) != 0)
3917 DBUG_RETURN(NULL);
3918
3919 /* Perform 'empty update' to mark the read in the binlog, iff required */
3920 /*
3921 * Lock_mode = exclusive
3922 * Index = primary or unique
3923 * Session_state = marking_exclusive_reads
3924 * THEN
3925 * issue updateTuple with AnyValue explicitly set
3926 */
3927 if ((lm == NdbOperation::LM_Exclusive) &&
3928 /*
3929 We don't need to check index type
3930 (idx_type == PRIMARY_KEY_INDEX ||
3931 idx_type == PRIMARY_KEY_ORDERED_INDEX ||
3932 idx_type == UNIQUE_ORDERED_INDEX ||
3933 idx_type == UNIQUE_INDEX)
3934 since this method is only invoked for
3935 primary or unique indexes, but we do need to check
3936 if it was a hidden primary key.
3937 */
3938 idx_type != UNDEFINED_INDEX &&
3939 ndb_log_exclusive_reads(current_thd))
3940 {
3941 if (log_exclusive_read(key_rec, key, buf, ppartition_id) != 0)
3942 DBUG_RETURN(NULL);
3943 }
3944
3945 DBUG_RETURN(op);
3946 }
3947
3948
3949 static
3950 bool
is_shrinked_varchar(const Field * field)3951 is_shrinked_varchar(const Field *field)
3952 {
3953 if (field->real_type() == MYSQL_TYPE_VARCHAR)
3954 {
3955 if (((Field_varstring*)field)->length_bytes == 1)
3956 return true;
3957 }
3958
3959 return false;
3960 }
3961
/**
  Bind the key values of a primary key / unique index lookup as the
  parameters defining the root of a pushed join, then instantiate the
  pushed query.

  @param idx            Index number (must not be MAX_KEY).
  @param key            Key value in mysqld key format.
  @param ppartition_id  Explicit partition id for user-defined
                        partitioning, or NULL.

  @return 0 on success, error code from create_pushed_join() otherwise.
*/
int
ha_ndbcluster::pk_unique_index_read_key_pushed(uint idx,
                                               const uchar *key,
                                               Uint32 *ppartition_id)
{
  DBUG_ENTER("pk_unique_index_read_key_pushed");
  NdbOperation::OperationOptions options;
  NdbOperation::OperationOptions *poptions = NULL;
  options.optionsPresent= 0;
  NdbOperation::GetValueSpec gets[2];

  assert(m_thd_ndb->trans);
  assert(idx < MAX_KEY);

  // Close any previously instantiated pushed query first
  if (m_active_query)
  {
    m_active_query->close(FALSE);
    m_active_query= NULL;
  }

  if (table_share->primary_key == MAX_KEY)
  {
    get_hidden_fields_keyop(&options, gets);
    poptions= &options;
  }
  get_read_set(false, idx);

  if (ppartition_id != NULL)
  {
    assert(m_user_defined_partitioning);
    options.optionsPresent|= NdbOperation::OperationOptions::OO_PARTITION_ID;
    options.partitionId= *ppartition_id;
    poptions= &options;
  }

  KEY *key_def= &table->key_info[idx];
  KEY_PART_INFO *key_part;

  uint i;
  Uint32 offset= 0;
  NdbQueryParamValue paramValues[ndb_pushed_join::MAX_KEY_PART];
  assert(key_def->user_defined_key_parts <= ndb_pushed_join::MAX_KEY_PART);

  // 'map' translates MySQL key part order to NDB key column order
  uint map[ndb_pushed_join::MAX_KEY_PART];
  ndbcluster_build_key_map(m_table, m_index[idx], &table->key_info[idx], map);

  // Bind key values defining root of pushed join
  for (i = 0, key_part= key_def->key_part; i < key_def->user_defined_key_parts; i++, key_part++)
  {
    bool shrinkVarChar= is_shrinked_varchar(key_part->field);

    if (key_part->null_bit)                         // Column is nullable
    {
      assert(idx != table_share->primary_key); // PK can't be nullable
      assert(*(key+offset)==0);                // Null values not allowed in key
      // Value is imm. after NULL indicator
      paramValues[map[i]]= NdbQueryParamValue(key+offset+1,shrinkVarChar);
    }
    else                                        // Non-nullable column
    {
      paramValues[map[i]]= NdbQueryParamValue(key+offset,shrinkVarChar);
    }
    offset+= key_part->store_length;
  }

  const int ret= create_pushed_join(paramValues, key_def->user_defined_key_parts);
  DBUG_RETURN(ret);
}
4030
4031
4032 /** Count number of columns in key part. */
4033 static uint
count_key_columns(const KEY * key_info,const key_range * key)4034 count_key_columns(const KEY *key_info, const key_range *key)
4035 {
4036 KEY_PART_INFO *first_key_part= key_info->key_part;
4037 KEY_PART_INFO *key_part_end= first_key_part + key_info->user_defined_key_parts;
4038 KEY_PART_INFO *key_part;
4039 uint length= 0;
4040 for(key_part= first_key_part; key_part < key_part_end; key_part++)
4041 {
4042 if (length >= key->length)
4043 break;
4044 length+= key_part->store_length;
4045 }
4046 return (uint)(key_part - first_key_part);
4047 }
4048
/* Helper method to compute NDB index bounds. Note: does not set range_no. */
/* Stats queries may differ so add "from" 0:normal 1:RIR 2:RPK. */
/*
  Free function (not a member, despite the DBUG_ENTER tag): translates a
  MySQL (start_key, end_key) range into an NdbIndexScanOperation bound,
  handling the equality-lookup flags and inclusivity quirks documented
  inline below.

  @param[out] bound      NDB bound to fill in (range_no left untouched).
  @param      key_info   Index the bounds refer to.
  @param      start_key  Lower range endpoint, or NULL for unbounded.
  @param      end_key    Upper range endpoint, or NULL for unbounded.
  @param      from       Caller context: 0 normal, 1 records-in-range,
                         2 records-per-key.
*/
void
compute_index_bounds(NdbIndexScanOperation::IndexBound & bound,
                     const KEY *key_info,
                     const key_range *start_key, const key_range *end_key,
                     int from)
{
  DBUG_ENTER("ha_ndbcluster::compute_index_bounds");
  DBUG_PRINT("info", ("from: %d", from));

#ifndef NDEBUG
  DBUG_PRINT("info", ("key parts: %u length: %u",
                      key_info->user_defined_key_parts, key_info->key_length));
  {
    for (uint j= 0; j <= 1; j++)
    {
      const key_range* kr= (j == 0 ? start_key : end_key);
      if (kr)
      {
        DBUG_PRINT("info", ("key range %u: length: %u map: %lx flag: %d",
                            j, kr->length, kr->keypart_map, kr->flag));
        DBUG_DUMP("key", kr->key, kr->length);
      }
      else
      {
        DBUG_PRINT("info", ("key range %u: none", j));
      }
    }
  }
#endif

  if (start_key)
  {
    bound.low_key= (const char*)start_key->key;
    bound.low_key_count= count_key_columns(key_info, start_key);
    // AFTER/BEFORE flags exclude the endpoint itself
    bound.low_inclusive=
      start_key->flag != HA_READ_AFTER_KEY &&
      start_key->flag != HA_READ_BEFORE_KEY;
  }
  else
  {
    bound.low_key= NULL;
    bound.low_key_count= 0;
  }

  /* RIR query for x >= 1 inexplicably passes HA_READ_KEY_EXACT. */
  if (start_key &&
      (start_key->flag == HA_READ_KEY_EXACT ||
       start_key->flag == HA_READ_PREFIX_LAST) &&
      from != 1)
  {
    // Equality lookup: mirror the low bound as an inclusive high bound
    bound.high_key= bound.low_key;
    bound.high_key_count= bound.low_key_count;
    bound.high_inclusive= TRUE;
  }
  else if (end_key)
  {
    bound.high_key= (const char*)end_key->key;
    bound.high_key_count= count_key_columns(key_info, end_key);
    /*
      For some reason, 'where b >= 1 and b <= 3' uses HA_READ_AFTER_KEY for
      the end_key.
      So HA_READ_AFTER_KEY in end_key sets high_inclusive, even though in
      start_key it does not set low_inclusive.
    */
    bound.high_inclusive= end_key->flag != HA_READ_BEFORE_KEY;
    if (end_key->flag == HA_READ_KEY_EXACT ||
        end_key->flag == HA_READ_PREFIX_LAST)
    {
      // Equality on the end key: mirror it as an inclusive low bound
      bound.low_key= bound.high_key;
      bound.low_key_count= bound.high_key_count;
      bound.low_inclusive= TRUE;
    }
  }
  else
  {
    bound.high_key= NULL;
    bound.high_key_count= 0;
  }
  DBUG_PRINT("info", ("start_flag=%d end_flag=%d"
                      " lo_keys=%d lo_incl=%d hi_keys=%d hi_incl=%d",
                      start_key?start_key->flag:0, end_key?end_key->flag:0,
                      bound.low_key_count,
                      bound.low_key_count?bound.low_inclusive:0,
                      bound.high_key_count,
                      bound.high_key_count?bound.high_inclusive:0));
  DBUG_VOID_RETURN;
}
4138
4139 /**
4140 Start ordered index scan in NDB
4141 */
4142
int ha_ndbcluster::ordered_index_scan(const key_range *start_key,
                                      const key_range *end_key,
                                      bool sorted, bool descending,
                                      uchar* buf, part_id_range *part_spec)
{
  /*
    Set up and start an ordered scan on 'active_index', either as the root
    of a pushed join (NdbQuery) or as a plain NdbIndexScanOperation, then
    fetch the first row into 'buf' via next_result().

    start_key/end_key : optional range bounds (either may be NULL)
    sorted            : request rows in index order (SF_OrderByFull)
    descending        : scan the index backwards (bounds are swapped)
    part_spec         : optional partition range used for pruning
  */
  NdbTransaction *trans;
  NdbIndexScanOperation *op;
  int error;

  DBUG_ENTER("ha_ndbcluster::ordered_index_scan");
  DBUG_PRINT("enter", ("index: %u, sorted: %d, descending: %d read_set=0x%x",
                       active_index, sorted, descending, table->read_set->bitmap[0]));
  DBUG_PRINT("enter", ("Starting new ordered scan on %s", m_tabname));

  // Check that sorted seems to be initialised
  assert(sorted == 0 || sorted == 1);

  if (unlikely(!(trans= get_transaction(error))))
  {
    DBUG_RETURN(error);
  }

  // Any scan already open on this handler must be closed first
  if ((error= close_scan()))
    DBUG_RETURN(error);

  const NdbOperation::LockMode lm = get_ndb_lock_mode(m_lock.type);

  const NdbRecord *key_rec= m_index[active_index].ndb_record_key;
  const NdbRecord *row_rec= m_ndb_record;

  NdbIndexScanOperation::IndexBound bound;
  NdbIndexScanOperation::IndexBound *pbound = NULL;
  if (start_key != NULL || end_key != NULL)
  {
    /*
      Compute bounds info, reversing range boundaries
      if descending
    */
    compute_index_bounds(bound,
                         table->key_info + active_index,
                         (descending?
                          end_key : start_key),
                         (descending?
                          start_key : end_key),
                         0);
    bound.range_no = 0;
    pbound = &bound;
  }

  if (check_if_pushable(NdbQueryOperationDef::OrderedIndexScan, active_index))
  {
    // Pushed join: the scan executes as the root of an NdbQuery
    const int error= create_pushed_join();
    if (unlikely(error))
      DBUG_RETURN(error);

    NdbQuery* const query= m_active_query;
    if (sorted && query->getQueryOperation((uint)PUSHED_ROOT)
                  ->setOrdering(descending ? NdbQueryOptions::ScanOrdering_descending
                                           : NdbQueryOptions::ScanOrdering_ascending))
    {
      ERR_RETURN(query->getNdbError());
    }

    if (pbound && query->setBound(key_rec, pbound)!=0)
      ERR_RETURN(query->getNdbError());

    m_thd_ndb->m_scan_count++;

    bool prunable = false;
    if (unlikely(query->isPrunable(prunable) != 0))
      ERR_RETURN(query->getNdbError());
    if (prunable)
      m_thd_ndb->m_pruned_scan_count++;

    // Can't have BLOB in pushed joins (yet)
    assert(!uses_blob_value(table->read_set));
  }
  else
  {
    // Plain (non-pushed) index scan
    if (m_pushed_join_operation == PUSHED_ROOT)
    {
      m_thd_ndb->m_pushed_queries_dropped++;
    }

    NdbScanOperation::ScanOptions options;
    options.optionsPresent=NdbScanOperation::ScanOptions::SO_SCANFLAGS;
    options.scan_flags=0;

    NdbOperation::GetValueSpec gets[2];
    // Hidden-pk tables need extra reads of the hidden key (+ FRAGMENT)
    if (table_share->primary_key == MAX_KEY)
      get_hidden_fields_scan(&options, gets);

    get_read_set(true, active_index);

    if (lm == NdbOperation::LM_Read)
      options.scan_flags|= NdbScanOperation::SF_KeyInfo;
    if (sorted)
      options.scan_flags|= NdbScanOperation::SF_OrderByFull;
    if (descending)
      options.scan_flags|= NdbScanOperation::SF_Descending;

    /* Partition pruning */
    if (m_use_partition_pruning &&
        m_user_defined_partitioning && part_spec != NULL &&
        part_spec->start_part == part_spec->end_part)
    {
      /* Explicitly set partition id when pruning User-defined partitioned scan */
      options.partitionId = part_spec->start_part;
      options.optionsPresent |= NdbScanOperation::ScanOptions::SO_PARTITION_ID;
    }

    // Attach any pushed-down condition as an interpreted scan filter
    NdbInterpretedCode code(m_table);
    if (m_cond && m_cond->generate_scan_filter(&code, &options))
      ERR_RETURN(code.getNdbError());

    if (!(op= trans->scanIndex(key_rec, row_rec, lm,
                               (uchar *)(table->read_set->bitmap),
                               pbound,
                               &options,
                               sizeof(NdbScanOperation::ScanOptions))))
      ERR_RETURN(trans->getNdbError());

    DBUG_PRINT("info", ("Is scan pruned to 1 partition? : %u", op->getPruned()));
    m_thd_ndb->m_scan_count++;
    m_thd_ndb->m_pruned_scan_count += (op->getPruned()? 1 : 0);

    if (uses_blob_value(table->read_set) &&
        get_blob_values(op, NULL, table->read_set) != 0)
      ERR_RETURN(op->getNdbError());

    m_active_cursor= op;
  }

  if (sorted)
  {
    m_thd_ndb->m_sorted_scan_count++;
  }

  // Send the defined operations to the data nodes (no commit yet)
  if (execute_no_commit(m_thd_ndb, trans, m_ignore_no_key) != 0)
    DBUG_RETURN(ndb_err(trans));

  DBUG_RETURN(next_result(buf));
}
4286
4287 static
4288 int
guess_scan_flags(NdbOperation::LockMode lm,const NDBTAB * tab,const MY_BITMAP * readset)4289 guess_scan_flags(NdbOperation::LockMode lm,
4290 const NDBTAB* tab, const MY_BITMAP* readset)
4291 {
4292 int flags= 0;
4293 flags|= (lm == NdbOperation::LM_Read) ? NdbScanOperation::SF_KeyInfo : 0;
4294 if (tab->checkColumns(0, 0) & 2)
4295 {
4296 int ret = tab->checkColumns(readset->bitmap, no_bytes_in_map(readset));
4297
4298 if (ret & 2)
4299 { // If disk columns...use disk scan
4300 flags |= NdbScanOperation::SF_DiskScan;
4301 }
4302 else if ((ret & 4) == 0 && (lm == NdbOperation::LM_Exclusive))
4303 {
4304 // If no mem column is set and exclusive...guess disk scan
4305 flags |= NdbScanOperation::SF_DiskScan;
4306 }
4307 }
4308 return flags;
4309 }
4310
4311 /*
4312 Start full table scan in NDB or unique index scan
4313 */
4314
int ha_ndbcluster::full_table_scan(const KEY* key_info,
                                   const key_range *start_key,
                                   const key_range *end_key,
                                   uchar *buf)
{
  /*
    Start a full table scan (key_info == NULL) or a unique index scan
    implemented as a table scan with an attached scan filter
    (key_info != NULL), then fetch the first row into 'buf'.
  */
  int error;
  NdbTransaction *trans= m_thd_ndb->trans;
  part_id_range part_spec;
  bool use_set_part_id= FALSE;
  NdbOperation::GetValueSpec gets[2];

  DBUG_ENTER("full_table_scan");
  DBUG_PRINT("enter", ("Starting new scan on %s", m_tabname));

  if (m_use_partition_pruning && m_user_defined_partitioning)
  {
    assert(m_pushed_join_operation != PUSHED_ROOT);
    part_spec.start_part= 0;
    part_spec.end_part= m_part_info->get_tot_partitions() - 1;
    prune_partition_set(table, &part_spec);
    DBUG_PRINT("info", ("part_spec.start_part: %u part_spec.end_part: %u",
                        part_spec.start_part, part_spec.end_part));
    /*
      If partition pruning has found no partition in set
      we can return HA_ERR_END_OF_FILE
    */
    if (part_spec.start_part > part_spec.end_part)
    {
      DBUG_RETURN(HA_ERR_END_OF_FILE);
    }

    if (part_spec.start_part == part_spec.end_part)
    {
      /*
       * Only one partition is required to scan, if sorted is required
       * don't need it anymore since output from one ordered partitioned
       * index is always sorted.
       *
       * Note : This table scan pruning currently only occurs for
       * UserDefined partitioned tables.
       * It could be extended to occur for natively partitioned tables if
       * the Partitioning layer can make a key (e.g. start or end key)
       * available so that we can determine the correct pruning in the
       * NDBAPI layer.
       */
      use_set_part_id= TRUE;
      if (!trans)
        if (unlikely(!(trans= get_transaction_part_id(part_spec.start_part,
                                                      error))))
          DBUG_RETURN(error);
    }
  }
  // Fall back to an unpruned transaction if none was started above
  if (!trans)
    if (unlikely(!(trans= start_transaction(error))))
      DBUG_RETURN(error);

  const NdbOperation::LockMode lm = get_ndb_lock_mode(m_lock.type);
  NdbScanOperation::ScanOptions options;
  options.optionsPresent = (NdbScanOperation::ScanOptions::SO_SCANFLAGS |
                            NdbScanOperation::ScanOptions::SO_PARALLEL);
  options.scan_flags = guess_scan_flags(lm, m_table, table->read_set);
  options.parallel= DEFAULT_PARALLELISM;

  if (use_set_part_id) {
    assert(m_user_defined_partitioning);
    options.optionsPresent|= NdbScanOperation::ScanOptions::SO_PARTITION_ID;
    options.partitionId = part_spec.start_part;
  };  // NOTE(review): stray ';' — harmless empty statement

  // Hidden-pk tables need extra reads of the hidden key (+ FRAGMENT)
  if (table_share->primary_key == MAX_KEY)
    get_hidden_fields_scan(&options, gets);

  get_read_set(true, MAX_KEY);

  if (check_if_pushable(NdbQueryOperationDef::TableScan))
  {
    // Pushed join: the scan executes as the root of an NdbQuery
    const int error= create_pushed_join();
    if (unlikely(error))
      DBUG_RETURN(error);

    m_thd_ndb->m_scan_count++;
    // Can't have BLOB in pushed joins (yet)
    assert(!uses_blob_value(table->read_set));
  }
  else
  {
    if (m_pushed_join_operation == PUSHED_ROOT)
    {
      m_thd_ndb->m_pushed_queries_dropped++;
    }

    NdbScanOperation *op;
    NdbInterpretedCode code(m_table);

    if (!key_info)
    {
      // Plain table scan: attach any pushed-down condition as a filter
      if (m_cond && m_cond->generate_scan_filter(&code, &options))
        ERR_RETURN(code.getNdbError());
    }
    else
    {
      /* Unique index scan in NDB (full table scan with scan filter) */
      DBUG_PRINT("info", ("Starting unique index scan"));
      if (!m_cond)
        m_cond= new ha_ndbcluster_cond;

      if (!m_cond)
      {
        set_my_errno(HA_ERR_OUT_OF_MEM);
        DBUG_RETURN(my_errno());
      }
      if (m_cond->generate_scan_filter_from_key(&code, &options, key_info,
                                                start_key, end_key))
        ERR_RETURN(code.getNdbError());
    }

    if (!(op= trans->scanTable(m_ndb_record, lm,
                               (uchar *)(table->read_set->bitmap),
                               &options, sizeof(NdbScanOperation::ScanOptions))))
      ERR_RETURN(trans->getNdbError());

    m_thd_ndb->m_scan_count++;
    m_thd_ndb->m_pruned_scan_count += (op->getPruned()? 1 : 0);

    assert(m_active_cursor==NULL);
    m_active_cursor= op;

    if (uses_blob_value(table->read_set) &&
        get_blob_values(op, NULL, table->read_set) != 0)
      ERR_RETURN(op->getNdbError());
  } // if (check_if_pushable(NdbQueryOperationDef::TableScan))

  // Send the defined operations to the data nodes (no commit yet)
  if (execute_no_commit(m_thd_ndb, trans, m_ignore_no_key) != 0)
    DBUG_RETURN(ndb_err(trans));
  DBUG_PRINT("exit", ("Scan started successfully"));
  DBUG_RETURN(next_result(buf));
} // ha_ndbcluster::full_table_scan()
4452
4453 int
set_auto_inc(THD * thd,Field * field)4454 ha_ndbcluster::set_auto_inc(THD *thd, Field *field)
4455 {
4456 DBUG_ENTER("ha_ndbcluster::set_auto_inc");
4457 bool read_bit= bitmap_is_set(table->read_set, field->field_index);
4458 bitmap_set_bit(table->read_set, field->field_index);
4459 Uint64 next_val= (Uint64) field->val_int() + 1;
4460 if (!read_bit)
4461 bitmap_clear_bit(table->read_set, field->field_index);
4462 DBUG_RETURN(set_auto_inc_val(thd, next_val));
4463 }
4464
4465
/**
  RAII guard giving mutex-protected access to the tuple id range of an
  NDB_SHARE: the share's mutex is locked in the constructor and unlocked
  in the destructor, and 'range' refers to share->tuple_id_range for the
  guard's lifetime.
*/
class Ndb_tuple_id_range_guard {
  NDB_SHARE* m_share;   // share whose mutex is held by this guard
public:
  Ndb_tuple_id_range_guard(NDB_SHARE* share) :
    m_share(share),
    range(share->tuple_id_range)
  {
    native_mutex_lock(&m_share->mutex);
  }
  ~Ndb_tuple_id_range_guard()
  {
    native_mutex_unlock(&m_share->mutex);
  }
  // Reference into m_share; only valid while the guard is alive
  Ndb::TupleIdRange& range;
};
4481
4482
4483 inline
4484 int
set_auto_inc_val(THD * thd,Uint64 value)4485 ha_ndbcluster::set_auto_inc_val(THD *thd, Uint64 value)
4486 {
4487 Ndb *ndb= get_ndb(thd);
4488 DBUG_ENTER("ha_ndbcluster::set_auto_inc_val");
4489 DBUG_PRINT("enter", ("value: %llu", value));
4490 if (ndb->checkUpdateAutoIncrementValue(m_share->tuple_id_range, value))
4491 {
4492 Ndb_tuple_id_range_guard g(m_share);
4493 if (ndb->setAutoIncrementValue(m_table, g.range, value, TRUE)
4494 == -1)
4495 ERR_RETURN(ndb->getNdbError());
4496 }
4497 DBUG_RETURN(0);
4498 }
4499
4500
/**
  Extend table->read_set with the extra columns that an update/delete
  statement needs NDB to read (PK columns, columns needed for PK-update
  delete+reinsert, partition-spec columns, unique-key columns for error
  reporting). For other statements, or when read-before-write removal is
  in use, the read_set is left untouched.

  @param use_cursor  true when the read establishes a 'current of'
                     scan position, false for a direct key operation
  @param idx         index used, or MAX_KEY for a table scan
*/
void
ha_ndbcluster::get_read_set(bool use_cursor, uint idx)
{
  const bool is_delete=
    table->in_use->lex->sql_command == SQLCOM_DELETE ||
    table->in_use->lex->sql_command == SQLCOM_DELETE_MULTI;

  const bool is_update=
    table->in_use->lex->sql_command == SQLCOM_UPDATE ||
    table->in_use->lex->sql_command == SQLCOM_UPDATE_MULTI;

  assert(use_cursor ||
         idx == table_share->primary_key ||
         table->key_info[idx].flags & HA_NOSAME);

  if (!is_delete && !is_update)
  {
    return;
  }

  /**
   * It is questionable that we in some cases seems to
   * do a read even if 'm_read_before_write_removal_used'.
   * The usage pattern for this seems to be update/delete
   * cursors which establish a 'current of' position before
   * a delete- / updateCurrentTuple().
   * Anyway, as 'm_read_before_write_removal_used' we don't
   * have to add more columns to 'read_set'.
   *
   * FUTURE: Investigate if we could have completely
   * cleared the 'read_set'.
   *
   */
  if (m_read_before_write_removal_used)
  {
    return;
  }

  /**
   * If (part of) a primary key is updated, it is executed
   * as a delete+reinsert. In order to avoid extra read-round trips
   * to fetch missing columns required by reinsert:
   * Ensure all columns not being modified (in write_set)
   * are read prior to ::ndb_pk_update_row().
   * All PK columns are also required by ::ndb_delete_row()
   */
  if (bitmap_is_overlapping(table->write_set, m_pk_bitmap_p))
  {
    assert(table_share->primary_key != MAX_KEY);
    bitmap_set_all(&m_bitmap);
    bitmap_subtract(&m_bitmap, table->write_set);
    bitmap_union(table->read_set, &m_bitmap);
    bitmap_union(table->read_set, m_pk_bitmap_p);
  }

  /**
   * Determine whether we have to read PK columns in
   * addition to those columns already present in read_set.
   * NOTE: As checked above, It is a precondition that
   *       a read is required as part of delete/update
   *       (!m_read_before_write_removal_used)
   *
   * PK columns are required when:
   *  1) This is a primary/unique keyop.
   *     (i.e. not a positioned update/delete which
   *      maintain a 'current of' position.)
   *
   * In addition, when a 'current of' position is available:
   *  2) When deleting a row containing BLOBs PK is required
   *     to delete BLOB stored in separate fragments.
   *  3) When updating BLOB columns PK is required to delete
   *     old BLOB + insert new BLOB contents
   */
  else
  if (!use_cursor ||                             // 1)
      (is_delete && table_share->blob_fields) || // 2)
      uses_blob_value(table->write_set))         // 3)
  {
    bitmap_union(table->read_set, m_pk_bitmap_p);
  }

  /**
   * If update/delete use partition pruning, we need
   * to read the column values which being part of the
   * partition spec as they are used by
   * ::get_parts_for_update() / ::get_parts_for_delete()
   * Part. columns are always part of PK, so we only
   * have to do this if pk_bitmap wasn't added yet,
   */
  else if (m_use_partition_pruning)  // && m_user_defined_partitioning)
  {
    assert(bitmap_is_subset(&m_part_info->full_part_field_set,
                            m_pk_bitmap_p));
    bitmap_union(table->read_set, &m_part_info->full_part_field_set);
  }


  /**
   * Update might cause PK or Unique key violation.
   * Error reporting need values from the offending
   * unique columns to have been read:
   *
   * NOTE: This is NOT required for the correctness
   *       of the update operation itself. Maybe we
   *       should consider other strategies, like
   *       deferring reading of the column values
   *       until formatting the error message.
   */
  if (is_update && m_has_unique_index)
  {
    for (uint i= 0; i < table_share->keys; i++)
    {
      if ((table->key_info[i].flags & HA_NOSAME) &&
          bitmap_is_overlapping(table->write_set, m_key_fields[i]))
      {
        bitmap_union(table->read_set, m_key_fields[i]);
      }
    }
  }
}
4621
4622
4623 Uint32
setup_get_hidden_fields(NdbOperation::GetValueSpec gets[2])4624 ha_ndbcluster::setup_get_hidden_fields(NdbOperation::GetValueSpec gets[2])
4625 {
4626 Uint32 num_gets= 0;
4627 /*
4628 We need to read the hidden primary key, and possibly the FRAGMENT
4629 pseudo-column.
4630 */
4631 gets[num_gets].column= get_hidden_key_column();
4632 gets[num_gets].appStorage= &m_ref;
4633 num_gets++;
4634 if (m_user_defined_partitioning)
4635 {
4636 /* Need to read partition id to support ORDER BY columns. */
4637 gets[num_gets].column= NdbDictionary::Column::FRAGMENT;
4638 gets[num_gets].appStorage= &m_part_id;
4639 num_gets++;
4640 }
4641 return num_gets;
4642 }
4643
4644 void
get_hidden_fields_keyop(NdbOperation::OperationOptions * options,NdbOperation::GetValueSpec gets[2])4645 ha_ndbcluster::get_hidden_fields_keyop(NdbOperation::OperationOptions *options,
4646 NdbOperation::GetValueSpec gets[2])
4647 {
4648 Uint32 num_gets= setup_get_hidden_fields(gets);
4649 options->optionsPresent|= NdbOperation::OperationOptions::OO_GETVALUE;
4650 options->extraGetValues= gets;
4651 options->numExtraGetValues= num_gets;
4652 }
4653
4654 void
get_hidden_fields_scan(NdbScanOperation::ScanOptions * options,NdbOperation::GetValueSpec gets[2])4655 ha_ndbcluster::get_hidden_fields_scan(NdbScanOperation::ScanOptions *options,
4656 NdbOperation::GetValueSpec gets[2])
4657 {
4658 Uint32 num_gets= setup_get_hidden_fields(gets);
4659 options->optionsPresent|= NdbScanOperation::ScanOptions::SO_GETVALUE;
4660 options->extraGetValues= gets;
4661 options->numExtraGetValues= num_gets;
4662 }
4663
/**
  Set the AnyValue to be attached to this operation's binlog event,
  when required: for slave-applied operations the originating (unmasked)
  server id is propagated, and for no-logging transactions the
  'nologging' bit is set. Debug builds add further test-only overrides.
*/
inline void
ha_ndbcluster::eventSetAnyValue(THD *thd,
                                NdbOperation::OperationOptions *options) const
{
  options->anyValue= 0;
  if (unlikely(m_slow_path))
  {
    /*
      Ignore TNTO_NO_LOGGING for slave thd.  It is used to indicate
      log-slave-updates option.  This is instead handled in the
      injector thread, by looking explicitly at the
      opt_log_slave_updates flag.
    */
    Thd_ndb *thd_ndb= get_thd_ndb(thd);
    if (thd->slave_thread)
    {
      /*
        Slave-thread, we are applying a replicated event.
        We set the server_id to the value received from the log which
        may be a composite of server_id and other data according
        to the server_id_bits option.
        In future it may be useful to support *not* mapping composite
        AnyValues to/from Binlogged server-ids
      */
      options->optionsPresent |= NdbOperation::OperationOptions::OO_ANYVALUE;
      options->anyValue = thd_unmasked_server_id(thd);
    }
    else if (thd_ndb->trans_options & TNTO_NO_LOGGING)
    {
      // Mark the event so the injector skips binlogging it
      options->optionsPresent |= NdbOperation::OperationOptions::OO_ANYVALUE;
      ndbcluster_anyvalue_set_nologging(options->anyValue);
    }
  }
#ifndef NDEBUG
  // Test-only hooks forcing reflect/refresh AnyValue bits
  DBUG_EXECUTE_IF("ndb_set_reflect_anyvalue",
                  {
                    fprintf(stderr, "Ndb forcing reflect AnyValue\n");
                    options->optionsPresent |= NdbOperation::OperationOptions::OO_ANYVALUE;
                    ndbcluster_anyvalue_set_reflect_op(options->anyValue);
                  });
  DBUG_EXECUTE_IF("ndb_set_refresh_anyvalue",
                  {
                    fprintf(stderr, "Ndb forcing refresh AnyValue\n");
                    options->optionsPresent |= NdbOperation::OperationOptions::OO_ANYVALUE;
                    ndbcluster_anyvalue_set_refresh_op(options->anyValue);
                  });

  /*
    MySQLD will set the user-portion of AnyValue (if any) to all 1s
    This tests code filtering ServerIds on the value of server-id-bits.
  */
  const char* p = getenv("NDB_TEST_ANYVALUE_USERDATA");
  if (p != 0 && *p != 0 && *p != '0' && *p != 'n' && *p != 'N')
  {
    options->optionsPresent |= NdbOperation::OperationOptions::OO_ANYVALUE;
    dbug_ndbcluster_anyvalue_set_userbits(options->anyValue);
  }
#endif
}
4723
4724 #ifdef HAVE_NDB_BINLOG
4725
4726 /**
4727 prepare_conflict_detection
4728
4729 This method is called during operation definition by the slave,
4730 when writing to a table with conflict detection defined.
4731
4732 It is responsible for defining and adding any operation filtering
4733 required, and for saving any operation definition state required
4734 for post-execute analysis.
4735
4736 For transactional detection, this method may determine that the
4737 operation being defined should not be executed, and conflict
4738 handling should occur immediately. In this case, conflict_handled
4739 is set to true.
4740 */
int
ha_ndbcluster::prepare_conflict_detection(enum_conflicting_op_type op_type,
                                          const NdbRecord* key_rec,
                                          const NdbRecord* data_rec,
                                          const uchar* old_data,
                                          const uchar* new_data,
                                          const MY_BITMAP *write_set,
                                          NdbTransaction* trans,
                                          NdbInterpretedCode* code,
                                          NdbOperation::OperationOptions* options,
                                          bool& conflict_handled,
                                          bool& avoid_ndbapi_write)
{
  DBUG_ENTER("prepare_conflict_detection");
  THD* thd = table->in_use;
  // NOTE(review): this 'res' is shadowed by an inner 'res' declared below
  int res = 0;
  assert(thd->slave_thread);

  conflict_handled = false;

  /*
    Special check for apply_status table, as we really don't want
    to do any special handling with it
  */
  if (unlikely(m_share == ndb_apply_status_share))
  {
    DBUG_RETURN(0);
  }

  /*
    Check transaction id first, as in transactional conflict detection,
    the transaction id is what eventually dictates whether an operation
    is applied or not.

    Note that this applies even if the current operation's table does not
    have a conflict function defined - if a transaction spans a 'transactional
    conflict detection' table and a non transactional table, the non-transactional
    table's data will also be reverted.
  */
  Uint64 transaction_id = Ndb_binlog_extra_row_info::InvalidTransactionId;
  Uint16 conflict_flags = Ndb_binlog_extra_row_info::UnsetConflictFlags;
  bool op_is_marked_as_read= false;
  bool op_is_marked_as_reflected= false;
  bool op_is_marked_as_refresh= false;

  // Decode transaction id and conflict flags from the event's extra row data
  if (thd->binlog_row_event_extra_data)
  {
    Ndb_binlog_extra_row_info extra_row_info;
    if (extra_row_info.loadFromBuffer(thd->binlog_row_event_extra_data) != 0)
    {
      sql_print_warning("NDB Slave : Malformed event received on table %s "
                        "cannot parse.  Stopping Slave.",
                        m_share->key_string());
      DBUG_RETURN( ER_SLAVE_CORRUPT_EVENT );
    }

    if (extra_row_info.getFlags() &
        Ndb_binlog_extra_row_info::NDB_ERIF_TRANSID)
      transaction_id = extra_row_info.getTransactionId();

    if (extra_row_info.getFlags() &
        Ndb_binlog_extra_row_info::NDB_ERIF_CFT_FLAGS)
    {
      DBUG_PRINT("info",
                 ("Slave : have conflict flags : %x\n",
                  extra_row_info.getConflictFlags()));
      conflict_flags = extra_row_info.getConflictFlags();

      if (conflict_flags & NDB_ERIF_CFT_REFLECT_OP)
      {
        op_is_marked_as_reflected= true;
        g_ndb_slave_state.current_reflect_op_prepare_count++;
      }

      if (conflict_flags & NDB_ERIF_CFT_REFRESH_OP)
      {
        op_is_marked_as_refresh= true;
        g_ndb_slave_state.current_refresh_op_count++;
      }

      if (conflict_flags & NDB_ERIF_CFT_READ_OP)
        op_is_marked_as_read= true;

      /* Sanity - 1 flag at a time at most */
      assert(! (op_is_marked_as_reflected &&
                op_is_marked_as_refresh));
      assert(! (op_is_marked_as_read &&
                (op_is_marked_as_reflected ||
                 op_is_marked_as_refresh)));
    }
  }

  const st_conflict_fn_def* conflict_fn = (m_share->m_cfn_share?
                                           m_share->m_cfn_share->m_conflict_fn:
                                           NULL);

  bool pass_mode = false;
  if (conflict_fn)
  {
    /* Check Slave Conflict Role Variable setting */
    if (conflict_fn->flags & CF_USE_ROLE_VAR)
    {
      switch (opt_ndb_slave_conflict_role)
      {
      case SCR_NONE:
      {
        sql_print_warning("NDB Slave : Conflict function %s defined on "
                          "table %s requires ndb_slave_conflict_role variable "
                          "to be set.  Stopping slave.",
                          conflict_fn->name,
                          m_share->key_string());
        DBUG_RETURN(ER_SLAVE_CONFIGURATION);
      }
      case SCR_PASS:
      {
        pass_mode = true;
      }
      /* fallthrough */
      default:
        /* PRIMARY, SECONDARY */
        break;
      }
    }
  }

  {
    bool handle_conflict_now = false;
    const uchar* row_data = (op_type == WRITE_ROW? new_data : old_data);
    // Transactional detection may decide this op conflicts already now
    int res = g_ndb_slave_state.atPrepareConflictDetection(m_table,
                                                           key_rec,
                                                           row_data,
                                                           transaction_id,
                                                           handle_conflict_now);
    if (res)
      DBUG_RETURN(res);

    if (handle_conflict_now)
    {
      DBUG_PRINT("info", ("Conflict handling for row occurring now"));
      NdbError noRealConflictError;
      /*
       * If the user operation was a read and we receive an update
       * log event due to an AnyValue update, then the conflicting operation
       * should be reported as a read.
       */
      enum_conflicting_op_type conflicting_op=
        (op_type == UPDATE_ROW && op_is_marked_as_read)?
        READ_ROW
        : op_type;
      /*
         Directly handle the conflict here - e.g refresh/ write to
         exceptions table etc.
      */
      res = handle_row_conflict(m_share->m_cfn_share,
                                m_share->table_name,
                                m_share->flags & NSF_BLOB_FLAG,
                                "Transaction",
                                key_rec,
                                data_rec,
                                old_data,
                                new_data,
                                conflicting_op,
                                TRANS_IN_CONFLICT,
                                noRealConflictError,
                                trans,
                                write_set,
                                transaction_id);
      if (unlikely(res))
        DBUG_RETURN(res);

      g_ndb_slave_state.conflict_flags |= SCS_OPS_DEFINED;

      /*
        Indicate that there (may be) some more operations to
        execute before committing
      */
      m_thd_ndb->m_unsent_bytes+= 12;
      conflict_handled = true;
      DBUG_RETURN(0);
    }
  }

  if (conflict_fn == NULL ||
      pass_mode)
  {
    /* No conflict function definition required */
    DBUG_RETURN(0);
  }

  /**
   * By default conflict algorithms use the 'natural' NdbApi ops
   * (insert/update/delete) which can detect presence anomalies,
   * as opposed to NdbApi write which ignores them.
   * However in some cases, we want to use NdbApi write to apply
   * events received on tables with conflict detection defined
   * (e.g. when we want to forcibly align a row with a refresh op).
   */
  avoid_ndbapi_write = true;

  if (unlikely((conflict_fn->flags & CF_TRANSACTIONAL) &&
               (transaction_id == Ndb_binlog_extra_row_info::InvalidTransactionId)))
  {
    sql_print_warning("NDB Slave : Transactional conflict detection defined on table %s, but "
                      "events received without transaction ids.  Check --ndb-log-transaction-id setting "
                      "on upstream Cluster.",
                      m_share->key_string());
    /* This is a user error, but we want them to notice, so treat seriously */
    DBUG_RETURN( ER_SLAVE_CORRUPT_EVENT );
  }

  /**
   * Normally, update and delete have an attached program executed against
   * the existing row content.  Insert (and NdbApi write) do not.
   * Insert cannot as there is no pre-existing row to examine (and therefore
   * no non prepare-time deterministic decisions to make).
   * NdbApi Write technically could if the row already existed, but this is
   * not currently supported by NdbApi.
   */
  bool prepare_interpreted_program = (op_type != WRITE_ROW);

  if (conflict_fn->flags & CF_REFLECT_SEC_OPS)
  {
    /* This conflict function reflects secondary ops at the Primary */

    if (opt_ndb_slave_conflict_role == SCR_PRIMARY)
    {
      /**
       * Here we mark the applied operations to indicate that they
       * should be reflected back to the SECONDARY cluster.
       * This is required so that :
       *   1.  They are given local Binlog Event source serverids
       *       and so will pass through to the storage engine layer
       *       on the SECONDARY.
       *       (Normally they would be filtered in the Slave IO thread
       *        as having returned-to-source)
       *
       *   2.  They can be tagged as reflected so that the SECONDARY
       *       can handle them differently
       *       (They are force-applied)
       */
      DBUG_PRINT("info", ("Setting AnyValue to reflect secondary op"));

      options->optionsPresent |=
        NdbOperation::OperationOptions::OO_ANYVALUE;
      ndbcluster_anyvalue_set_reflect_op(options->anyValue);
    }
    else if (opt_ndb_slave_conflict_role == SCR_SECONDARY)
    {
      /**
       * On the Secondary, we receive reflected operations which
       * we want to attempt to apply under certain conditions.
       * This is done to recover from situations where
       * both PRIMARY and SECONDARY have performed concurrent
       * DELETEs.
       *
       * For non reflected operations we want to apply Inserts and
       * Updates using write_tuple() to get an idempotent effect
       */
      if (op_is_marked_as_reflected)
      {
        /**
         * Apply operations using their 'natural' operation types
         * with interpreted programs attached where appropriate.
         * Natural operation types used so that we become aware
         * of any 'presence' issues (row does/not exist).
         */
        DBUG_PRINT("info", ("Reflected operation"));
      }
      else
      {
        /**
         * Either a normal primary sourced change, or a refresh
         * operation.
         * In both cases we want to apply the operation idempotently,
         * and there's no need for an interpreted program.
         * e.g.
         *   WRITE_ROW  -> NdbApi write_row
         *   UPDATE_ROW -> NdbApi write_row
         *   DELETE_ROW -> NdbApi delete_row
         *
         * NdbApi write_row does not fail.
         * NdbApi delete_row will complain if the row does not exist
         * but this will be ignored
         */
        DBUG_PRINT("info", ("Allowing use of NdbApi write_row "
                            "for non reflected op (%u)",
                            op_is_marked_as_refresh));
        prepare_interpreted_program = false;
        avoid_ndbapi_write = false;
      }
    }
  }

  /*
    Prepare interpreted code for operation (update + delete only) according
    to algorithm used
  */
  if (prepare_interpreted_program)
  {
    res = conflict_fn->prep_func(m_share->m_cfn_share,
                                 op_type,
                                 m_ndb_record,
                                 old_data,
                                 new_data,
                                 table->read_set,  // Before image
                                 table->write_set, // After image
                                 code);

    if (res == 0)
    {
      if (code->getWordsUsed() > 0)
      {
        /* Attach conflict detecting filter program to operation */
        options->optionsPresent|=
          NdbOperation::OperationOptions::OO_INTERPRETED;
        options->interpretedCode= code;
      }
    }
    else
    {
      sql_print_warning("NDB Slave : Binlog event on table %s missing "
                        "info necessary for conflict detection.  "
                        "Check binlog format options on upstream cluster.",
                        m_share->key_string());
      DBUG_RETURN( ER_SLAVE_CORRUPT_EVENT);
    }
  } // if (op_type != WRITE_ROW)

  g_ndb_slave_state.conflict_flags |= SCS_OPS_DEFINED;

  /* Now save data for potential insert to exceptions table... */
  Ndb_exceptions_data ex_data;
  ex_data.share= m_share;
  ex_data.key_rec= key_rec;
  ex_data.data_rec= data_rec;
  ex_data.op_type= op_type;
  ex_data.reflected_operation = op_is_marked_as_reflected;
  ex_data.trans_id= transaction_id;
  /*
    We need to save the row data for possible conflict resolution after
    execute().
  */
  if (old_data)
    ex_data.old_row= copy_row_to_buffer(m_thd_ndb, old_data);
  if (old_data != NULL && ex_data.old_row == NULL)
  {
    DBUG_RETURN(HA_ERR_OUT_OF_MEM);
  }
  if (new_data)
    ex_data.new_row= copy_row_to_buffer(m_thd_ndb, new_data);
  if (new_data != NULL && ex_data.new_row == NULL)
  {
    DBUG_RETURN(HA_ERR_OUT_OF_MEM);
  }

  ex_data.bitmap_buf= NULL;
  ex_data.write_set= NULL;
  if (table->write_set)
  {
    /* Copy table write set */
    ex_data.bitmap_buf=
      (my_bitmap_map *) get_buffer(m_thd_ndb, table->s->column_bitmap_size);
    if (ex_data.bitmap_buf == NULL)
    {
      DBUG_RETURN(HA_ERR_OUT_OF_MEM);
    }
    ex_data.write_set= (MY_BITMAP*) get_buffer(m_thd_ndb, sizeof(MY_BITMAP));
    if (ex_data.write_set == NULL)
    {
      DBUG_RETURN(HA_ERR_OUT_OF_MEM);
    }
    bitmap_init(ex_data.write_set, ex_data.bitmap_buf,
                table->write_set->n_bits, false);
    bitmap_copy(ex_data.write_set, table->write_set);
  }

  uchar* ex_data_buffer= get_buffer(m_thd_ndb, sizeof(ex_data));
  if (ex_data_buffer == NULL)
  {
    DBUG_RETURN(HA_ERR_OUT_OF_MEM);
  }
  memcpy(ex_data_buffer, &ex_data, sizeof(ex_data));

  /* Store ptr to exceptions data in operation 'customdata' ptr */
  options->optionsPresent|= NdbOperation::OperationOptions::OO_CUSTOMDATA;
  options->customData= (void*)ex_data_buffer;

  DBUG_RETURN(0);
}
5129
5130 /**
5131 handle_conflict_op_error
5132
5133 This method is called when an error is detected after executing an
5134 operation with conflict detection active.
5135
5136 If the operation error is related to conflict detection, handling
5137 starts.
5138
5139 Handling involves incrementing the relevant counter, and optionally
5140 refreshing the row and inserting an entry into the exceptions table
5141 */
5142
static int
handle_conflict_op_error(NdbTransaction* trans,
                         const NdbError& err,
                         const NdbOperation* op)
{
  DBUG_ENTER("handle_conflict_op_error");
  DBUG_PRINT("info", ("ndb error: %d", err.code));

  /*
    Only a specific set of errors are treated as (potential) conflicts:
    the two conflict-function pseudo errors, constraint violation
    (duplicate key on insert) and no-data-found (missing row on
    update/delete).  Anything else is returned to the caller unchanged.
  */
  if ((err.code == (int) error_conflict_fn_violation) ||
      (err.code == (int) error_op_after_refresh_op) ||
      (err.classification == NdbError::ConstraintViolation) ||
      (err.classification == NdbError::NoDataFound))
  {
    DBUG_PRINT("info",
               ("err.code = %s, err.classification = %s",
                ((err.code == (int) error_conflict_fn_violation)?
                 "error_conflict_fn_violation":
                 ((err.code == (int) error_op_after_refresh_op)?
                  "error_op_after_refresh_op" : "?")),
                ((err.classification == NdbError::ConstraintViolation)?
                 "ConstraintViolation":
                 ((err.classification == NdbError::NoDataFound)?
                  "NoDataFound" : "?"))));

    enum_conflict_cause conflict_cause;

    /* Map cause onto our conflict description type */
    if ((err.code == (int) error_conflict_fn_violation) ||
        (err.code == (int) error_op_after_refresh_op))
    {
      DBUG_PRINT("info", ("ROW_IN_CONFLICT"));
      conflict_cause= ROW_IN_CONFLICT;
    }
    else if (err.classification == NdbError::ConstraintViolation)
    {
      DBUG_PRINT("info", ("ROW_ALREADY_EXISTS"));
      conflict_cause= ROW_ALREADY_EXISTS;
    }
    else
    {
      assert(err.classification == NdbError::NoDataFound);
      DBUG_PRINT("info", ("ROW_DOES_NOT_EXIST"));
      conflict_cause= ROW_DOES_NOT_EXIST;
    }

    /*
      Get exceptions data from operation.  This was attached as
      'customdata' when the operation was defined (see
      prepare_conflict_detection()); it is copied out by value since
      the buffer belongs to the Thd_ndb batch allocator.
    */
    const void* buffer=op->getCustomData();
    assert(buffer);
    Ndb_exceptions_data ex_data;
    memcpy(&ex_data, buffer, sizeof(ex_data));
    NDB_SHARE *share= ex_data.share;
    NDB_CONFLICT_FN_SHARE* cfn_share= share ? share->m_cfn_share : NULL;

    const NdbRecord* key_rec= ex_data.key_rec;
    const NdbRecord* data_rec= ex_data.data_rec;
    const uchar* old_row= ex_data.old_row;
    const uchar* new_row= ex_data.new_row;
#ifndef NDEBUG
    /* 'row' is only used in the assert below; avoid unused-var in release */
    const uchar* row=
      (ex_data.op_type == DELETE_ROW)?
      ex_data.old_row : ex_data.new_row;
#endif
    enum_conflicting_op_type causing_op_type= ex_data.op_type;
    const MY_BITMAP *write_set= ex_data.write_set;

    DBUG_PRINT("info", ("Conflict causing op type : %u",
                        causing_op_type));

    if (causing_op_type == REFRESH_ROW)
    {
      /*
        The failing op was a refresh row, we require that it
        failed due to being a duplicate (e.g. a refresh
        occurring on a refreshed row)
      */
      if (err.code == (int) error_op_after_refresh_op)
      {
        DBUG_PRINT("info", ("Operation after refresh - ignoring"));
        DBUG_RETURN(0);
      }
      else
      {
        DBUG_PRINT("info", ("Refresh op hit real error %u", err.code));
        /* Unexpected error, normal handling*/
        DBUG_RETURN(err.code);
      }
    }

    if (ex_data.reflected_operation)
    {
      DBUG_PRINT("info", ("Reflected operation error : %u.",
                          err.code));

      /**
       * Expected cases are :
       *   Insert : Row already exists :      Don't care - discard
       *              Secondary has this row, or a future version
       *
       *   Update : Row does not exist :      Don't care - discard
       *              Secondary has deleted this row later.
       *
       *            Conflict
       *            (Row written here last) : Don't care - discard
       *              Secondary has this row, or a future version
       *
       *   Delete : Row does not exist :      Don't care - discard
       *              Secondary has deleted this row later.
       *
       *            Conflict
       *            (Row written here last) : Don't care - discard
       *              Secondary has a future version of this row
       *
       *   Presence and authorship conflicts are used to determine
       *   whether to apply a reflecte operation.
       *   The presence checks avoid divergence and the authorship
       *   checks avoid all actions being applied in delayed
       *   duplicate.
       */
      assert((err.code == (int) error_conflict_fn_violation) ||
             (err.classification == NdbError::ConstraintViolation) ||
             (err.classification == NdbError::NoDataFound));

      g_ndb_slave_state.current_reflect_op_discard_count++;

      DBUG_RETURN(0);
    }

    {
      /**
       * For asymmetric algorithms that use the ROLE variable to
       * determine their role, we check whether we are on the
       * SECONDARY cluster.
       * This is far as we want to process conflicts on the
       * SECONDARY.
       */
      bool secondary = cfn_share &&
        cfn_share->m_conflict_fn &&
        (cfn_share->m_conflict_fn->flags & CF_USE_ROLE_VAR) &&
        (opt_ndb_slave_conflict_role == SCR_SECONDARY);

      if (secondary)
      {
        DBUG_PRINT("info", ("Conflict detected, on secondary - ignore"));
        DBUG_RETURN(0);
      }
    }

    assert(share != NULL && row != NULL);
    bool table_has_trans_conflict_detection =
      cfn_share &&
      cfn_share->m_conflict_fn &&
      (cfn_share->m_conflict_fn->flags & CF_TRANSACTIONAL);

    if (table_has_trans_conflict_detection)
    {
      /* Mark this transaction as in-conflict, unless this is a
       * Delete-Delete conflict, which we can't currently handle
       * in the normal way
       */
      if (! ((causing_op_type == DELETE_ROW) &&
             (conflict_cause == ROW_DOES_NOT_EXIST)))
      {
        /* Perform special transactional conflict-detected handling */
        int res = g_ndb_slave_state.atTransConflictDetected(ex_data.trans_id);
        if (res)
          DBUG_RETURN(res);
      }
    }

    if (cfn_share)
    {
      /* Now handle the conflict on this row */
      enum_conflict_fn_type cft = cfn_share->m_conflict_fn->type;

      /* Bump the per-conflict-function violation counter */
      g_ndb_slave_state.current_violation_count[cft]++;

      int res = handle_row_conflict(cfn_share,
                                    share->table_name,
                                    false, /* table_has_blobs */
                                    "Row",
                                    key_rec,
                                    data_rec,
                                    old_row,
                                    new_row,
                                    causing_op_type,
                                    conflict_cause,
                                    err,
                                    trans,
                                    write_set,
                                    /*
                                      ORIG_TRANSID not available for
                                      non-transactional conflict detection.
                                    */
                                    Ndb_binlog_extra_row_info::InvalidTransactionId);

      DBUG_RETURN(res);
    }
    else
    {
      DBUG_PRINT("info", ("missing cfn_share"));
      DBUG_RETURN(0); // TODO : Correct?
    }
  }
  else
  {
    /* Non conflict related error */
    DBUG_PRINT("info", ("err.code == %u", err.code));
    DBUG_RETURN(err.code);
  }

  DBUG_RETURN(0); // Reachable?
}
5355
5356 /*
5357 is_serverid_local
5358 */
is_serverid_local(Uint32 serverid)5359 static bool is_serverid_local(Uint32 serverid)
5360 {
5361 /*
5362 If it's not our serverid, check the
5363 IGNORE_SERVER_IDS setting to check if
5364 it's local.
5365 */
5366 return ((serverid == ::server_id) ||
5367 ndb_mi_get_ignore_server_id(serverid));
5368 }
5369 #endif
5370
write_row(uchar * record)5371 int ha_ndbcluster::write_row(uchar *record)
5372 {
5373 DBUG_ENTER("ha_ndbcluster::write_row");
5374 #ifdef HAVE_NDB_BINLOG
5375 if (m_share == ndb_apply_status_share && table->in_use->slave_thread)
5376 {
5377 uint32 row_server_id, master_server_id= ndb_mi_get_master_server_id();
5378 uint64 row_epoch;
5379 memcpy(&row_server_id, table->field[0]->ptr + (record - table->record[0]),
5380 sizeof(row_server_id));
5381 memcpy(&row_epoch, table->field[1]->ptr + (record - table->record[0]),
5382 sizeof(row_epoch));
5383 int rc = g_ndb_slave_state.atApplyStatusWrite(master_server_id,
5384 row_server_id,
5385 row_epoch,
5386 is_serverid_local(row_server_id));
5387 if (rc != 0)
5388 {
5389 /* Stop Slave */
5390 DBUG_RETURN(rc);
5391 }
5392 }
5393 #endif /* HAVE_NDB_BINLOG */
5394 DBUG_RETURN(ndb_write_row(record, FALSE, FALSE));
5395 }
5396
5397 /**
5398 Insert one record into NDB
5399 */
ndb_write_row(uchar * record,bool primary_key_update,bool batched_update)5400 int ha_ndbcluster::ndb_write_row(uchar *record,
5401 bool primary_key_update,
5402 bool batched_update)
5403 {
5404 bool has_auto_increment;
5405 const NdbOperation *op;
5406 THD *thd= table->in_use;
5407 Thd_ndb *thd_ndb= m_thd_ndb;
5408 NdbTransaction *trans;
5409 uint32 part_id;
5410 int error= 0;
5411 NdbOperation::SetValueSpec sets[3];
5412 Uint32 num_sets= 0;
5413 DBUG_ENTER("ha_ndbcluster::ndb_write_row");
5414
5415 error = check_slave_state(thd);
5416 if (unlikely(error))
5417 DBUG_RETURN(error);
5418
5419 has_auto_increment= (table->next_number_field && record == table->record[0]);
5420
5421 if (has_auto_increment && table_share->primary_key != MAX_KEY)
5422 {
5423 /*
5424 * Increase any auto_incremented primary key
5425 */
5426 m_skip_auto_increment= FALSE;
5427 if ((error= update_auto_increment()))
5428 DBUG_RETURN(error);
5429 m_skip_auto_increment= (insert_id_for_cur_row == 0 ||
5430 thd->auto_inc_intervals_forced.nb_elements());
5431 }
5432
5433 /*
5434 * If IGNORE the ignore constraint violations on primary and unique keys
5435 */
5436 if (!m_use_write && m_ignore_dup_key)
5437 {
5438 /*
5439 compare if expression with that in start_bulk_insert()
5440 start_bulk_insert will set parameters to ensure that each
5441 write_row is committed individually
5442 */
5443 int peek_res= peek_indexed_rows(record, NDB_INSERT);
5444
5445 if (!peek_res)
5446 {
5447 error= HA_ERR_FOUND_DUPP_KEY;
5448 }
5449 else if (peek_res != HA_ERR_KEY_NOT_FOUND)
5450 {
5451 error= peek_res;
5452 }
5453 if (error)
5454 {
5455 if ((has_auto_increment) && (m_skip_auto_increment))
5456 {
5457 int ret_val;
5458 if ((ret_val= set_auto_inc(thd, table->next_number_field)))
5459 {
5460 DBUG_RETURN(ret_val);
5461 }
5462 }
5463 m_skip_auto_increment= TRUE;
5464 DBUG_RETURN(error);
5465 }
5466 }
5467
5468 bool uses_blobs= uses_blob_value(table->write_set);
5469
5470 Uint64 auto_value;
5471 const NdbRecord *key_rec;
5472 const uchar *key_row;
5473 if (table_share->primary_key == MAX_KEY)
5474 {
5475 /* Table has hidden primary key. */
5476 Ndb *ndb= get_ndb(thd);
5477 uint retries= NDB_AUTO_INCREMENT_RETRIES;
5478 int retry_sleep= 30; /* 30 milliseconds, transaction */
5479 for (;;)
5480 {
5481 Ndb_tuple_id_range_guard g(m_share);
5482 if (ndb->getAutoIncrementValue(m_table, g.range, auto_value, 1000) == -1)
5483 {
5484 if (--retries && !thd->killed &&
5485 ndb->getNdbError().status == NdbError::TemporaryError)
5486 {
5487 do_retry_sleep(retry_sleep);
5488 continue;
5489 }
5490 ERR_RETURN(ndb->getNdbError());
5491 }
5492 break;
5493 }
5494 sets[num_sets].column= get_hidden_key_column();
5495 sets[num_sets].value= &auto_value;
5496 num_sets++;
5497 key_rec= m_ndb_hidden_key_record;
5498 key_row= (const uchar *)&auto_value;
5499 }
5500 else
5501 {
5502 key_rec= m_index[table_share->primary_key].ndb_unique_record_row;
5503 key_row= record;
5504 }
5505
5506 trans= thd_ndb->trans;
5507 if (m_user_defined_partitioning)
5508 {
5509 assert(m_use_partition_pruning);
5510 longlong func_value= 0;
5511 my_bitmap_map *old_map= dbug_tmp_use_all_columns(table, table->read_set);
5512 error= m_part_info->get_partition_id(m_part_info, &part_id, &func_value);
5513 dbug_tmp_restore_column_map(table->read_set, old_map);
5514 if (unlikely(error))
5515 {
5516 m_part_info->err_value= func_value;
5517 DBUG_RETURN(error);
5518 }
5519 {
5520 /*
5521 We need to set the value of the partition function value in
5522 NDB since the NDB kernel doesn't have easy access to the function
5523 to calculate the value.
5524 */
5525 if (func_value >= INT_MAX32)
5526 func_value= INT_MAX32;
5527 sets[num_sets].column= get_partition_id_column();
5528 sets[num_sets].value= &func_value;
5529 num_sets++;
5530 }
5531 if (!trans)
5532 if (unlikely(!(trans= start_transaction_part_id(part_id, error))))
5533 DBUG_RETURN(error);
5534 }
5535 else if (!trans)
5536 {
5537 if (unlikely(!(trans= start_transaction_row(key_rec, key_row, error))))
5538 DBUG_RETURN(error);
5539 }
5540 assert(trans);
5541
5542 ha_statistic_increment(&SSV::ha_write_count);
5543
5544 /*
5545 Setup OperationOptions
5546 */
5547 NdbOperation::OperationOptions options;
5548 NdbOperation::OperationOptions *poptions = NULL;
5549 options.optionsPresent=0;
5550
5551 eventSetAnyValue(thd, &options);
5552 const bool need_flush=
5553 thd_ndb->add_row_check_if_batch_full(m_bytes_per_write);
5554
5555 const Uint32 authorValue = 1;
5556 if ((thd->slave_thread) &&
5557 (m_table->getExtraRowAuthorBits()))
5558 {
5559 /* Set author to indicate slave updated last */
5560 sets[num_sets].column= NdbDictionary::Column::ROW_AUTHOR;
5561 sets[num_sets].value= &authorValue;
5562 num_sets++;
5563 }
5564
5565 if (m_user_defined_partitioning)
5566 {
5567 options.optionsPresent |= NdbOperation::OperationOptions::OO_PARTITION_ID;
5568 options.partitionId= part_id;
5569 }
5570 if (num_sets)
5571 {
5572 options.optionsPresent |= NdbOperation::OperationOptions::OO_SETVALUE;
5573 options.extraSetValues= sets;
5574 options.numExtraSetValues= num_sets;
5575 }
5576 if (thd->slave_thread || THDVAR(thd, deferred_constraints))
5577 {
5578 options.optionsPresent |=
5579 NdbOperation::OperationOptions::OO_DEFERRED_CONSTAINTS;
5580 }
5581
5582 if (thd_test_options(thd, OPTION_NO_FOREIGN_KEY_CHECKS))
5583 {
5584 DBUG_PRINT("info", ("Disabling foreign keys"));
5585 options.optionsPresent |=
5586 NdbOperation::OperationOptions::OO_DISABLE_FK;
5587 }
5588
5589 if (options.optionsPresent != 0)
5590 poptions=&options;
5591
5592 const Uint32 bitmapSz= (NDB_MAX_ATTRIBUTES_IN_TABLE + 31)/32;
5593 uint32 tmpBitmapSpace[bitmapSz];
5594 MY_BITMAP tmpBitmap;
5595 MY_BITMAP *user_cols_written_bitmap;
5596 bool avoidNdbApiWriteOp = false; /* ndb_write_row defaults to write */
5597 #ifdef HAVE_NDB_BINLOG
5598 /* Conflict resolution in slave thread */
5599 if (thd->slave_thread)
5600 {
5601 bool conflict_handled = false;
5602
5603 if (unlikely((error = prepare_conflict_detection(WRITE_ROW,
5604 key_rec,
5605 m_ndb_record,
5606 NULL, /* old_data */
5607 record, /* new_data */
5608 table->write_set,
5609 trans,
5610 NULL, /* code */
5611 &options,
5612 conflict_handled,
5613 avoidNdbApiWriteOp))))
5614 DBUG_RETURN(error);
5615
5616 if (unlikely(conflict_handled))
5617 {
5618 /* No need to continue with operation definition */
5619 /* TODO : Ensure batch execution */
5620 DBUG_RETURN(0);
5621 }
5622 };
5623 #endif
5624
5625 if (m_use_write &&
5626 !avoidNdbApiWriteOp)
5627 {
5628 uchar* mask;
5629
5630 if (applying_binlog(thd))
5631 {
5632 /*
5633 Use write_set when applying binlog to avoid trampling
5634 unchanged columns
5635 */
5636 user_cols_written_bitmap= table->write_set;
5637 mask= (uchar *)(user_cols_written_bitmap->bitmap);
5638 }
5639 else
5640 {
5641 /* Ignore write_set for REPLACE command */
5642 user_cols_written_bitmap= NULL;
5643 mask= NULL;
5644 }
5645 /* TODO : Add conflict detection etc when interpreted write supported */
5646 op= trans->writeTuple(key_rec, (const char *)key_row, m_ndb_record,
5647 (char *)record, mask,
5648 poptions, sizeof(NdbOperation::OperationOptions));
5649 }
5650 else
5651 {
5652 uchar *mask;
5653
5654 /* Check whether Ndb table definition includes any default values. */
5655 if (m_table->hasDefaultValues())
5656 {
5657 DBUG_PRINT("info", ("Not sending values for native defaulted columns"));
5658
5659 /*
5660 If Ndb is unaware of the table's defaults, we must provide all column values to the insert.
5661 This is done using a NULL column mask.
5662 If Ndb is aware of the table's defaults, we only need to provide
5663 the columns explicitly mentioned in the write set,
5664 plus any extra columns required due to bug#41616.
5665 plus the primary key columns required due to bug#42238.
5666 */
5667 /*
5668 The following code for setting user_cols_written_bitmap
5669 should be removed after BUG#41616 and Bug#42238 are fixed
5670 */
5671 /* Copy table write set so that we can add to it */
5672 user_cols_written_bitmap= &tmpBitmap;
5673 bitmap_init(user_cols_written_bitmap, tmpBitmapSpace,
5674 table->write_set->n_bits, false);
5675 bitmap_copy(user_cols_written_bitmap, table->write_set);
5676
5677 for (uint i= 0; i < table->s->fields; i++)
5678 {
5679 Field *field= table->field[i];
5680 DBUG_PRINT("info", ("Field#%u, (%u), Type : %u "
5681 "NO_DEFAULT_VALUE_FLAG : %u PRI_KEY_FLAG : %u",
5682 i,
5683 field->field_index,
5684 field->real_type(),
5685 field->flags & NO_DEFAULT_VALUE_FLAG,
5686 field->flags & PRI_KEY_FLAG));
5687 if ((field->flags & (NO_DEFAULT_VALUE_FLAG | // bug 41616
5688 PRI_KEY_FLAG)) || // bug 42238
5689 ! type_supports_default_value(field->real_type()))
5690 {
5691 bitmap_set_bit(user_cols_written_bitmap, field->field_index);
5692 }
5693 }
5694
5695 mask= (uchar *)(user_cols_written_bitmap->bitmap);
5696 }
5697 else
5698 {
5699 /* No defaults in kernel, provide all columns ourselves */
5700 DBUG_PRINT("info", ("No native defaults, sending all values"));
5701 user_cols_written_bitmap= NULL;
5702 mask = NULL;
5703 }
5704
5705 /* Using insert, we write all non default columns */
5706 op= trans->insertTuple(key_rec, (const char *)key_row, m_ndb_record,
5707 (char *)record, mask, // Default value should be masked
5708 poptions, sizeof(NdbOperation::OperationOptions));
5709 }
5710 if (!(op))
5711 ERR_RETURN(trans->getNdbError());
5712
5713 bool do_batch= !need_flush &&
5714 (batched_update || thd_allow_batch(thd));
5715 uint blob_count= 0;
5716 if (table_share->blob_fields > 0)
5717 {
5718 my_bitmap_map *old_map= dbug_tmp_use_all_columns(table, table->read_set);
5719 /* Set Blob values for all columns updated by the operation */
5720 int res= set_blob_values(op, record - table->record[0],
5721 user_cols_written_bitmap, &blob_count, do_batch);
5722 dbug_tmp_restore_column_map(table->read_set, old_map);
5723 if (res != 0)
5724 DBUG_RETURN(res);
5725 }
5726
5727 m_rows_changed++;
5728
5729 /*
5730 Execute write operation
5731 NOTE When doing inserts with many values in
5732 each INSERT statement it should not be necessary
5733 to NoCommit the transaction between each row.
5734 Find out how this is detected!
5735 */
5736 m_rows_inserted++;
5737 no_uncommitted_rows_update(1);
5738 if (( (m_rows_to_insert == 1 || uses_blobs) && !do_batch ) ||
5739 primary_key_update ||
5740 need_flush)
5741 {
5742 int res= flush_bulk_insert();
5743 if (res != 0)
5744 {
5745 m_skip_auto_increment= TRUE;
5746 DBUG_RETURN(res);
5747 }
5748 }
5749 if ((has_auto_increment) && (m_skip_auto_increment))
5750 {
5751 int ret_val;
5752 if ((ret_val= set_auto_inc(thd, table->next_number_field)))
5753 {
5754 DBUG_RETURN(ret_val);
5755 }
5756 }
5757 m_skip_auto_increment= TRUE;
5758
5759 DBUG_PRINT("exit",("ok"));
5760 DBUG_RETURN(0);
5761 }
5762
5763
5764 /* Compare if an update changes the primary key in a row. */
primary_key_cmp(const uchar * old_row,const uchar * new_row)5765 int ha_ndbcluster::primary_key_cmp(const uchar * old_row, const uchar * new_row)
5766 {
5767 uint keynr= table_share->primary_key;
5768 KEY_PART_INFO *key_part=table->key_info[keynr].key_part;
5769 KEY_PART_INFO *end=key_part+table->key_info[keynr].user_defined_key_parts;
5770
5771 for (; key_part != end ; key_part++)
5772 {
5773 if (!bitmap_is_set(table->write_set, key_part->fieldnr - 1))
5774 continue;
5775
5776 /* The primary key does not allow NULLs. */
5777 assert(!key_part->null_bit);
5778
5779 if (key_part->key_part_flag & (HA_BLOB_PART | HA_VAR_LENGTH_PART))
5780 {
5781
5782 if (key_part->field->cmp_binary((old_row + key_part->offset),
5783 (new_row + key_part->offset),
5784 (ulong) key_part->length))
5785 return 1;
5786 }
5787 else
5788 {
5789 if (memcmp(old_row+key_part->offset, new_row+key_part->offset,
5790 key_part->length))
5791 return 1;
5792 }
5793 }
5794 return 0;
5795 }
5796
5797 #ifdef HAVE_NDB_BINLOG
5798
/*
  Singleton 'exceptions data' attached (via OO_CUSTOMDATA) to every
  refreshTuple operation issued during conflict handling.  Refresh ops
  carry no row-specific state, so one shared static instance suffices;
  op_type == REFRESH_ROW identifies it in handle_conflict_op_error().
*/
static Ndb_exceptions_data StaticRefreshExceptionsData=
{ NULL, NULL, NULL, NULL, NULL, NULL, NULL, REFRESH_ROW, false, 0 };
5801
/**
  handle_row_conflict

  Handle a detected row-level replication conflict: optionally issue a
  refreshTuple operation to realign other clusters, and optionally
  write an entry to the table's exceptions table.

  @param cfn_share       Conflict function config for the table (may be NULL).
  @param table_name      Name of the conflicting table (for messages).
  @param table_has_blobs True if the table has blob columns (refresh of
                         blob rows is unsupported and raises an error).
  @param handling_type   "Row" or "Transaction" - used in warning text.
  @param key_rec/data_rec NdbRecord describing key and full row.
  @param old_row/new_row  Before/after row images (may be NULL).
  @param op_type         Operation that hit the conflict.
  @param conflict_cause  Why it conflicted.
  @param conflict_error  The NDB error that signalled the conflict.
  @param conflict_trans  Transaction to define follow-up ops on.
  @param write_set       Columns written by the operation.
  @param transaction_id  Originating transaction id, or
                         InvalidTransactionId for non-transactional fns.

  @return 0 on success/ignored, NDB error via ERR_RETURN on temporary
          errors (slave retries), or ER_EXCEPTIONS_WRITE_ERROR (slave
          stops).
*/
static int
handle_row_conflict(NDB_CONFLICT_FN_SHARE* cfn_share,
                    const char* table_name,
                    bool table_has_blobs,
                    const char* handling_type,
                    const NdbRecord* key_rec,
                    const NdbRecord* data_rec,
                    const uchar* old_row,
                    const uchar* new_row,
                    enum_conflicting_op_type op_type,
                    enum_conflict_cause conflict_cause,
                    const NdbError& conflict_error,
                    NdbTransaction* conflict_trans,
                    const MY_BITMAP *write_set,
                    Uint64 transaction_id)
{
  DBUG_ENTER("handle_row_conflict");

  /* For deletes the only available image is the old row */
  const uchar* row = (op_type == DELETE_ROW)? old_row : new_row;
  /*
    We will refresh the row if the conflict function requires
    it, or if we are handling a transactional conflict.
  */
  bool refresh_row =
    (conflict_cause == TRANS_IN_CONFLICT) ||
    (cfn_share &&
     (cfn_share->m_flags & CFF_REFRESH_ROWS));

  if (refresh_row)
  {
    /* A conflict has been detected between an applied replicated operation
     * and the data in the DB.
     * The attempt to change the local DB will have been rejected.
     * We now take steps to generate a refresh Binlog event so that
     * other clusters will be re-aligned.
     */
    DBUG_PRINT("info", ("Conflict on table %s.  Operation type : %s, "
                        "conflict cause :%s, conflict error : %u : %s",
                        table_name,
                        ((op_type == WRITE_ROW)? "WRITE_ROW":
                         (op_type == UPDATE_ROW)? "UPDATE_ROW":
                         "DELETE_ROW"),
                        ((conflict_cause == ROW_ALREADY_EXISTS)?"ROW_ALREADY_EXISTS":
                         (conflict_cause == ROW_DOES_NOT_EXIST)?"ROW_DOES_NOT_EXIST":
                         "ROW_IN_CONFLICT"),
                        conflict_error.code,
                        conflict_error.message));

    assert(key_rec != NULL);
    assert(row != NULL);

    /* do/while(0) so early exits can 'break' to the exceptions-table step */
    do
    {
      /* We cannot refresh a row which has Blobs, as we do not support
       * Blob refresh yet.
       * Rows implicated by a transactional conflict function may have
       * Blobs.
       * We will generate an error in this case
       */
      if (table_has_blobs)
      {
        char msg[FN_REFLEN];
        my_snprintf(msg, sizeof(msg), "%s conflict handling "
                    "on table %s failed as table has Blobs which cannot be refreshed.",
                    handling_type,
                    table_name);

        push_warning_printf(current_thd, Sql_condition::SL_WARNING,
                            ER_EXCEPTIONS_WRITE_ERROR,
                            ER(ER_EXCEPTIONS_WRITE_ERROR), msg);

        DBUG_RETURN(ER_EXCEPTIONS_WRITE_ERROR);
      }

      /* When the slave splits an epoch into batches, a conflict row detected
       * and refreshed in an early batch can be written to by operations in
       * a later batch.  As the operations will not have applied, and the
       * row has already been refreshed, we need not attempt to refresh
       * it again
       */
      if ((conflict_cause == ROW_IN_CONFLICT) &&
          (conflict_error.code == (int) error_op_after_refresh_op))
      {
        /* Attempt to apply an operation after the row was refreshed
         * Ignore the error
         */
        DBUG_PRINT("info", ("Operation after refresh error - ignoring"));
        break;
      }

      /* When a delete operation finds that the row does not exist, it indicates
       * a DELETE vs DELETE conflict.  If we refresh the row then we can get
       * non deterministic behaviour depending on slave batching as follows :
       *   Row is deleted
       *
       *     Case 1
       *       Slave applied DELETE, INSERT in 1 batch
       *
       *         After first batch, the row is present (due to INSERT), it is
       *         refreshed.
       *
       *     Case 2
       *       Slave applied DELETE in 1 batch, INSERT in 2nd batch
       *
       *         After first batch, the row is not present, it is refreshed
       *         INSERT is then rejected.
       *
       *   The problem of not being able to 'record' a DELETE vs DELETE conflict
       *   is known.  We attempt at least to give consistent behaviour for
       *   DELETE vs DELETE conflicts by :
       *     NOT refreshing a row when a DELETE vs DELETE conflict is detected
       *   This should map all batching scenarios onto Case1.
       */
      if ((op_type == DELETE_ROW) &&
          (conflict_cause == ROW_DOES_NOT_EXIST))
      {
        g_ndb_slave_state.current_delete_delete_count++;
        DBUG_PRINT("info", ("Delete vs Delete detected, NOT refreshing"));
        break;
      }

      /*
        We give the refresh operation some 'exceptions data', so that
        it can be identified as part of conflict resolution when
        handling operation errors.
        Specifically we need to be able to handle duplicate row
        refreshes.
        As there is no unique exceptions data, we use a singleton.

        We also need to 'force' the ANYVALUE of the row to 0 to
        indicate that the refresh is locally-sourced.
        Otherwise we can 'pickup' the ANYVALUE of a previous
        update to the row.
        If some previous update in this transaction came from a
        Slave, then using its ANYVALUE can result in that Slave
        ignoring this correction.
      */
      NdbOperation::OperationOptions options;
      options.optionsPresent =
        NdbOperation::OperationOptions::OO_CUSTOMDATA |
        NdbOperation::OperationOptions::OO_ANYVALUE;
      options.customData = &StaticRefreshExceptionsData;
      options.anyValue = 0;

      /* Use AnyValue to indicate that this is a refreshTuple op */
      ndbcluster_anyvalue_set_refresh_op(options.anyValue);

      /* Create a refresh to operation to realign other clusters */
      // TODO Do we ever get non-PK key?
      //      Keyless table?
      //      Unique index
      const NdbOperation* refresh_op= conflict_trans->refreshTuple(key_rec,
                                                                   (const char*) row,
                                                                   &options,
                                                                   sizeof(options));
      if (!refresh_op)
      {
        NdbError err = conflict_trans->getNdbError();

        if (err.status == NdbError::TemporaryError)
        {
          /* Slave will roll back and retry entire transaction. */
          ERR_RETURN(err);
        }
        else
        {
          char msg[FN_REFLEN];
          my_snprintf(msg, sizeof(msg), "Row conflict handling "
                      "on table %s hit Ndb error %d '%s'",
                      table_name,
                      err.code,
                      err.message);
          push_warning_printf(current_thd, Sql_condition::SL_WARNING,
                              ER_EXCEPTIONS_WRITE_ERROR,
                              ER(ER_EXCEPTIONS_WRITE_ERROR), msg);
          /* Slave will stop replication. */
          DBUG_RETURN(ER_EXCEPTIONS_WRITE_ERROR);
        }
      }
    } while(0); // End of 'refresh' block
  }

  DBUG_PRINT("info", ("Table %s does%s have an exceptions table",
                      table_name,
                      (cfn_share && cfn_share->m_ex_tab_writer.hasTable())
                      ? "" : " not"));
  if (cfn_share &&
      cfn_share->m_ex_tab_writer.hasTable())
  {
    /* Record the conflict in the table's exceptions table */
    NdbError err;
    if (cfn_share->m_ex_tab_writer.writeRow(conflict_trans,
                                            key_rec,
                                            data_rec,
                                            ::server_id,
                                            ndb_mi_get_master_server_id(),
                                            g_ndb_slave_state.current_master_server_epoch,
                                            old_row,
                                            new_row,
                                            op_type,
                                            conflict_cause,
                                            transaction_id,
                                            write_set,
                                            err) != 0)
    {
      if (err.code != 0)
      {
        if (err.status == NdbError::TemporaryError)
        {
          /* Slave will roll back and retry entire transaction. */
          ERR_RETURN(err);
        }
        else
        {
          char msg[FN_REFLEN];
          my_snprintf(msg, sizeof(msg), "%s conflict handling "
                      "on table %s hit Ndb error %d '%s'",
                      handling_type,
                      table_name,
                      err.code,
                      err.message);
          push_warning_printf(current_thd, Sql_condition::SL_WARNING,
                              ER_EXCEPTIONS_WRITE_ERROR,
                              ER(ER_EXCEPTIONS_WRITE_ERROR), msg);
          /* Slave will stop replication. */
          DBUG_RETURN(ER_EXCEPTIONS_WRITE_ERROR);
        }
      }
    }
  } /* if (cfn_share->m_ex_tab != NULL) */

  DBUG_RETURN(0);
}
6034 #endif /* HAVE_NDB_BINLOG */
6035
6036 /**
6037 Update one record in NDB using primary key.
6038 */
6039
start_bulk_update()6040 bool ha_ndbcluster::start_bulk_update()
6041 {
6042 DBUG_ENTER("ha_ndbcluster::start_bulk_update");
6043 if (!m_use_write && m_ignore_dup_key)
6044 {
6045 DBUG_PRINT("info", ("Batching turned off as duplicate key is "
6046 "ignored by using peek_row"));
6047 DBUG_RETURN(TRUE);
6048 }
6049 DBUG_RETURN(FALSE);
6050 }
6051
/*
  Batched-update entry point: delegate to the common update path with
  is_bulk_update=1.  No duplicate keys are detected at this stage, so
  *dup_key_found is always cleared.
*/
int ha_ndbcluster::bulk_update_row(const uchar *old_data, uchar *new_data,
                                   uint *dup_key_found)
{
  DBUG_ENTER("ha_ndbcluster::bulk_update_row");
  *dup_key_found= 0;
  DBUG_RETURN(ndb_update_row(old_data, new_data, 1));
}
6059
/*
  Execute the accumulated bulk update operations.

  Several cases deliberately skip the execute here and defer it (to
  ndbcluster_commit or a later batch flush) to save round trips; the
  autocommit + read-before-write-removal case instead commits early so
  the correct affected-row count can be reported.

  @param[out] dup_key_found  Always set to 0 (duplicates not detected here).
  @return 0 on success, handler error code otherwise.
*/
int ha_ndbcluster::exec_bulk_update(uint *dup_key_found)
{
  NdbTransaction* trans= m_thd_ndb->trans;
  DBUG_ENTER("ha_ndbcluster::exec_bulk_update");
  *dup_key_found= 0;

  // m_handler must be NULL or point to _this_ handler instance
  assert(m_thd_ndb->m_handler == NULL || m_thd_ndb->m_handler == this);

  if (m_thd_ndb->m_handler &&
      m_read_before_write_removal_possible)
  {
    /*
      This is an autocommit involving only one table and rbwr is on

      Commit the autocommit transaction early(before the usual place
      in ndbcluster_commit) in order to:
      1) save one round trip, "no-commit+commit" converted to "commit"
      2) return the correct number of updated and affected rows
         to the update loop(which will ask handler in rbwr mode)
    */
    DBUG_PRINT("info", ("committing auto-commit+rbwr early"));
    uint ignore_count= 0;
    const int ignore_error= 1;
    if (execute_commit(m_thd_ndb, trans,
                       m_thd_ndb->m_force_send, ignore_error,
                       &ignore_count) != 0)
    {
      no_uncommitted_rows_execute_failure();
      DBUG_RETURN(ndb_err(trans));
    }
    THD *thd= table->in_use;
    if (!applying_binlog(thd))
    {
      /* Subtract ignored rows so the user sees the true affected count */
      DBUG_PRINT("info", ("ignore_count: %u", ignore_count));
      assert(m_rows_changed >= ignore_count);
      assert(m_rows_updated >= ignore_count);
      m_rows_changed-= ignore_count;
      m_rows_updated-= ignore_count;
    }
    DBUG_RETURN(0);
  }

  if (m_thd_ndb->m_unsent_bytes == 0)
  {
    DBUG_PRINT("exit", ("skip execute - no unsent bytes"));
    DBUG_RETURN(0);
  }

  if (thd_allow_batch(table->in_use))
  {
    /*
      Turned on by @@transaction_allow_batching=ON
      or implicitly by slave exec thread
    */
    DBUG_PRINT("exit", ("skip execute - transaction_allow_batching is ON"));
    DBUG_RETURN(0);
  }

  if (m_thd_ndb->m_handler &&
      !m_blobs_pending)
  {
    // Execute at commit time(in 'ndbcluster_commit') to save a round trip
    DBUG_PRINT("exit", ("skip execute - simple autocommit"));
    DBUG_RETURN(0);
  }

  /* None of the defer cases apply - execute (without commit) now */
  uint ignore_count= 0;
  if (execute_no_commit(m_thd_ndb, trans,
                        m_ignore_no_key || m_read_before_write_removal_used,
                        &ignore_count) != 0)
  {
    no_uncommitted_rows_execute_failure();
    DBUG_RETURN(ndb_err(trans));
  }
  THD *thd= table->in_use;
  if (!applying_binlog(thd))
  {
    assert(m_rows_changed >= ignore_count);
    assert(m_rows_updated >= ignore_count);
    m_rows_changed-= ignore_count;
    m_rows_updated-= ignore_count;
  }
  DBUG_RETURN(0);
}
6145
end_bulk_update()6146 void ha_ndbcluster::end_bulk_update()
6147 {
6148 DBUG_ENTER("ha_ndbcluster::end_bulk_update");
6149 DBUG_VOID_RETURN;
6150 }
6151
update_row(const uchar * old_data,uchar * new_data)6152 int ha_ndbcluster::update_row(const uchar *old_data, uchar *new_data)
6153 {
6154 return ndb_update_row(old_data, new_data, 0);
6155 }
6156
/**
  Choose the NdbRecord and key row to use for keyed access to the table.

  Selection order:
  1. use_active_index  -> the current unique index (key taken from 'record')
  2. table has a pk    -> the primary key record (key taken from 'record')
  3. otherwise         -> the hidden primary key previously read into m_ref

  @param[out] key_rec  NdbRecord describing the key layout
  @param[out] key_row  row buffer holding the key values
  @param      record   caller's row image used as key source for cases 1-2
  @param      use_active_index  true to address the row via active_index
*/
void
ha_ndbcluster::setup_key_ref_for_ndb_record(const NdbRecord **key_rec,
                                            const uchar **key_row,
                                            const uchar *record,
                                            bool use_active_index)
{
  DBUG_ENTER("setup_key_ref_for_ndb_record");
  if (use_active_index)
  {
    /* Use unique key to access table */
    DBUG_PRINT("info", ("Using unique index (%u)", active_index));
    assert((table->key_info[active_index].flags & HA_NOSAME));
    /* Can't use key if we didn't read it first */
    assert(bitmap_is_subset(m_key_fields[active_index], table->read_set));
    *key_rec= m_index[active_index].ndb_unique_record_row;
    *key_row= record;
  }
  else if (table_share->primary_key != MAX_KEY)
  {
    /* Use primary key to access table */
    DBUG_PRINT("info", ("Using primary key"));
    /* Can't use pk if we didn't read it first */
    assert(bitmap_is_subset(m_pk_bitmap_p, table->read_set));
    *key_rec= m_index[table_share->primary_key].ndb_unique_record_row;
    *key_row= record;
  }
  else
  {
    /* Use hidden primary key previously read into m_ref. */
    DBUG_PRINT("info", ("Using hidden primary key (%llu)", m_ref));
    /* Can't use hidden pk if we didn't read it first */
    assert(bitmap_is_subset(m_pk_bitmap_p, table->read_set));
    /* rbwr would have skipped the read that fills m_ref, hence forbidden. */
    assert(m_read_before_write_removal_used == false);
    *key_rec= m_ndb_hidden_key_record;
    *key_row= (const uchar *)(&m_ref);
  }
  DBUG_VOID_RETURN;
}
6195
6196
6197 /*
6198 Update one record in NDB using primary key
6199 */
6200
ndb_update_row(const uchar * old_data,uchar * new_data,int is_bulk_update)6201 int ha_ndbcluster::ndb_update_row(const uchar *old_data, uchar *new_data,
6202 int is_bulk_update)
6203 {
6204 THD *thd= table->in_use;
6205 Thd_ndb *thd_ndb= m_thd_ndb;
6206 NdbScanOperation* cursor= m_active_cursor;
6207 const NdbOperation *op;
6208 uint32 old_part_id= ~uint32(0), new_part_id= ~uint32(0);
6209 int error;
6210 longlong func_value;
6211 Uint32 func_value_uint32;
6212 bool have_pk= (table_share->primary_key != MAX_KEY);
6213 bool pk_update= (!m_read_before_write_removal_possible &&
6214 have_pk &&
6215 bitmap_is_overlapping(table->write_set, m_pk_bitmap_p) &&
6216 primary_key_cmp(old_data, new_data));
6217 bool batch_allowed= !m_update_cannot_batch &&
6218 (is_bulk_update || thd_allow_batch(thd));
6219 NdbOperation::SetValueSpec sets[2];
6220 Uint32 num_sets= 0;
6221
6222 DBUG_ENTER("ndb_update_row");
6223
6224 /* Start a transaction now if none available
6225 * (Manual Binlog application...)
6226 */
6227 /* TODO : Consider hinting */
6228 if (unlikely((!m_thd_ndb->trans) &&
6229 !get_transaction(error)))
6230 {
6231 DBUG_RETURN(error);
6232 }
6233
6234 NdbTransaction *trans= m_thd_ndb->trans;
6235 assert(trans);
6236
6237 error = check_slave_state(thd);
6238 if (unlikely(error))
6239 DBUG_RETURN(error);
6240
6241 /*
6242 * If IGNORE the ignore constraint violations on primary and unique keys,
6243 * but check that it is not part of INSERT ... ON DUPLICATE KEY UPDATE
6244 */
6245 if (m_ignore_dup_key && (thd->lex->sql_command == SQLCOM_UPDATE ||
6246 thd->lex->sql_command == SQLCOM_UPDATE_MULTI))
6247 {
6248 NDB_WRITE_OP write_op= (pk_update) ? NDB_PK_UPDATE : NDB_UPDATE;
6249 int peek_res= peek_indexed_rows(new_data, write_op);
6250
6251 if (!peek_res)
6252 {
6253 DBUG_RETURN(HA_ERR_FOUND_DUPP_KEY);
6254 }
6255 if (peek_res != HA_ERR_KEY_NOT_FOUND)
6256 DBUG_RETURN(peek_res);
6257 }
6258
6259 ha_statistic_increment(&SSV::ha_update_count);
6260
6261 bool skip_partition_for_unique_index= FALSE;
6262 if (m_use_partition_pruning)
6263 {
6264 if (!cursor && m_read_before_write_removal_used)
6265 {
6266 ndb_index_type type= get_index_type(active_index);
6267 /*
6268 Ndb unique indexes are global so when
6269 m_read_before_write_removal_used is active
6270 the unique index can be used directly for update
6271 without finding the partitions
6272 */
6273 if (type == UNIQUE_INDEX ||
6274 type == UNIQUE_ORDERED_INDEX)
6275 {
6276 skip_partition_for_unique_index= TRUE;
6277 goto skip_partition_pruning;
6278 }
6279 }
6280 if ((error= get_parts_for_update(old_data, new_data, table->record[0],
6281 m_part_info, &old_part_id, &new_part_id,
6282 &func_value)))
6283 {
6284 m_part_info->err_value= func_value;
6285 DBUG_RETURN(error);
6286 }
6287 DBUG_PRINT("info", ("old_part_id: %u new_part_id: %u", old_part_id, new_part_id));
6288 skip_partition_pruning:
6289 (void)0;
6290 }
6291
6292 /*
6293 * Check for update of primary key or partition change
6294 * for special handling
6295 */
6296 if (pk_update || old_part_id != new_part_id)
6297 {
6298 DBUG_RETURN(ndb_pk_update_row(thd, old_data, new_data));
6299 }
6300 /*
6301 If we are updating a unique key with auto_increment
6302 then we need to update the auto_increment counter
6303 */
6304 if (table->found_next_number_field &&
6305 bitmap_is_set(table->write_set,
6306 table->found_next_number_field->field_index) &&
6307 (error= set_auto_inc(thd, table->found_next_number_field)))
6308 {
6309 DBUG_RETURN(error);
6310 }
6311 /*
6312 Set only non-primary-key attributes.
6313 We already checked that any primary key attribute in write_set has no
6314 real changes.
6315 */
6316 bitmap_copy(&m_bitmap, table->write_set);
6317 bitmap_subtract(&m_bitmap, m_pk_bitmap_p);
6318 uchar *mask= (uchar *)(m_bitmap.bitmap);
6319 assert(!pk_update);
6320
6321 NdbOperation::OperationOptions *poptions = NULL;
6322 NdbOperation::OperationOptions options;
6323 options.optionsPresent=0;
6324
6325 /* Need to set the value of any user-defined partitioning function.
6326 (excecpt for when using unique index)
6327 */
6328 if (m_user_defined_partitioning && !skip_partition_for_unique_index)
6329 {
6330 if (func_value >= INT_MAX32)
6331 func_value_uint32= INT_MAX32;
6332 else
6333 func_value_uint32= (uint32)func_value;
6334 sets[num_sets].column= get_partition_id_column();
6335 sets[num_sets].value= &func_value_uint32;
6336 num_sets++;
6337
6338 if (!cursor)
6339 {
6340 options.optionsPresent|= NdbOperation::OperationOptions::OO_PARTITION_ID;
6341 options.partitionId= new_part_id;
6342 }
6343 }
6344
6345 eventSetAnyValue(thd, &options);
6346
6347 const bool need_flush=
6348 thd_ndb->add_row_check_if_batch_full(m_bytes_per_write);
6349
6350 const Uint32 authorValue = 1;
6351 if ((thd->slave_thread) &&
6352 (m_table->getExtraRowAuthorBits()))
6353 {
6354 /* Set author to indicate slave updated last */
6355 sets[num_sets].column= NdbDictionary::Column::ROW_AUTHOR;
6356 sets[num_sets].value= &authorValue;
6357 num_sets++;
6358 }
6359
6360 if (num_sets)
6361 {
6362 options.optionsPresent|= NdbOperation::OperationOptions::OO_SETVALUE;
6363 options.extraSetValues= sets;
6364 options.numExtraSetValues= num_sets;
6365 }
6366
6367 if (thd->slave_thread || THDVAR(thd, deferred_constraints))
6368 {
6369 options.optionsPresent |=
6370 NdbOperation::OperationOptions::OO_DEFERRED_CONSTAINTS;
6371 }
6372
6373 if (thd_test_options(thd, OPTION_NO_FOREIGN_KEY_CHECKS))
6374 {
6375 DBUG_PRINT("info", ("Disabling foreign keys"));
6376 options.optionsPresent |=
6377 NdbOperation::OperationOptions::OO_DISABLE_FK;
6378 }
6379
6380 if (cursor)
6381 {
6382 /*
6383 We are scanning records and want to update the record
6384 that was just found, call updateCurrentTuple on the cursor
6385 to take over the lock to a new update operation
6386 And thus setting the primary key of the record from
6387 the active record in cursor
6388 */
6389 DBUG_PRINT("info", ("Calling updateTuple on cursor, write_set=0x%x",
6390 table->write_set->bitmap[0]));
6391
6392 if (options.optionsPresent != 0)
6393 poptions = &options;
6394
6395 if (!(op= cursor->updateCurrentTuple(trans, m_ndb_record,
6396 (const char*)new_data, mask,
6397 poptions,
6398 sizeof(NdbOperation::OperationOptions))))
6399 ERR_RETURN(trans->getNdbError());
6400
6401 m_lock_tuple= FALSE;
6402 thd_ndb->m_unsent_bytes+= 12;
6403 }
6404 else
6405 {
6406 const NdbRecord *key_rec;
6407 const uchar *key_row;
6408 setup_key_ref_for_ndb_record(&key_rec, &key_row, new_data,
6409 m_read_before_write_removal_used);
6410
6411 bool avoidNdbApiWriteOp = true; /* Default update op for ndb_update_row */
6412 #ifdef HAVE_NDB_BINLOG
6413 Uint32 buffer[ MAX_CONFLICT_INTERPRETED_PROG_SIZE ];
6414 NdbInterpretedCode code(m_table, buffer,
6415 sizeof(buffer)/sizeof(buffer[0]));
6416
6417 if (thd->slave_thread)
6418 {
6419 bool conflict_handled = false;
6420 /* Conflict resolution in slave thread. */
6421 DBUG_PRINT("info", ("Slave thread, preparing conflict resolution for update with mask : %x", *((Uint32*)mask)));
6422
6423 if (unlikely((error = prepare_conflict_detection(UPDATE_ROW,
6424 key_rec,
6425 m_ndb_record,
6426 old_data,
6427 new_data,
6428 table->write_set,
6429 trans,
6430 &code,
6431 &options,
6432 conflict_handled,
6433 avoidNdbApiWriteOp))))
6434 DBUG_RETURN(error);
6435
6436 if (unlikely(conflict_handled))
6437 {
6438 /* No need to continue with operation defintion */
6439 /* TODO : Ensure batch execution */
6440 DBUG_RETURN(0);
6441 }
6442 }
6443 #endif /* HAVE_NDB_BINLOG */
6444 if (options.optionsPresent !=0)
6445 poptions= &options;
6446
6447 if (likely(avoidNdbApiWriteOp))
6448 {
6449 if (!(op= trans->updateTuple(key_rec, (const char *)key_row,
6450 m_ndb_record, (const char*)new_data, mask,
6451 poptions,
6452 sizeof(NdbOperation::OperationOptions))))
6453 ERR_RETURN(trans->getNdbError());
6454 }
6455 else
6456 {
6457 DBUG_PRINT("info", ("Update op using writeTuple"));
6458 if (!(op= trans->writeTuple(key_rec, (const char *)key_row,
6459 m_ndb_record, (const char*)new_data, mask,
6460 poptions,
6461 sizeof(NdbOperation::OperationOptions))))
6462 ERR_RETURN(trans->getNdbError());
6463 }
6464 }
6465
6466 uint blob_count= 0;
6467 if (uses_blob_value(table->write_set))
6468 {
6469 int row_offset= (int)(new_data - table->record[0]);
6470 int res= set_blob_values(op, row_offset, table->write_set, &blob_count,
6471 (batch_allowed && !need_flush));
6472 if (res != 0)
6473 DBUG_RETURN(res);
6474 }
6475 uint ignore_count= 0;
6476 /*
6477 Batch update operation if we are doing a scan for update, unless
6478 there exist UPDATE AFTER triggers
6479 */
6480 if (m_update_cannot_batch ||
6481 !(cursor || (batch_allowed && have_pk)) ||
6482 need_flush)
6483 {
6484 if (execute_no_commit(m_thd_ndb, trans,
6485 m_ignore_no_key || m_read_before_write_removal_used,
6486 &ignore_count) != 0)
6487 {
6488 no_uncommitted_rows_execute_failure();
6489 DBUG_RETURN(ndb_err(trans));
6490 }
6491 }
6492 else if (blob_count > 0)
6493 m_blobs_pending= TRUE;
6494
6495 m_rows_changed++;
6496 m_rows_updated++;
6497
6498 if (!applying_binlog(thd))
6499 {
6500 assert(m_rows_changed >= ignore_count);
6501 assert(m_rows_updated >= ignore_count);
6502 m_rows_changed-= ignore_count;
6503 m_rows_updated-= ignore_count;
6504 }
6505
6506 DBUG_RETURN(0);
6507 }
6508
6509
6510 /*
6511 handler delete interface
6512 */
6513
delete_row(const uchar * record)6514 int ha_ndbcluster::delete_row(const uchar *record)
6515 {
6516 return ndb_delete_row(record, FALSE);
6517 }
6518
start_bulk_delete()6519 bool ha_ndbcluster::start_bulk_delete()
6520 {
6521 DBUG_ENTER("start_bulk_delete");
6522 m_is_bulk_delete = true;
6523 DBUG_RETURN(0); // Bulk delete used by handler
6524 }
6525
end_bulk_delete()6526 int ha_ndbcluster::end_bulk_delete()
6527 {
6528 NdbTransaction* trans= m_thd_ndb->trans;
6529 DBUG_ENTER("end_bulk_delete");
6530 assert(m_is_bulk_delete); // Don't allow end() without start()
6531 m_is_bulk_delete = false;
6532
6533 // m_handler must be NULL or point to _this_ handler instance
6534 assert(m_thd_ndb->m_handler == NULL || m_thd_ndb->m_handler == this);
6535
6536 if (m_thd_ndb->m_handler &&
6537 m_read_before_write_removal_possible)
6538 {
6539 /*
6540 This is an autocommit involving only one table and rbwr is on
6541
6542 Commit the autocommit transaction early(before the usual place
6543 in ndbcluster_commit) in order to:
6544 1) save one round trip, "no-commit+commit" converted to "commit"
6545 2) return the correct number of updated and affected rows
6546 to the delete loop(which will ask handler in rbwr mode)
6547 */
6548 DBUG_PRINT("info", ("committing auto-commit+rbwr early"));
6549 uint ignore_count= 0;
6550 const int ignore_error= 1;
6551 if (execute_commit(m_thd_ndb, trans,
6552 m_thd_ndb->m_force_send, ignore_error,
6553 &ignore_count) != 0)
6554 {
6555 no_uncommitted_rows_execute_failure();
6556 m_rows_deleted = 0;
6557 DBUG_RETURN(ndb_err(trans));
6558 }
6559 THD *thd= table->in_use;
6560 if (!applying_binlog(thd))
6561 {
6562 DBUG_PRINT("info", ("ignore_count: %u", ignore_count));
6563 assert(m_rows_deleted >= ignore_count);
6564 m_rows_deleted-= ignore_count;
6565 }
6566 DBUG_RETURN(0);
6567 }
6568
6569 if (m_thd_ndb->m_unsent_bytes == 0)
6570 {
6571 DBUG_PRINT("exit", ("skip execute - no unsent bytes"));
6572 DBUG_RETURN(0);
6573 }
6574
6575 if (thd_allow_batch(table->in_use))
6576 {
6577 /*
6578 Turned on by @@transaction_allow_batching=ON
6579 or implicitly by slave exec thread
6580 */
6581 DBUG_PRINT("exit", ("skip execute - transaction_allow_batching is ON"));
6582 DBUG_RETURN(0);
6583 }
6584
6585 if (m_thd_ndb->m_handler)
6586 {
6587 // Execute at commit time(in 'ndbcluster_commit') to save a round trip
6588 DBUG_PRINT("exit", ("skip execute - simple autocommit"));
6589 DBUG_RETURN(0);
6590 }
6591
6592 uint ignore_count= 0;
6593 if (execute_no_commit(m_thd_ndb, trans,
6594 m_ignore_no_key || m_read_before_write_removal_used,
6595 &ignore_count) != 0)
6596 {
6597 no_uncommitted_rows_execute_failure();
6598 DBUG_RETURN(ndb_err(trans));
6599 }
6600
6601 THD *thd= table->in_use;
6602 if (!applying_binlog(thd))
6603 {
6604 assert(m_rows_deleted >= ignore_count);
6605 m_rows_deleted-= ignore_count;
6606 no_uncommitted_rows_update(ignore_count);
6607 }
6608 DBUG_RETURN(0);
6609 }
6610
6611
6612 /**
6613 Delete one record from NDB, using primary key .
6614 */
6615
ndb_delete_row(const uchar * record,bool primary_key_update)6616 int ha_ndbcluster::ndb_delete_row(const uchar *record,
6617 bool primary_key_update)
6618 {
6619 THD *thd= table->in_use;
6620 Thd_ndb *thd_ndb= m_thd_ndb;
6621 NdbScanOperation* cursor= m_active_cursor;
6622 const NdbOperation *op;
6623 uint32 part_id= ~uint32(0);
6624 int error;
6625 bool allow_batch= !m_delete_cannot_batch &&
6626 (m_is_bulk_delete || thd_allow_batch(thd));
6627
6628 DBUG_ENTER("ndb_delete_row");
6629
6630 /* Start a transaction now if none available
6631 * (Manual Binlog application...)
6632 */
6633 /* TODO : Consider hinting */
6634 if (unlikely((!m_thd_ndb->trans) &&
6635 !get_transaction(error)))
6636 {
6637 DBUG_RETURN(error);
6638 }
6639
6640 NdbTransaction *trans= m_thd_ndb->trans;
6641 assert(trans);
6642
6643 error = check_slave_state(thd);
6644 if (unlikely(error))
6645 DBUG_RETURN(error);
6646
6647 ha_statistic_increment(&SSV::ha_delete_count);
6648 m_rows_changed++;
6649
6650 bool skip_partition_for_unique_index= FALSE;
6651 if (m_use_partition_pruning)
6652 {
6653 if (!cursor && m_read_before_write_removal_used)
6654 {
6655 ndb_index_type type= get_index_type(active_index);
6656 /*
6657 Ndb unique indexes are global so when
6658 m_read_before_write_removal_used is active
6659 the unique index can be used directly for deleting
6660 without finding the partitions
6661 */
6662 if (type == UNIQUE_INDEX ||
6663 type == UNIQUE_ORDERED_INDEX)
6664 {
6665 skip_partition_for_unique_index= TRUE;
6666 goto skip_partition_pruning;
6667 }
6668 }
6669 if ((error= get_part_for_delete(record, table->record[0], m_part_info,
6670 &part_id)))
6671 {
6672 DBUG_RETURN(error);
6673 }
6674 skip_partition_pruning:
6675 (void)0;
6676 }
6677
6678 NdbOperation::OperationOptions options;
6679 NdbOperation::OperationOptions *poptions = NULL;
6680 options.optionsPresent=0;
6681
6682 eventSetAnyValue(thd, &options);
6683
6684 /*
6685 Poor approx. let delete ~ tabsize / 4
6686 */
6687 uint delete_size= 12 + (m_bytes_per_write >> 2);
6688 const bool need_flush =
6689 thd_ndb->add_row_check_if_batch_full(delete_size);
6690
6691 if (thd->slave_thread || THDVAR(thd, deferred_constraints))
6692 {
6693 options.optionsPresent |=
6694 NdbOperation::OperationOptions::OO_DEFERRED_CONSTAINTS;
6695 }
6696
6697 if (thd_test_options(thd, OPTION_NO_FOREIGN_KEY_CHECKS))
6698 {
6699 DBUG_PRINT("info", ("Disabling foreign keys"));
6700 options.optionsPresent |=
6701 NdbOperation::OperationOptions::OO_DISABLE_FK;
6702 }
6703
6704 if (cursor)
6705 {
6706 if (options.optionsPresent != 0)
6707 poptions = &options;
6708
6709 /*
6710 We are scanning records and want to delete the record
6711 that was just found, call deleteTuple on the cursor
6712 to take over the lock to a new delete operation
6713 And thus setting the primary key of the record from
6714 the active record in cursor
6715 */
6716 DBUG_PRINT("info", ("Calling deleteTuple on cursor"));
6717 if ((op = cursor->deleteCurrentTuple(trans, m_ndb_record,
6718 NULL, // result_row
6719 NULL, // result_mask
6720 poptions,
6721 sizeof(NdbOperation::OperationOptions))) == 0)
6722 ERR_RETURN(trans->getNdbError());
6723 m_lock_tuple= FALSE;
6724 thd_ndb->m_unsent_bytes+= 12;
6725
6726 no_uncommitted_rows_update(-1);
6727 m_rows_deleted++;
6728
6729 if (!(primary_key_update || m_delete_cannot_batch))
6730 {
6731 // If deleting from cursor, NoCommit will be handled in next_result
6732 DBUG_RETURN(0);
6733 }
6734 }
6735 else
6736 {
6737 const NdbRecord *key_rec;
6738 const uchar *key_row;
6739
6740 if (m_user_defined_partitioning && !skip_partition_for_unique_index)
6741 {
6742 options.optionsPresent|= NdbOperation::OperationOptions::OO_PARTITION_ID;
6743 options.partitionId= part_id;
6744 }
6745
6746 setup_key_ref_for_ndb_record(&key_rec, &key_row, record,
6747 m_read_before_write_removal_used);
6748
6749 #ifdef HAVE_NDB_BINLOG
6750 Uint32 buffer[ MAX_CONFLICT_INTERPRETED_PROG_SIZE ];
6751 NdbInterpretedCode code(m_table, buffer,
6752 sizeof(buffer)/sizeof(buffer[0]));
6753 if (thd->slave_thread)
6754 {
6755 bool conflict_handled = false;
6756 bool dummy_delete_does_not_care = false;
6757
6758 /* Conflict resolution in slave thread. */
6759 if (unlikely((error = prepare_conflict_detection(DELETE_ROW,
6760 key_rec,
6761 m_ndb_record,
6762 key_row, /* old_data */
6763 NULL, /* new_data */
6764 table->write_set,
6765 trans,
6766 &code,
6767 &options,
6768 conflict_handled,
6769 dummy_delete_does_not_care))))
6770 DBUG_RETURN(error);
6771
6772 if (unlikely(conflict_handled))
6773 {
6774 /* No need to continue with operation definition */
6775 /* TODO : Ensure batch execution */
6776 DBUG_RETURN(0);
6777 }
6778 }
6779 #endif /* HAVE_NDB_BINLOG */
6780 if (options.optionsPresent != 0)
6781 poptions= &options;
6782
6783 if (!(op=trans->deleteTuple(key_rec, (const char *)key_row,
6784 m_ndb_record,
6785 NULL, // row
6786 NULL, // mask
6787 poptions,
6788 sizeof(NdbOperation::OperationOptions))))
6789 ERR_RETURN(trans->getNdbError());
6790
6791 no_uncommitted_rows_update(-1);
6792 m_rows_deleted++;
6793
6794 /*
6795 Check if we can batch the delete.
6796
6797 We don't batch deletes as part of primary key updates.
6798 We do not batch deletes on tables with no primary key. For such tables,
6799 replication uses full table scan to locate the row to delete. The
6800 problem is the following scenario when deleting 2 (or more) rows:
6801
6802 1. Table scan to locate the first row.
6803 2. Delete the row, batched so no execute.
6804 3. Table scan to locate the second row is executed, along with the
6805 batched delete operation from step 2.
6806 4. The first row is returned from nextResult() (not deleted yet).
6807 5. The kernel deletes the row (operation from step 2).
6808 6. lockCurrentTuple() is called on the row returned in step 4. However,
6809 as that row is now deleted, the operation fails and the transaction
6810 is aborted.
6811 7. The delete of the second tuple now fails, as the transaction has
6812 been aborted.
6813 */
6814
6815 if ( allow_batch &&
6816 table_share->primary_key != MAX_KEY &&
6817 !primary_key_update &&
6818 !need_flush)
6819 {
6820 DBUG_RETURN(0);
6821 }
6822 }
6823
6824 // Execute delete operation
6825 uint ignore_count= 0;
6826 if (execute_no_commit(m_thd_ndb, trans,
6827 m_ignore_no_key || m_read_before_write_removal_used,
6828 &ignore_count) != 0)
6829 {
6830 no_uncommitted_rows_execute_failure();
6831 DBUG_RETURN(ndb_err(trans));
6832 }
6833 if (!primary_key_update)
6834 {
6835 if (!applying_binlog(thd))
6836 {
6837 assert(m_rows_deleted >= ignore_count);
6838 m_rows_deleted-= ignore_count;
6839 no_uncommitted_rows_update(ignore_count);
6840 }
6841 }
6842 DBUG_RETURN(0);
6843 }
6844
6845 /**
6846 Unpack a record returned from a scan.
6847 We copy field-for-field to
6848 1. Avoid unnecessary copying for sparse rows.
6849 2. Properly initialize not used null bits.
6850 Note that we do not unpack all returned rows; some primary/unique key
6851 operations can read directly into the destination row.
6852 */
unpack_record(uchar * dst_row,const uchar * src_row)6853 void ha_ndbcluster::unpack_record(uchar *dst_row, const uchar *src_row)
6854 {
6855 int res;
6856 assert(src_row != NULL);
6857
6858 my_ptrdiff_t dst_offset= dst_row - table->record[0];
6859 my_ptrdiff_t src_offset= src_row - table->record[0];
6860
6861 /* Initialize the NULL bitmap. */
6862 memset(dst_row, 0xff, table->s->null_bytes);
6863
6864 uchar *blob_ptr= m_blobs_buffer;
6865
6866 for (uint i= 0; i < table_share->fields; i++)
6867 {
6868 Field *field= table->field[i];
6869 if (bitmap_is_set(table->read_set, i))
6870 {
6871 if (field->type() == MYSQL_TYPE_BIT)
6872 {
6873 Field_bit *field_bit= static_cast<Field_bit*>(field);
6874 if (!field->is_real_null(src_offset))
6875 {
6876 field->move_field_offset(src_offset);
6877 longlong value= field_bit->val_int();
6878 field->move_field_offset(dst_offset-src_offset);
6879 field_bit->set_notnull();
6880 /* Field_bit in DBUG requires the bit set in write_set for store(). */
6881 my_bitmap_map *old_map=
6882 dbug_tmp_use_all_columns(table, table->write_set);
6883 int res = field_bit->store(value, true);
6884 assert(res == 0); NDB_IGNORE_VALUE(res);
6885 dbug_tmp_restore_column_map(table->write_set, old_map);
6886 field->move_field_offset(-dst_offset);
6887 }
6888 }
6889 else if (field->flags & BLOB_FLAG)
6890 {
6891 Field_blob *field_blob= (Field_blob *)field;
6892 NdbBlob *ndb_blob= m_value[i].blob;
6893 /* unpack_record *only* called for scan result processing
6894 * *while* the scan is open and the Blob is active.
6895 * Verify Blob state to be certain.
6896 * Accessing PK/UK op Blobs after execute() is unsafe
6897 */
6898 assert(ndb_blob != 0);
6899 assert(ndb_blob->getState() == NdbBlob::Active);
6900 int isNull;
6901 res= ndb_blob->getNull(isNull);
6902 assert(res == 0); // Already succeeded once
6903 Uint64 len64= 0;
6904 field_blob->move_field_offset(dst_offset);
6905 if (!isNull)
6906 {
6907 res= ndb_blob->getLength(len64);
6908 assert(res == 0 && len64 <= (Uint64)0xffffffff);
6909
6910 if(len64 > field_blob->max_data_length())
6911 {
6912 len64 = calc_ndb_blob_len(ndb_blob->getColumn()->getCharset(),
6913 blob_ptr, field_blob->max_data_length());
6914
6915 // push a warning
6916 push_warning_printf(table->in_use, Sql_condition::SL_WARNING,
6917 WARN_DATA_TRUNCATED,
6918 "Truncated value from TEXT field \'%s\'", field_blob->field_name);
6919
6920 }
6921 field->set_notnull();
6922 }
6923 /* Need not set_null(), as we initialized null bits to 1 above. */
6924 field_blob->set_ptr((uint32)len64, blob_ptr);
6925 field_blob->move_field_offset(-dst_offset);
6926 blob_ptr+= (len64 + 7) & ~((Uint64)7);
6927 }
6928 else
6929 {
6930 field->move_field_offset(src_offset);
6931 /* Normal field (not blob or bit type). */
6932 if (!field->is_null())
6933 {
6934 /* Only copy actually used bytes of varstrings. */
6935 uint32 actual_length= field_used_length(field);
6936 uchar *src_ptr= field->ptr;
6937 field->move_field_offset(dst_offset - src_offset);
6938 field->set_notnull();
6939 memcpy(field->ptr, src_ptr, actual_length);
6940 field->move_field_offset(-dst_offset);
6941 }
6942 else
6943 field->move_field_offset(-src_offset);
6944 /* No action needed for a NULL field. */
6945 }
6946 }
6947 }
6948 }
6949
6950
6951 /**
6952 Get the default value of the field from default_values of the table.
6953 */
get_default_value(void * def_val,Field * field)6954 static void get_default_value(void *def_val, Field *field)
6955 {
6956 assert(field != NULL);
6957
6958 my_ptrdiff_t src_offset= field->table->default_values_offset();
6959
6960 {
6961 if (bitmap_is_set(field->table->read_set, field->field_index))
6962 {
6963 if (field->type() == MYSQL_TYPE_BIT)
6964 {
6965 Field_bit *field_bit= static_cast<Field_bit*>(field);
6966 if (!field->is_real_null(src_offset))
6967 {
6968 field->move_field_offset(src_offset);
6969 longlong value= field_bit->val_int();
6970 /* Map to NdbApi format - two Uint32s */
6971 Uint32 out[2];
6972 out[0] = 0;
6973 out[1] = 0;
6974 for (int b=0; b < 64; b++)
6975 {
6976 out[b >> 5] |= (value & 1) << (b & 31);
6977
6978 value= value >> 1;
6979 }
6980 memcpy(def_val, out, sizeof(longlong));
6981 field->move_field_offset(-src_offset);
6982 }
6983 }
6984 else if (field->flags & BLOB_FLAG)
6985 {
6986 assert(false);
6987 }
6988 else
6989 {
6990 field->move_field_offset(src_offset);
6991 /* Normal field (not blob or bit type). */
6992 if (!field->is_null())
6993 {
6994 /* Only copy actually used bytes of varstrings. */
6995 uint32 actual_length= field_used_length(field);
6996 uchar *src_ptr= field->ptr;
6997 field->set_notnull();
6998 memcpy(def_val, src_ptr, actual_length);
6999 }
7000 field->move_field_offset(-src_offset);
7001 /* No action needed for a NULL field. */
7002 }
7003 }
7004 }
7005 }
7006
7007 /*
7008 DBUG_EXECUTE("value", print_results(););
7009 */
7010
print_results()7011 void ha_ndbcluster::print_results()
7012 {
7013 DBUG_ENTER("print_results");
7014
7015 #ifndef NDEBUG
7016
7017 char buf_type[MAX_FIELD_WIDTH], buf_val[MAX_FIELD_WIDTH];
7018 String type(buf_type, sizeof(buf_type), &my_charset_bin);
7019 String val(buf_val, sizeof(buf_val), &my_charset_bin);
7020 for (uint f= 0; f < table_share->fields; f++)
7021 {
7022 /* Use DBUG_PRINT since DBUG_FILE cannot be filtered out */
7023 char buf[2000];
7024 Field *field;
7025 void* ptr;
7026 NdbValue value;
7027
7028 buf[0]= 0;
7029 field= table->field[f];
7030 if (!(value= m_value[f]).ptr)
7031 {
7032 my_stpcpy(buf, "not read");
7033 goto print_value;
7034 }
7035
7036 ptr= field->ptr;
7037
7038 if (! (field->flags & BLOB_FLAG))
7039 {
7040 if (value.rec->isNULL())
7041 {
7042 my_stpcpy(buf, "NULL");
7043 goto print_value;
7044 }
7045 type.length(0);
7046 val.length(0);
7047 field->sql_type(type);
7048 field->val_str(&val);
7049 my_snprintf(buf, sizeof(buf), "%s %s", type.c_ptr(), val.c_ptr());
7050 }
7051 else
7052 {
7053 NdbBlob *ndb_blob= value.blob;
7054 bool isNull= TRUE;
7055 assert(ndb_blob->getState() == NdbBlob::Active);
7056 ndb_blob->getNull(isNull);
7057 if (isNull)
7058 my_stpcpy(buf, "NULL");
7059 }
7060
7061 print_value:
7062 DBUG_PRINT("value", ("%u,%s: %s", f, field->field_name, buf));
7063 }
7064 #endif
7065 DBUG_VOID_RETURN;
7066 }
7067
7068
index_init(uint index,bool sorted)7069 int ha_ndbcluster::index_init(uint index, bool sorted)
7070 {
7071 DBUG_ENTER("ha_ndbcluster::index_init");
7072 DBUG_PRINT("enter", ("index: %u sorted: %d", index, sorted));
7073 active_index= index;
7074 m_sorted= sorted;
7075 /*
7076 Locks are are explicitly released in scan
7077 unless m_lock.type == TL_READ_HIGH_PRIORITY
7078 and no sub-sequent call to unlock_row()
7079 */
7080 m_lock_tuple= FALSE;
7081
7082 if (table_share->primary_key == MAX_KEY &&
7083 m_use_partition_pruning)
7084 {
7085 bitmap_union(table->read_set, &m_part_info->full_part_field_set);
7086 }
7087
7088 DBUG_RETURN(0);
7089 }
7090
7091
index_end()7092 int ha_ndbcluster::index_end()
7093 {
7094 DBUG_ENTER("ha_ndbcluster::index_end");
7095 DBUG_RETURN(close_scan());
7096 }
7097
7098 /**
7099 Check if key contains null.
7100 */
7101 static
7102 int
check_null_in_key(const KEY * key_info,const uchar * key,uint key_len)7103 check_null_in_key(const KEY* key_info, const uchar *key, uint key_len)
7104 {
7105 KEY_PART_INFO *curr_part, *end_part;
7106 const uchar* end_ptr= key + key_len;
7107 curr_part= key_info->key_part;
7108 end_part= curr_part + key_info->user_defined_key_parts;
7109
7110 for (; curr_part != end_part && key < end_ptr; curr_part++)
7111 {
7112 if (curr_part->null_bit && *key)
7113 return 1;
7114
7115 key += curr_part->store_length;
7116 }
7117 return 0;
7118 }
7119
index_read(uchar * buf,const uchar * key,uint key_len,enum ha_rkey_function find_flag)7120 int ha_ndbcluster::index_read(uchar *buf,
7121 const uchar *key, uint key_len,
7122 enum ha_rkey_function find_flag)
7123 {
7124 key_range start_key, end_key, *end_key_p=NULL;
7125 bool descending= FALSE;
7126 DBUG_ENTER("ha_ndbcluster::index_read");
7127 DBUG_PRINT("enter", ("active_index: %u, key_len: %u, find_flag: %d",
7128 active_index, key_len, find_flag));
7129
7130 start_key.key= key;
7131 start_key.length= key_len;
7132 start_key.flag= find_flag;
7133 switch (find_flag) {
7134 case HA_READ_KEY_EXACT:
7135 /**
7136 * Specify as a closed EQ_RANGE.
7137 * Setting HA_READ_AFTER_KEY seems odd, but this is according
7138 * to MySQL convention, see opt_range.cc.
7139 */
7140 end_key.key= key;
7141 end_key.length= key_len;
7142 end_key.flag= HA_READ_AFTER_KEY;
7143 end_key_p= &end_key;
7144 break;
7145 case HA_READ_KEY_OR_PREV:
7146 case HA_READ_BEFORE_KEY:
7147 case HA_READ_PREFIX_LAST:
7148 case HA_READ_PREFIX_LAST_OR_PREV:
7149 descending= TRUE;
7150 break;
7151 default:
7152 break;
7153 }
7154 const int error= read_range_first_to_buf(&start_key, end_key_p,
7155 descending,
7156 m_sorted, buf);
7157 table->status=error ? STATUS_NOT_FOUND: 0;
7158 DBUG_RETURN(error);
7159 }
7160
7161
index_next(uchar * buf)7162 int ha_ndbcluster::index_next(uchar *buf)
7163 {
7164 DBUG_ENTER("ha_ndbcluster::index_next");
7165 ha_statistic_increment(&SSV::ha_read_next_count);
7166 const int error= next_result(buf);
7167 table->status=error ? STATUS_NOT_FOUND: 0;
7168 DBUG_RETURN(error);
7169 }
7170
7171
index_prev(uchar * buf)7172 int ha_ndbcluster::index_prev(uchar *buf)
7173 {
7174 DBUG_ENTER("ha_ndbcluster::index_prev");
7175 ha_statistic_increment(&SSV::ha_read_prev_count);
7176 const int error= next_result(buf);
7177 table->status=error ? STATUS_NOT_FOUND: 0;
7178 DBUG_RETURN(error);
7179 }
7180
7181
index_first(uchar * buf)7182 int ha_ndbcluster::index_first(uchar *buf)
7183 {
7184 DBUG_ENTER("ha_ndbcluster::index_first");
7185 ha_statistic_increment(&SSV::ha_read_first_count);
7186 // Start the ordered index scan and fetch the first row
7187
7188 // Only HA_READ_ORDER indexes get called by index_first
7189 const int error= ordered_index_scan(0, 0, m_sorted, FALSE, buf, NULL);
7190 table->status=error ? STATUS_NOT_FOUND: 0;
7191 DBUG_RETURN(error);
7192 }
7193
7194
index_last(uchar * buf)7195 int ha_ndbcluster::index_last(uchar *buf)
7196 {
7197 DBUG_ENTER("ha_ndbcluster::index_last");
7198 ha_statistic_increment(&SSV::ha_read_last_count);
7199 const int error= ordered_index_scan(0, 0, m_sorted, TRUE, buf, NULL);
7200 table->status=error ? STATUS_NOT_FOUND: 0;
7201 DBUG_RETURN(error);
7202 }
7203
index_read_last(uchar * buf,const uchar * key,uint key_len)7204 int ha_ndbcluster::index_read_last(uchar * buf, const uchar * key, uint key_len)
7205 {
7206 DBUG_ENTER("ha_ndbcluster::index_read_last");
7207 DBUG_RETURN(index_read(buf, key, key_len, HA_READ_PREFIX_LAST));
7208 }
7209
7210
/**
  Read the first row in the range [start_key, end_key] into 'buf'.

  Chooses the cheapest access method available for the active index:
  a single-row primary key or unique index lookup when start_key is an
  exact match on the full key, otherwise an ordered index scan (or, for
  a plain unique index with only a partial key, a full table scan).

  @param start_key  lower bound of the range, or NULL if unbounded
  @param end_key    upper bound of the range, or NULL if unbounded
  @param desc       TRUE to scan in descending order
  @param sorted     TRUE if rows must be returned in index order
  @param buf        record buffer to fill

  @return 0 on success, HA_ERR_END_OF_FILE if the range is empty,
          otherwise a handler error code
*/
int ha_ndbcluster::read_range_first_to_buf(const key_range *start_key,
                                           const key_range *end_key,
                                           bool desc, bool sorted,
                                           uchar* buf)
{
  part_id_range part_spec;
  ndb_index_type type= get_index_type(active_index);
  const KEY* key_info= table->key_info+active_index;
  int error;
  DBUG_ENTER("ha_ndbcluster::read_range_first_to_buf");
  DBUG_PRINT("info", ("desc: %d, sorted: %d", desc, sorted));

  /* Any scan still open on this handler must be closed first */
  if (unlikely((error= close_scan())))
    DBUG_RETURN(error);

  if (m_use_partition_pruning)
  {
    assert(m_pushed_join_operation != PUSHED_ROOT);
    get_partition_set(table, buf, active_index, start_key, &part_spec);
    DBUG_PRINT("info", ("part_spec.start_part: %u part_spec.end_part: %u",
                        part_spec.start_part, part_spec.end_part));
    /*
      If partition pruning has found no partition in set
      we can return HA_ERR_END_OF_FILE
      If partition pruning has found exactly one partition in set
      we can optimize scan to run towards that partition only.
    */
    if (part_spec.start_part > part_spec.end_part)
    {
      DBUG_RETURN(HA_ERR_END_OF_FILE);
    }

    if (part_spec.start_part == part_spec.end_part)
    {
      /*
        Only one partition is required to scan, if sorted is required we
        don't need it any more since output from one ordered partitioned
        index is always sorted.
      */
      sorted= FALSE;
      if (unlikely(!get_transaction_part_id(part_spec.start_part, error)))
      {
        DBUG_RETURN(error);
      }
    }
  }

  switch (type){
  case PRIMARY_KEY_ORDERED_INDEX:
  case PRIMARY_KEY_INDEX:
    /* Exact match on the full primary key -> single-row pk read */
    if (start_key &&
        start_key->length == key_info->key_length &&
        start_key->flag == HA_READ_KEY_EXACT)
    {
      if (!m_thd_ndb->trans)
        if (unlikely(!start_transaction_key(active_index,
                                            start_key->key, error)))
          DBUG_RETURN(error);
      error= pk_read(start_key->key, start_key->length, buf,
                     (m_use_partition_pruning)? &(part_spec.start_part) : NULL);
      /* A missing row simply means the requested range is empty */
      DBUG_RETURN(error == HA_ERR_KEY_NOT_FOUND ? HA_ERR_END_OF_FILE : error);
    }
    break;
  case UNIQUE_ORDERED_INDEX:
  case UNIQUE_INDEX:
    /* Exact match on the full unique key (with no NULL key parts)
       -> single-row unique index read */
    if (start_key && start_key->length == key_info->key_length &&
        start_key->flag == HA_READ_KEY_EXACT &&
        !check_null_in_key(key_info, start_key->key, start_key->length))
    {
      if (!m_thd_ndb->trans)
        if (unlikely(!start_transaction_key(active_index,
                                            start_key->key, error)))
          DBUG_RETURN(error);
      error= unique_index_read(start_key->key, start_key->length, buf);
      DBUG_RETURN(error == HA_ERR_KEY_NOT_FOUND ? HA_ERR_END_OF_FILE : error);
    }
    else if (type == UNIQUE_INDEX)
      /* Plain unique index has no ordered part: must scan the table */
      DBUG_RETURN(full_table_scan(key_info,
                                  start_key,
                                  end_key,
                                  buf));
    break;
  default:
    break;
  }
  if (!m_use_partition_pruning && !m_thd_ndb->trans)
  {
    /* No transaction started yet: if the range hits a single partition,
       start a transaction hinted towards that partition */
    get_partition_set(table, buf, active_index, start_key, &part_spec);
    if (part_spec.start_part == part_spec.end_part)
      if (unlikely(!start_transaction_part_id(part_spec.start_part, error)))
        DBUG_RETURN(error);
  }
  // Start the ordered index scan and fetch the first row
  DBUG_RETURN(ordered_index_scan(start_key, end_key, sorted, desc, buf,
                                 (m_use_partition_pruning)? &part_spec : NULL));
}
7307
read_range_first(const key_range * start_key,const key_range * end_key,bool eq_r,bool sorted)7308 int ha_ndbcluster::read_range_first(const key_range *start_key,
7309 const key_range *end_key,
7310 bool eq_r, bool sorted)
7311 {
7312 uchar* buf= table->record[0];
7313 DBUG_ENTER("ha_ndbcluster::read_range_first");
7314 DBUG_RETURN(read_range_first_to_buf(start_key, end_key, FALSE,
7315 sorted, buf));
7316 }
7317
read_range_next()7318 int ha_ndbcluster::read_range_next()
7319 {
7320 DBUG_ENTER("ha_ndbcluster::read_range_next");
7321 DBUG_RETURN(next_result(table->record[0]));
7322 }
7323
7324
/**
  Prepare for a sequential (table order) read of the table.

  Closes any open scan and initializes the primary key "index",
  which is used by rnd_next()/rnd_pos() for row access.

  @param scan  TRUE if a full scan will follow (currently unused here
               beyond debug output)

  @return 0 on success, otherwise the error from close_scan()
*/
int ha_ndbcluster::rnd_init(bool scan)
{
  int error;
  DBUG_ENTER("rnd_init");
  DBUG_PRINT("enter", ("scan: %d", scan));

  if ((error= close_scan()))
    DBUG_RETURN(error);
  index_init(table_share->primary_key, 0);
  DBUG_RETURN(0);
}
7336
/**
  Close any scan currently open on this handler: both pushed-join
  queries (m_active_query) and plain/multi-range scan cursors.

  Before a cursor is closed, any lock taken on the current tuple is
  handed over to the transaction, and buffered (unsent) operations are
  flushed so that updates/deletes issued during the scan take effect.

  @return 0 on success, otherwise a handler error code
*/
int ha_ndbcluster::close_scan()
{
  DBUG_ENTER("close_scan");

  if (m_active_query)
  {
    m_active_query->close(m_thd_ndb->m_force_send);
    m_active_query= NULL;
  }

  NdbScanOperation *cursor= m_active_cursor;
  if (!cursor)
  {
    cursor = m_multi_cursor;
    if (!cursor)
      DBUG_RETURN(0);    // No cursor open, nothing more to do
  }

  int error;
  NdbTransaction *trans= m_thd_ndb->trans;
  /* Transfer any lock held on the current tuple to the transaction */
  if ((error= scan_handle_lock_tuple(cursor, trans)) != 0)
    DBUG_RETURN(error);

  if (m_thd_ndb->m_unsent_bytes)
  {
    /*
      Take over any pending transactions to the
      deleting/updating transaction before closing the scan
    */
    DBUG_PRINT("info", ("thd_ndb->m_unsent_bytes: %ld",
                        (long) m_thd_ndb->m_unsent_bytes));
    if (execute_no_commit(m_thd_ndb, trans, m_ignore_no_key) != 0)
    {
      no_uncommitted_rows_execute_failure();
      DBUG_RETURN(ndb_err(trans));
    }
  }

  cursor->close(m_thd_ndb->m_force_send, TRUE);
  m_active_cursor= NULL;
  m_multi_cursor= NULL;
  DBUG_RETURN(0);
}
7380
rnd_end()7381 int ha_ndbcluster::rnd_end()
7382 {
7383 DBUG_ENTER("rnd_end");
7384 DBUG_RETURN(close_scan());
7385 }
7386
7387
rnd_next(uchar * buf)7388 int ha_ndbcluster::rnd_next(uchar *buf)
7389 {
7390 DBUG_ENTER("rnd_next");
7391 ha_statistic_increment(&SSV::ha_read_rnd_next_count);
7392
7393 int error;
7394 if (m_active_cursor || m_active_query)
7395 error= next_result(buf);
7396 else
7397 error= full_table_scan(NULL, NULL, NULL, buf);
7398
7399 table->status= error ? STATUS_NOT_FOUND: 0;
7400 DBUG_RETURN(error);
7401 }
7402
7403
/**
  An "interesting" record has been found and its primary key has been
  retrieved by calling position(). Now it is time to read
  the record from the database once again.
*/
7409
int ha_ndbcluster::rnd_pos(uchar *buf, uchar *pos)
{
  DBUG_ENTER("rnd_pos");
  ha_statistic_increment(&SSV::ha_read_rnd_count);
  // The primary key for the record is stored in pos
  // Perform a pk_read using primary key "index"
  {
    part_id_range part_spec;
    uint key_length= ref_length;
    if (m_user_defined_partitioning)
    {
      if (table_share->primary_key == MAX_KEY)
      {
        /*
          The partition id has been fetched from ndb
          and has been stored directly after the hidden key
        */
        DBUG_DUMP("key+part", pos, key_length);
        key_length= ref_length - sizeof(m_part_id);
        part_spec.start_part= part_spec.end_part= *(uint32 *)(pos + key_length);
      }
      else
      {
        /* Explicit primary key: re-derive the partition id from the key */
        key_range key_spec;
        KEY *key_info= table->key_info + table_share->primary_key;
        key_spec.key= pos;
        key_spec.length= key_length;
        key_spec.flag= HA_READ_KEY_EXACT;
        get_full_part_id_from_key(table, buf, key_info,
                                  &key_spec, &part_spec);
        assert(part_spec.start_part == part_spec.end_part);
      }
      DBUG_PRINT("info", ("partition id %u", part_spec.start_part));
    }
    DBUG_DUMP("key", pos, key_length);
    int res= pk_read(pos, key_length, buf,
                     (m_user_defined_partitioning) ?
                     &(part_spec.start_part)
                     : NULL);
    if (res == HA_ERR_KEY_NOT_FOUND)
    {
      /**
       * When using rnd_pos the server first retrieves a set of records
       * (typically by scanning them) and stores a unique identifier
       * (for ndb this is the primary key), and later retrieves the
       * record again using rnd_pos and the saved primary key. For ndb,
       * since we only support committed read, the record could have
       * been deleted in between the "save" and the rnd_pos.
       * Therefore we return HA_ERR_RECORD_DELETED in this case rather
       * than HA_ERR_KEY_NOT_FOUND (which would cause the statement to
       * be aborted)
       */
      res= HA_ERR_RECORD_DELETED;
    }
    table->status= res ? STATUS_NOT_FOUND: 0;
    DBUG_RETURN(res);
  }
}
7469
7470
7471 /**
7472 Store the primary key of this record in ref
7473 variable, so that the row can be retrieved again later
7474 using "reference" in rnd_pos.
7475 */
7476
void ha_ndbcluster::position(const uchar *record)
{
  KEY *key_info;
  KEY_PART_INFO *key_part;
  KEY_PART_INFO *end;
  uchar *buff;
  uint key_length;

  DBUG_ENTER("position");

  if (table_share->primary_key != MAX_KEY)
  {
    /* Explicit primary key: copy each key part from 'record' into 'ref' */
    key_length= ref_length;
    key_info= table->key_info + table_share->primary_key;
    key_part= key_info->key_part;
    end= key_part + key_info->user_defined_key_parts;
    buff= ref;

    for (; key_part != end; key_part++)
    {
      if (key_part->null_bit) {
        /* Store 0 if the key part is a NULL part */
        if (record[key_part->null_offset]
            & key_part->null_bit) {
          *buff++= 1;
          continue;
        }
        *buff++= 0;
      }

      size_t len = key_part->length;
      const uchar * ptr = record + key_part->offset;
      Field *field = key_part->field;
      if (field->type() == MYSQL_TYPE_VARCHAR)
      {
        size_t var_length;
        if (((Field_varstring*)field)->length_bytes == 1)
        {
          /**
           * Keys always use 2 bytes length
           */
          buff[0] = ptr[0];
          buff[1] = 0;
          var_length = ptr[0];
          assert(var_length <= len);
          memcpy(buff+2, ptr + 1, var_length);
        }
        else
        {
          /* 2-byte length prefix, little-endian */
          var_length = ptr[0] + (ptr[1]*256);
          assert(var_length <= len);
          memcpy(buff, ptr, var_length + 2);
        }
        /**
          We have to zero-pad any unused VARCHAR buffer so that MySQL is
          able to use simple memcmp to compare two instances of the same
          unique key value to determine if they are equal.
          MySQL does this to compare contents of two 'ref' values.
          (Duplicate weedout algorithm is one such case.)
        */
        memset(buff+2+var_length, 0, len - var_length);
        len += 2;
      }
      else
      {
        /* Fixed-size key part: straight copy */
        memcpy(buff, ptr, len);
      }
      buff += len;
    }
  }
  else
  {
    // No primary key, get hidden key
    DBUG_PRINT("info", ("Getting hidden key"));
    // If table has user defined partition save the partition id as well
    if (m_user_defined_partitioning)
    {
      DBUG_PRINT("info", ("Saving partition id %u", m_part_id));
      key_length= ref_length - sizeof(m_part_id);
      memcpy(ref+key_length, (void *)&m_part_id, sizeof(m_part_id));
    }
    else
      key_length= ref_length;
#ifndef NDEBUG
    /* Sanity-check that the last column really is the hidden
       auto-increment primary key */
    int hidden_no= table->s->fields;
    const NDBTAB *tab= m_table;
    const NDBCOL *hidden_col= tab->getColumn(hidden_no);
    assert(hidden_col->getPrimaryKey() &&
           hidden_col->getAutoIncrement() &&
           key_length == NDB_HIDDEN_PRIMARY_KEY_LENGTH);
#endif
    memcpy(ref, &m_ref, key_length);
  }
#ifndef NDEBUG
  if (table_share->primary_key == MAX_KEY && m_user_defined_partitioning)
    DBUG_DUMP("key+part", ref, key_length+sizeof(m_part_id));
#endif
  DBUG_DUMP("ref", ref, key_length);
  DBUG_VOID_RETURN;
}
7577
/**
  Compare two 'ref' values (as produced by position()) for ordering.

  For tables with an explicit primary key, the refs are compared key
  part by key part; for hidden-key tables a plain memcmp over the full
  ref is used.

  @return <0, 0 or >0, with memcmp-like semantics
*/
int
ha_ndbcluster::cmp_ref(const uchar * ref1, const uchar * ref2)
{
  DBUG_ENTER("cmp_ref");

  if (table_share->primary_key != MAX_KEY)
  {
    KEY *key_info= table->key_info + table_share->primary_key;
    KEY_PART_INFO *key_part= key_info->key_part;
    KEY_PART_INFO *end= key_part + key_info->user_defined_key_parts;

    for (; key_part != end; key_part++)
    {
      // NOTE: No need to check for null since PK is not-null

      Field *field= key_part->field;
      int result= field->key_cmp(ref1, ref2);
      if (result)
      {
        DBUG_RETURN(result);
      }

      if (field->type() == MYSQL_TYPE_VARCHAR)
      {
        /* Skip the 2-byte length prefix stored before varchar key data
           (see position()) */
        ref1+= 2;
        ref2+= 2;
      }

      ref1+= key_part->length;
      ref2+= key_part->length;
    }
    DBUG_RETURN(0);
  }
  else
  {
    /* Hidden key: compare the raw bytes of the whole ref */
    DBUG_RETURN(memcmp(ref1, ref2, ref_length));
  }
}
7616
/**
  Return statistics and meta information about the table to the server.

  @param flag  bitmask of HA_STATUS_* flags selecting which parts of
               'stats' (and errkey) to fill in

  @return 0 on success, otherwise a handler error code
          (HA_ERR_NO_CONNECTION if the cluster is unreachable)
*/
int ha_ndbcluster::info(uint flag)
{
  THD *thd= table->in_use;
  int result= 0;
  DBUG_ENTER("info");
  DBUG_PRINT("enter", ("flag: %d", flag));

  if (flag & HA_STATUS_POS)
    DBUG_PRINT("info", ("HA_STATUS_POS"));
  if (flag & HA_STATUS_TIME)
    DBUG_PRINT("info", ("HA_STATUS_TIME"));
  if (flag & HA_STATUS_CONST)
  {
    /*
      Set size required by a single record in the MRR 'HANDLER_BUFFER'.
      MRR buffer has both a fixed and a variable sized part.
      Size is calculated assuming max size of the variable part.

      See comments for multi_range_fixed_size() and
      multi_range_max_entry() regarding how the MRR buffer is organized.
    */
    stats.mrr_length_per_rec= multi_range_fixed_size(1) +
      multi_range_max_entry(PRIMARY_KEY_INDEX, table_share->reclength);
  }
  /* NOTE: this 'while' is used as a breakable block; it executes at
     most once (see the unconditional 'break' at the bottom). */
  while (flag & HA_STATUS_VARIABLE)
  {
    if (!thd)
      thd= current_thd;
    DBUG_PRINT("info", ("HA_STATUS_VARIABLE"));

    if (!m_table_info)
    {
      set_my_errno(check_ndb_connection(thd));
      if (my_errno())
        DBUG_RETURN(my_errno());
    }

    /*
      May need to update local copy of statistics in
      'm_table_info', either directly from datanodes,
      or from shared (mutex protected) cached copy, if:
      1) 'use_exact_count' has been set (by config or user).
      2) HA_STATUS_NO_LOCK -> read from shared cached copy.
      3) Local copy is invalid.
    */
    bool exact_count= THDVAR(thd, use_exact_count);
    if (exact_count ||                         // 1)
        !(flag & HA_STATUS_NO_LOCK) ||         // 2)
        m_table_info == NULL ||                // 3)
        m_table_info->records == ~(ha_rows)0)  // 3)
    {
      result= update_stats(thd, (exact_count || !(flag & HA_STATUS_NO_LOCK)));
      if (result)
        DBUG_RETURN(result);
    }
    /* Read from local statistics, fast and fuzzy, wo/ locks */
    else
    {
      assert(m_table_info->records != ~(ha_rows)0);
      stats.records= m_table_info->records +
                     m_table_info->no_uncommitted_rows_count;
    }

    if (thd->lex->sql_command != SQLCOM_SHOW_TABLE_STATUS &&
        thd->lex->sql_command != SQLCOM_SHOW_KEYS)
    {
      /*
        just use whatever stats we have. However,
        optimizer interprets the values 0 and 1 as EXACT:
        -> < 2 should not be returned.
      */
      if (stats.records < 2)
        stats.records= 2;
    }
    break;
  }
  /* RPK moved to variable part */
  if (flag & HA_STATUS_VARIABLE)
  {
    /* No meaningful way to return error */
    DBUG_PRINT("info", ("rec_per_key"));
    set_rec_per_key();
  }
  if (flag & HA_STATUS_ERRKEY)
  {
    DBUG_PRINT("info", ("HA_STATUS_ERRKEY dupkey=%u", m_dupkey));
    errkey= m_dupkey;
  }
  if (flag & HA_STATUS_AUTO)
  {
    DBUG_PRINT("info", ("HA_STATUS_AUTO"));
    if (m_table && table->found_next_number_field)
    {
      if (!thd)
        thd= current_thd;
      set_my_errno(check_ndb_connection(thd));
      if (my_errno())
        DBUG_RETURN(my_errno());
      Ndb *ndb= get_ndb(thd);
      Ndb_tuple_id_range_guard g(m_share);

      Uint64 auto_increment_value64;
      if (ndb->readAutoIncrementValue(m_table, g.range,
                                      auto_increment_value64) == -1)
      {
        const NdbError err= ndb->getNdbError();
        sql_print_error("Error %lu in readAutoIncrementValue(): %s",
                        (ulong) err.code, err.message);
        stats.auto_increment_value= ~(ulonglong)0;
      }
      else
        stats.auto_increment_value= (ulonglong)auto_increment_value64;
    }
  }

  /* update_stats() returns -1 for connection-level failures */
  if(result == -1)
    result= HA_ERR_NO_CONNECTION;

  DBUG_RETURN(result);
}
7737
7738
/**
  Fill in per-partition statistics for partition 'part_id'.

  Fetches fresh statistics for the given partition from the data nodes
  and copies them into 'stat_info'. On any failure the function only
  logs a debug warning and leaves 'stat_info' untouched (the handler
  API provides no way to return an error here).

  @param[out] stat_info  statistics to fill in
  @param[out] check_sum  unused by this engine
  @param      part_id    partition to collect statistics for
*/
void ha_ndbcluster::get_dynamic_partition_info(ha_statistics *stat_info,
                                               ha_checksum *check_sum,
                                               uint part_id)
{
  DBUG_PRINT("info", ("ha_ndbcluster::get_dynamic_partition_info"));

  int error = 0;
  THD *thd = table->in_use;

  if (!thd)
    thd = current_thd;
  if (!m_table_info)
  {
    if ((error = check_ndb_connection(thd)))
      goto err;
  }
  /* Always read exact counts for the single partition */
  error = update_stats(thd, 1, part_id);

  if (error == 0)
  {
    stat_info->records = stats.records;
    stat_info->mean_rec_length = stats.mean_rec_length;
    stat_info->data_file_length = stats.data_file_length;
    stat_info->delete_length = stats.delete_length;
    stat_info->max_data_file_length = stats.max_data_file_length;
    return;
  }

err:

  DBUG_PRINT("warning",
    ("ha_ndbcluster::get_dynamic_partition_info failed with error code %u",
     error));
}
7773
7774
extra(enum ha_extra_function operation)7775 int ha_ndbcluster::extra(enum ha_extra_function operation)
7776 {
7777 DBUG_ENTER("extra");
7778 switch (operation) {
7779 case HA_EXTRA_IGNORE_DUP_KEY: /* Dup keys don't rollback everything*/
7780 DBUG_PRINT("info", ("HA_EXTRA_IGNORE_DUP_KEY"));
7781 DBUG_PRINT("info", ("Ignoring duplicate key"));
7782 m_ignore_dup_key= TRUE;
7783 break;
7784 case HA_EXTRA_NO_IGNORE_DUP_KEY:
7785 DBUG_PRINT("info", ("HA_EXTRA_NO_IGNORE_DUP_KEY"));
7786 m_ignore_dup_key= FALSE;
7787 break;
7788 case HA_EXTRA_IGNORE_NO_KEY:
7789 DBUG_PRINT("info", ("HA_EXTRA_IGNORE_NO_KEY"));
7790 DBUG_PRINT("info", ("Turning on AO_IgnoreError at Commit/NoCommit"));
7791 m_ignore_no_key= TRUE;
7792 break;
7793 case HA_EXTRA_NO_IGNORE_NO_KEY:
7794 DBUG_PRINT("info", ("HA_EXTRA_NO_IGNORE_NO_KEY"));
7795 DBUG_PRINT("info", ("Turning on AO_IgnoreError at Commit/NoCommit"));
7796 m_ignore_no_key= FALSE;
7797 break;
7798 case HA_EXTRA_WRITE_CAN_REPLACE:
7799 DBUG_PRINT("info", ("HA_EXTRA_WRITE_CAN_REPLACE"));
7800 if (!m_has_unique_index ||
7801 /*
7802 Always set if slave, quick fix for bug 27378
7803 or if manual binlog application, for bug 46662
7804 */
7805 applying_binlog(current_thd))
7806 {
7807 DBUG_PRINT("info", ("Turning ON use of write instead of insert"));
7808 m_use_write= TRUE;
7809 }
7810 break;
7811 case HA_EXTRA_WRITE_CANNOT_REPLACE:
7812 DBUG_PRINT("info", ("HA_EXTRA_WRITE_CANNOT_REPLACE"));
7813 DBUG_PRINT("info", ("Turning OFF use of write instead of insert"));
7814 m_use_write= FALSE;
7815 break;
7816 case HA_EXTRA_DELETE_CANNOT_BATCH:
7817 DBUG_PRINT("info", ("HA_EXTRA_DELETE_CANNOT_BATCH"));
7818 m_delete_cannot_batch= TRUE;
7819 break;
7820 case HA_EXTRA_UPDATE_CANNOT_BATCH:
7821 DBUG_PRINT("info", ("HA_EXTRA_UPDATE_CANNOT_BATCH"));
7822 m_update_cannot_batch= TRUE;
7823 break;
7824 // We don't implement 'KEYREAD'. However, KEYREAD also implies DISABLE_JOINPUSH.
7825 case HA_EXTRA_KEYREAD:
7826 DBUG_PRINT("info", ("HA_EXTRA_KEYREAD"));
7827 m_disable_pushed_join= TRUE;
7828 break;
7829 case HA_EXTRA_NO_KEYREAD:
7830 DBUG_PRINT("info", ("HA_EXTRA_NO_KEYREAD"));
7831 m_disable_pushed_join= FALSE;
7832 break;
7833 default:
7834 break;
7835 }
7836
7837 DBUG_RETURN(0);
7838 }
7839
7840
/**
  Check whether "read before write removal" (rbwr) can be used for the
  current statement, i.e. whether UPDATE/DELETE can be executed without
  first reading the affected rows.

  The optimization is refused when blobs are involved, when the table
  has a hidden key, or when the primary key or any unique key would be
  updated (those cases require the old row values).

  @return true if rbwr is possible for this statement, false otherwise
*/
bool ha_ndbcluster::start_read_removal()
{
  THD *thd= table->in_use;
  DBUG_ENTER("start_read_removal");

  if (uses_blob_value(table->write_set))
  {
    DBUG_PRINT("exit", ("No! Blob field in write_set"));
    DBUG_RETURN(false);
  }

  if (thd->lex->sql_command == SQLCOM_DELETE &&
      table_share->blob_fields)
  {
    DBUG_PRINT("exit", ("No! DELETE from table with blob(s)"));
    DBUG_RETURN(false);
  }

  if (table_share->primary_key == MAX_KEY)
  {
    DBUG_PRINT("exit", ("No! Table with hidden key"));
    DBUG_RETURN(false);
  }

  if (bitmap_is_overlapping(table->write_set, m_pk_bitmap_p))
  {
    DBUG_PRINT("exit", ("No! Updating primary key"));
    DBUG_RETURN(false);
  }

  if (m_has_unique_index)
  {
    /* Refuse if any unique key column is in the write set */
    for (uint i= 0; i < table_share->keys; i++)
    {
      const KEY* key= table->key_info + i;
      if ((key->flags & HA_NOSAME) &&
          bitmap_is_overlapping(table->write_set,
                                m_key_fields[i]))
      {
        DBUG_PRINT("exit", ("No! Unique key %d is updated", i));
        DBUG_RETURN(false);
      }
    }
  }
  m_read_before_write_removal_possible= TRUE;
  DBUG_PRINT("exit", ("Yes, rbwr is possible!"));
  DBUG_RETURN(true);
}
7889
7890
end_read_removal(void)7891 ha_rows ha_ndbcluster::end_read_removal(void)
7892 {
7893 DBUG_ENTER("end_read_removal");
7894 assert(m_read_before_write_removal_possible);
7895 DBUG_PRINT("info", ("updated: %llu, deleted: %llu",
7896 m_rows_updated, m_rows_deleted));
7897 DBUG_RETURN(m_rows_updated + m_rows_deleted);
7898 }
7899
7900
/**
  Reset per-statement handler state: clear pushed conditions, tear
  down pushed-join state, and restore all flags that extra() calls
  may have set for the previous statement.

  @return 0 (always succeeds)
*/
int ha_ndbcluster::reset()
{
  DBUG_ENTER("ha_ndbcluster::reset");
  if (m_cond)
  {
    m_cond->cond_clear();
  }
  assert(m_active_query == NULL);
  if (m_pushed_join_operation==PUSHED_ROOT)  // Root of pushed query
  {
    delete m_pushed_join_member;             // Also delete QueryDef
  }
  m_pushed_join_member= NULL;
  m_pushed_join_operation= -1;
  m_disable_pushed_join= FALSE;

#if 0
  // Magnus, disable this "hack" until it's possible to test if
  // it's still needed
  /*
    Regular partition pruning will set the bitmap appropriately.
    Some queries like ALTER TABLE doesn't use partition pruning and
    thus the 'used_partitions' bitmap needs to be initialized
  */
  if (m_part_info)
    bitmap_set_all(&m_part_info->used_partitions);
#endif

  /* reset flags set by extra calls */
  m_read_before_write_removal_possible= FALSE;
  m_read_before_write_removal_used= FALSE;
  m_rows_updated= m_rows_deleted= 0;
  m_ignore_dup_key= FALSE;
  m_use_write= FALSE;
  m_ignore_no_key= FALSE;
  m_rows_inserted= (ha_rows) 0;
  m_rows_to_insert= (ha_rows) 1;
  m_delete_cannot_batch= FALSE;
  m_update_cannot_batch= FALSE;

  assert(m_is_bulk_delete == false);
  m_is_bulk_delete = false;
  DBUG_RETURN(0);
}
7945
7946
/**
  Start of a bulk insert: remember the number of rows to be inserted; it
  will be used in write_row() and get_autoincrement() to send an optimal
  number of rows in each roundtrip to the server.

  @param
   rows  number of rows to insert, 0 if unknown
*/
7955
/**
  Flush buffered (bulk) insert operations to the data nodes.

  In normal (transactional) mode the buffered rows are sent with
  execute_no_commit() unless 'allow_batch' says the send may be
  deferred further. With transactions switched off (e.g. LOAD DATA,
  see TNTO_TRANSACTIONS_OFF) each flush is committed immediately and
  the transaction is restarted, and the THD is flagged as having
  modified a non-transactional table so it won't be rolled back.

  @param allow_batch  TRUE if sending may still be deferred

  @return 0 on success, otherwise a handler error code
*/
int
ha_ndbcluster::flush_bulk_insert(bool allow_batch)
{
  NdbTransaction *trans= m_thd_ndb->trans;
  DBUG_ENTER("ha_ndbcluster::flush_bulk_insert");
  DBUG_PRINT("info", ("Sending inserts to NDB, rows_inserted: %d",
                      (int)m_rows_inserted));
  assert(trans);


  if (! (m_thd_ndb->trans_options & TNTO_TRANSACTIONS_OFF))
  {
    if (!allow_batch &&
        execute_no_commit(m_thd_ndb, trans, m_ignore_no_key) != 0)
    {
      no_uncommitted_rows_execute_failure();
      DBUG_RETURN(ndb_err(trans));
    }
  }
  else
  {
    /*
      signal that transaction has been broken up and hence cannot
      be rolled back
    */
    THD *thd= table->in_use;
    thd->get_transaction()->mark_modified_non_trans_table(Transaction_ctx::SESSION);
    thd->get_transaction()->mark_modified_non_trans_table(Transaction_ctx::STMT);
    if (execute_commit(m_thd_ndb, trans, m_thd_ndb->m_force_send,
                       m_ignore_no_key) != 0)
    {
      no_uncommitted_rows_execute_failure();
      DBUG_RETURN(ndb_err(trans));
    }
    if (trans->restart() != 0)
    {
      assert(0);
      DBUG_RETURN(-1);
    }
  }
  DBUG_RETURN(0);
}
7998
/**
  Begin a bulk insert of (up to) 'rows' rows.

  Records the expected row count in m_rows_to_insert so write_row()
  can batch operations and prefetch auto-increment values optimally.

  @param rows  number of rows to insert, 0 if unknown
*/
void ha_ndbcluster::start_bulk_insert(ha_rows rows)
{
  DBUG_ENTER("start_bulk_insert");
  DBUG_PRINT("enter", ("rows: %d", (int)rows));

  m_rows_inserted= (ha_rows) 0;
  if (!m_use_write && m_ignore_dup_key)
  {
    /*
      compare if expression with that in write_row
      we have a situation where peek_indexed_rows() will be called
      so we cannot batch
    */
    DBUG_PRINT("info", ("Batching turned off as duplicate key is "
                        "ignored by using peek_row"));
    m_rows_to_insert= 1;
    DBUG_VOID_RETURN;
  }
  if (rows == (ha_rows) 0)
  {
    /* We don't know how many will be inserted, guess */
    m_rows_to_insert=
      (m_autoincrement_prefetch > DEFAULT_AUTO_PREFETCH)
      ? m_autoincrement_prefetch
      : DEFAULT_AUTO_PREFETCH;
    m_autoincrement_prefetch= m_rows_to_insert;
  }
  else
  {
    m_rows_to_insert= rows;
    /* Prefetch at least as many auto-increment values as rows to insert */
    if (m_autoincrement_prefetch < m_rows_to_insert)
      m_autoincrement_prefetch= m_rows_to_insert;
  }

  DBUG_VOID_RETURN;
}
8035
8036 /**
8037 End of an insert.
8038 */
/**
  End of a bulk insert: flush any rows still buffered and reset the
  bulk-insert counters.

  @return 0 on success, otherwise the error from flush_bulk_insert()
*/
int ha_ndbcluster::end_bulk_insert()
{
  int error= 0;

  DBUG_ENTER("end_bulk_insert");
  // Check if last inserts need to be flushed

  THD *thd= table->in_use;
  Thd_ndb *thd_ndb= m_thd_ndb;

  if (!thd_allow_batch(thd) && thd_ndb->m_unsent_bytes)
  {
    /* Deferred execution is only possible when this handler is the
       only one participating in the transaction */
    bool allow_batch= (thd_ndb->m_handler != 0);
    error= flush_bulk_insert(allow_batch);
    if (error != 0)
      set_my_errno(error);
  }

  m_rows_inserted= (ha_rows) 0;
  m_rows_to_insert= (ha_rows) 1;
  DBUG_RETURN(error);
}
8061
8062
extra_opt(enum ha_extra_function operation,ulong cache_size)8063 int ha_ndbcluster::extra_opt(enum ha_extra_function operation, ulong cache_size)
8064 {
8065 DBUG_ENTER("extra_opt");
8066 DBUG_PRINT("enter", ("cache_size: %lu", cache_size));
8067 DBUG_RETURN(extra(operation));
8068 }
8069
/* File-name extensions used by this engine; NullS-terminated as
   required by bas_ext(). */
static const char *ha_ndbcluster_exts[] = {
 ha_ndb_ext,
 NullS
};

/**
  Return the list of file-name extensions belonging to this engine.
*/
const char** ha_ndbcluster::bas_ext() const
{
  return ha_ndbcluster_exts;
}
8079
8080 /**
8081 How many seeks it will take to read through the table.
8082
8083 This is to be comparable to the number returned by records_in_range so
8084 that we can decide if we should scan the table or use keys.
8085 */
8086
scan_time()8087 double ha_ndbcluster::scan_time()
8088 {
8089 DBUG_ENTER("ha_ndbcluster::scan_time()");
8090 double res= rows2double(stats.records*1000);
8091 DBUG_PRINT("exit", ("table: %s value: %f",
8092 m_tabname, res));
8093 DBUG_RETURN(res);
8094 }
8095
8096 /*
8097 Convert MySQL table locks into locks supported by Ndb Cluster.
8098 Note that MySQL Cluster does currently not support distributed
8099 table locks, so to be safe one should set cluster in Single
8100 User Mode, before relying on table locks when updating tables
8101 from several MySQL servers
8102 */
8103
/**
  Convert the lock the server asks for into one suitable for NDB and
  append this handler's lock to the 'to' array.

  @param thd        current session
  @param to         lock array to append to
  @param lock_type  requested lock type, or TL_IGNORE to keep current

  @return pointer past the last stored lock
*/
THR_LOCK_DATA **ha_ndbcluster::store_lock(THD *thd,
                                          THR_LOCK_DATA **to,
                                          enum thr_lock_type lock_type)
{
  DBUG_ENTER("store_lock");
  if (lock_type != TL_IGNORE && m_lock.type == TL_UNLOCK)
  {

    /* If we are not doing a LOCK TABLE, then allow multiple
       writers */

    /* Since NDB does not currently have table locks
       this is treated as a ordinary lock */

    const bool in_lock_tables = thd_in_lock_tables(thd);
    const uint sql_command = thd_sql_command(thd);
    if ((lock_type >= TL_WRITE_CONCURRENT_INSERT &&
         lock_type <= TL_WRITE) &&
        !(in_lock_tables && sql_command == SQLCOM_LOCK_TABLES))
      lock_type= TL_WRITE_ALLOW_WRITE;

    /* In queries of type INSERT INTO t1 SELECT ... FROM t2 ...
       MySQL would use the lock TL_READ_NO_INSERT on t2, and that
       would conflict with TL_WRITE_ALLOW_WRITE, blocking all inserts
       to t2. Convert the lock to a normal read lock to allow
       concurrent inserts to t2. */

    if (lock_type == TL_READ_NO_INSERT && !thd->in_lock_tables)
      lock_type= TL_READ;

    /**
     * We need locks on source table when
     * doing offline alter...
     * In 5.1 this worked due to TL_WRITE_ALLOW_READ...
     * but that has been removed in 5.5
     * I simply add this to get it...
     */
    if (sql_command == SQLCOM_ALTER_TABLE)
      lock_type = TL_WRITE;

    m_lock.type=lock_type;
  }
  *to++= &m_lock;

  DBUG_PRINT("exit", ("lock_type: %d", lock_type));

  DBUG_RETURN(to);
}
8152
8153 /*
8154 As MySQL will execute an external lock for every new table it uses
8155 we can use this to start the transactions.
8156 If we are in auto_commit mode we just need to start a transaction
8157 for the statement, this will be stored in thd_ndb.stmt.
8158 If not, we have to start a master transaction if there doesn't exist
8159 one from before, this will be stored in thd_ndb.all
8160
8161 When a table lock is held one transaction will be started which holds
8162 the table lock and for each statement a hupp transaction will be started
8163 If we are locking the table then:
8164 - save the NdbDictionary::Table for easy access
8165 - save reference to table statistics
8166 - refresh list of the indexes for the table if needed (if altered)
8167 */
8168
8169 #ifdef HAVE_NDB_BINLOG
/**
  Define an operation on the current transaction that writes or
  updates this server's row in the ndb_apply_status table (replication
  progress: server_id, epoch, master log name/positions).

  The operation is only defined here; it is executed together with the
  rest of the transaction.

  NOTE(review): intermediate NDB API errors are accumulated in 'r' but
  only checked via assert(), so in release builds failures in the
  setValue()/equal() calls are silently ignored — confirm intended.

  @param thd        current session (slave/applier thread)
  @param do_update  non-zero to update an existing row,
                    zero to (over)write the row and reset the epoch

  @return 0 on success, -1 if the ndb_apply_status table is unavailable
*/
static int ndbcluster_update_apply_status(THD *thd, int do_update)
{
  Thd_ndb *thd_ndb= get_thd_ndb(thd);
  Ndb *ndb= thd_ndb->ndb;
  NDBDICT *dict= ndb->getDictionary();
  const NDBTAB *ndbtab;
  NdbTransaction *trans= thd_ndb->trans;
  ndb->setDatabaseName(NDB_REP_DB);
  Ndb_table_guard ndbtab_g(dict, NDB_APPLY_TABLE);
  if (!(ndbtab= ndbtab_g.get_table()))
  {
    return -1;
  }
  NdbOperation *op= 0;
  int r= 0;
  r|= (op= trans->getNdbOperation(ndbtab)) == 0;
  assert(r == 0);
  if (do_update)
    r|= op->updateTuple();
  else
    r|= op->writeTuple();
  assert(r == 0);
  // server_id
  r|= op->equal(0u, (Uint32)thd->server_id);
  assert(r == 0);
  if (!do_update)
  {
    // epoch
    r|= op->setValue(1u, (Uint64)0);
    assert(r == 0);
  }
  const char* group_master_log_name =
    ndb_mi_get_group_master_log_name();
  const Uint64 group_master_log_pos =
    ndb_mi_get_group_master_log_pos();
  const Uint64 future_event_relay_log_pos =
    ndb_mi_get_future_event_relay_log_pos();
  const Uint64 group_relay_log_pos =
    ndb_mi_get_group_relay_log_pos();

  // log_name
  char tmp_buf[FN_REFLEN];
  ndb_pack_varchar(ndbtab->getColumn(2u), tmp_buf,
                   group_master_log_name, (int)strlen(group_master_log_name));
  r|= op->setValue(2u, tmp_buf);
  assert(r == 0);
  // start_pos
  r|= op->setValue(3u, group_master_log_pos);
  assert(r == 0);
  // end_pos
  r|= op->setValue(4u, group_master_log_pos +
                   (future_event_relay_log_pos - group_relay_log_pos));
  assert(r == 0);
  return 0;
}
8225 #endif /* HAVE_NDB_BINLOG */
8226
8227
/**
  Apply per-statement transaction settings from the session to this
  Thd_ndb: turn transactions off for LOAD DATA / disabled-transaction
  sessions / ndb_use_transactions=OFF, and pick force-send and batch
  size (global values are used for slave threads).
*/
void
Thd_ndb::transaction_checks()
{
  THD* thd = m_thd;

  if (thd->lex->sql_command == SQLCOM_LOAD)
    trans_options|= TNTO_TRANSACTIONS_OFF;
  else if (!thd->get_transaction()->m_flags.enabled)
    trans_options|= TNTO_TRANSACTIONS_OFF;
  else if (!THDVAR(thd, use_transactions))
    trans_options|= TNTO_TRANSACTIONS_OFF;
  m_force_send= THDVAR(thd, force_send);
  if (!thd->slave_thread)
    m_batch_size= THDVAR(thd, batch_size);
  else
  {
    m_batch_size= THDVAR(NULL, batch_size); /* using global value */
    /* Do not use hinted TC selection in slave thread */
    THDVAR(thd, optimized_node_selection)=
      THDVAR(NULL, optimized_node_selection) & 1; /* using global value */
  }
}
8250
8251
/**
  Prepare this handler and its Thd_ndb for the start of a new statement.

  Called once per handler taking part in the statement; table_count
  tells how many handlers came before this one.  The first handler
  registers the statement (and, for multi-statement transactions, the
  transaction) with the server, decides whether execute-deferral is
  possible (thd_ndb->m_handler) and, unless delayed transaction start
  is enabled, starts the NDB transaction up front.

  @param thd          Thread handle
  @param thd_ndb      Per-thread NDB context
  @param table_count  Number of handlers already started in this statement

  @return 0 on success, error code otherwise
*/
int ha_ndbcluster::start_statement(THD *thd,
                                   Thd_ndb *thd_ndb,
                                   uint table_count)
{
  NdbTransaction *trans= thd_ndb->trans;
  int error;
  DBUG_ENTER("ha_ndbcluster::start_statement");

  m_thd_ndb= thd_ndb;
  m_thd_ndb->transaction_checks();

  if (table_count == 0)
  {
    /* First handler in the statement: register statement with server */
    trans_register_ha(thd, FALSE, ht, NULL);
    if (thd_options(thd) & (OPTION_NOT_AUTOCOMMIT | OPTION_BEGIN))
    {
      /* Multi-statement transaction: register it too, once */
      if (!trans)
        trans_register_ha(thd, TRUE, ht, NULL);
      thd_ndb->m_handler= NULL;
    }
    else
    {
      /*
        this is an autocommit, we may keep a reference to the
        handler to be used in the commit phase for optimization
        reasons, deferring execute
      */
      thd_ndb->m_handler= this;
    }
  }
  else
  {
    /*
      there is more than one handler involved, execute deferral
      not possible
    */
    ha_ndbcluster* handler = thd_ndb->m_handler;
    thd_ndb->m_handler= NULL;
    if (handler != NULL)
    {
      /**
       * If we initially believed that this could be run
       * using execute deferral...but changed our mind,
       * add handler to thd_ndb->open_tables like it would
       * have done "normally"
       */
      add_handler_to_open_tables(thd, thd_ndb, handler);
    }
  }
  if (!trans && table_count == 0)
  {
    assert(thd_ndb->changed_tables.is_empty() == TRUE);
    thd_ndb->trans_options= 0;

    DBUG_PRINT("trans",("Possibly starting transaction"));
    const uint opti_node_select = THDVAR(thd, optimized_node_selection);
    DBUG_PRINT("enter", ("optimized_node_selection: %u", opti_node_select));
    /*
      Unless bit 1 of optimized_node_selection allows delaying the
      transaction start (so it can later be hinted with a key value),
      start the NDB transaction now; LOAD DATA always starts it here.
    */
    if (!(opti_node_select & 2) ||
        thd->lex->sql_command == SQLCOM_LOAD)
      if (unlikely(!start_transaction(error)))
        DBUG_RETURN(error);

    thd_ndb->init_open_tables();
    thd_ndb->m_slow_path= FALSE;
    /* No binlogging (or statement-based) means row changes need not
       be logged by this transaction */
    if (!(thd_options(thd) & OPTION_BIN_LOG) ||
        thd->variables.binlog_format == BINLOG_FORMAT_STMT)
    {
      thd_ndb->trans_options|= TNTO_NO_LOGGING;
      thd_ndb->m_slow_path= TRUE;
    }
    else if (thd->slave_thread)
      thd_ndb->m_slow_path= TRUE;
  }
  DBUG_RETURN(0);
}
8327
/**
  Register 'handler' in thd_ndb->open_tables so that per-table
  statistics are tracked for the duration of the transaction.

  @param thd      Thread handle
  @param thd_ndb  Per-thread NDB context owning the open_tables hash
  @param handler  Handler instance to register

  @return 0 on success, 1 on out-of-memory
*/
int
ha_ndbcluster::add_handler_to_open_tables(THD *thd,
                                          Thd_ndb *thd_ndb,
                                          ha_ndbcluster* handler)
{
  DBUG_ENTER("ha_ndbcluster::add_handler_to_open_tables");
  DBUG_PRINT("info", ("Adding %s", handler->m_share->key_string()));

  /**
   * thd_ndb->open_tables is only used iff thd_ndb->m_handler is not
   */
  assert(thd_ndb->m_handler == NULL);
  const void *key= handler->m_share;
  HASH_SEARCH_STATE state;
  THD_NDB_SHARE *thd_ndb_share=
    (THD_NDB_SHARE*)my_hash_first(&thd_ndb->open_tables,
                                  (const uchar *)&key, sizeof(key),
                                  &state);
  /* Hash is keyed on the NDB_SHARE pointer value: step past collisions */
  while (thd_ndb_share && thd_ndb_share->key != key)
  {
    thd_ndb_share=
      (THD_NDB_SHARE*)my_hash_next(&thd_ndb->open_tables,
                                   (const uchar *)&key, sizeof(key),
                                   &state);
  }
  if (thd_ndb_share == 0)
  {
    /* First use of this table: allocate the stats entry from
       transaction memory (released together with the transaction) */
    thd_ndb_share=
      (THD_NDB_SHARE *) thd->get_transaction()->allocate_memory(sizeof(THD_NDB_SHARE));
    if (!thd_ndb_share)
    {
      mem_alloc_error(sizeof(THD_NDB_SHARE));
      DBUG_RETURN(1);
    }
    thd_ndb_share->key= key;
    thd_ndb_share->stat.last_count= thd_ndb->count;
    thd_ndb_share->stat.no_uncommitted_rows_count= 0;
    thd_ndb_share->stat.records= ~(ha_rows)0;  /* record count unknown */
    my_hash_insert(&thd_ndb->open_tables, (uchar *)thd_ndb_share);
  }
  else if (thd_ndb_share->stat.last_count != thd_ndb->count)
  {
    /* Entry left over from an earlier transaction: reset statistics */
    thd_ndb_share->stat.last_count= thd_ndb->count;
    thd_ndb_share->stat.no_uncommitted_rows_count= 0;
    thd_ndb_share->stat.records= ~(ha_rows)0;
  }

  handler->m_table_info= &thd_ndb_share->stat;
  DBUG_RETURN(0);
}
8378
/**
  Attach this handler instance to the statement's NDB transaction
  context and set up its per-statement state and statistics.

  @param thd  Thread handle
  @return 0 on success, non-zero error code otherwise
*/
int ha_ndbcluster::init_handler_for_statement(THD *thd)
{
  /*
    This is the place to make sure this handler instance
    has a started transaction.

    The transaction is started by the first handler on which
    MySQL Server calls external lock

    Other handlers in the same stmt or transaction should use
    the same NDB transaction. This is done by setting up the m_thd_ndb
    pointer to point to the NDB transaction object.
  */

  DBUG_ENTER("ha_ndbcluster::init_handler_for_statement");
  Thd_ndb *thd_ndb= m_thd_ndb;
  assert(thd_ndb);

  // store thread specific data first to set the right context
  m_autoincrement_prefetch= THDVAR(thd, autoincrement_prefetch_sz);
  // Start of transaction
  m_rows_changed= 0;
  m_blobs_pending= FALSE;
  release_blobs_buffer();
  m_slow_path= m_thd_ndb->m_slow_path;
#ifdef HAVE_NDB_BINLOG
  if (unlikely(m_slow_path))
  {
    /* NOTE(review): appears to mark that the slave statement itself
       writes ndb_apply_status; the flag is later passed to
       ndbcluster_update_apply_status() at commit — confirm */
    if (m_share == ndb_apply_status_share && thd->slave_thread)
      m_thd_ndb->trans_options|= TNTO_INJECTED_APPLY_STATUS;
  }
#endif

  int ret = 0;
  if (thd_ndb->m_handler == 0)
  {
    /* Multi-handler statement: track stats via the open_tables hash */
    assert(m_share);
    ret = add_handler_to_open_tables(thd, thd_ndb, this);
  }
  else
  {
    /* Single-handler (deferred-execute) statement: use the handler's
       own statistics instance */
    struct Ndb_local_table_statistics &stat= m_table_info_instance;
    stat.last_count= thd_ndb->count;
    stat.no_uncommitted_rows_count= 0;
    stat.records= ~(ha_rows)0;  /* record count unknown */
    m_table_info= &stat;
  }
  DBUG_RETURN(ret);
}
8428
/**
  Handler lock/unlock hook called by the server around statements.

  lock_type != F_UNLCK: this handler is about to take part in a
  statement — ensure a Thd_ndb connection exists and initialize the
  statement/transaction state (lock_count counts participating
  handlers).

  lock_type == F_UNLCK: the statement is done for this handler —
  record query-cache invalidation info and, when the last handler
  unlocks an autocommit read, close the never-committed transaction
  to release its resources.

  @param thd        Thread handle
  @param lock_type  F_UNLCK to unlock, anything else to lock
  @return 0 on success, non-zero error code otherwise
*/
int ha_ndbcluster::external_lock(THD *thd, int lock_type)
{
  DBUG_ENTER("external_lock");
  if (lock_type != F_UNLCK)
  {
    int error;
    /*
      Check that this handler instance has a connection
      set up to the Ndb object of thd
    */
    if (check_ndb_connection(thd))
      DBUG_RETURN(1);
    Thd_ndb *thd_ndb= get_thd_ndb(thd);

    DBUG_PRINT("enter", ("lock_type != F_UNLCK "
                         "this: 0x%lx thd: 0x%lx thd_ndb: %lx "
                         "thd_ndb->lock_count: %d",
                         (long) this, (long) thd, (long) thd_ndb,
                         thd_ndb->lock_count));

    if ((error= start_statement(thd, thd_ndb,
                                thd_ndb->lock_count++)))
    {
      /* Undo the lock_count increment on failure */
      thd_ndb->lock_count--;
      DBUG_RETURN(error);
    }
    if ((error= init_handler_for_statement(thd)))
    {
      thd_ndb->lock_count--;
      DBUG_RETURN(error);
    }
    DBUG_RETURN(0);
  }
  else
  {
    Thd_ndb *thd_ndb= m_thd_ndb;
    assert(thd_ndb);

    DBUG_PRINT("enter", ("lock_type == F_UNLCK "
                         "this: 0x%lx thd: 0x%lx thd_ndb: %lx "
                         "thd_ndb->lock_count: %d",
                         (long) this, (long) thd, (long) thd_ndb,
                         thd_ndb->lock_count));

    if (m_rows_changed && global_system_variables.query_cache_type)
    {
      DBUG_PRINT("info", ("Rows has changed"));

      if (thd_ndb->trans &&
          thd_options(thd) & (OPTION_NOT_AUTOCOMMIT | OPTION_BEGIN))
      {
        /* Remember the changed table so commit/rollback can process it */
        DBUG_PRINT("info", ("Add share to list of changed tables, %p",
                            m_share));
        /* NOTE push_back allocates memory using transactions mem_root! */
        thd_ndb->changed_tables.push_back(get_share(m_share),
                                          thd->get_transaction()->transaction_memroot());
      }

      if (opt_ndb_cache_check_time)
      {
        /* Invalidate the cached commit count under the share mutex */
        native_mutex_lock(&m_share->mutex);
        DBUG_PRINT("info", ("Invalidating commit_count"));
        m_share->commit_count= 0;
        m_share->commit_count_lock++;
        native_mutex_unlock(&m_share->mutex);
      }
    }

    if (!--thd_ndb->lock_count)
    {
      DBUG_PRINT("trans", ("Last external_lock"));

      if ((!(thd_options(thd) & (OPTION_NOT_AUTOCOMMIT | OPTION_BEGIN))) &&
          thd_ndb->trans)
      {
        if (thd_ndb->trans)
        {
          /*
            Unlock is done without a transaction commit / rollback.
            This happens if the thread didn't update any rows
            We must in this case close the transaction to release resources
          */
          DBUG_PRINT("trans",("ending non-updating transaction"));
          thd_ndb->ndb->closeTransaction(thd_ndb->trans);
          thd_ndb->trans= NULL;
          thd_ndb->m_handler= NULL;
        }
      }
    }
    m_table_info= NULL;

    /*
      This is the place to make sure this handler instance
      no longer are connected to the active transaction.

      And since the handler is no longer part of the transaction
      it can't have open cursors, ops, queries or blobs pending.
    */
    m_thd_ndb= NULL;

    assert(m_active_query == NULL);
    if (m_active_query)
      DBUG_PRINT("warning", ("m_active_query != NULL"));
    m_active_query= NULL;

    if (m_active_cursor)
      DBUG_PRINT("warning", ("m_active_cursor != NULL"));
    m_active_cursor= NULL;

    if (m_multi_cursor)
      DBUG_PRINT("warning", ("m_multi_cursor != NULL"));
    m_multi_cursor= NULL;

    if (m_blobs_pending)
      DBUG_PRINT("warning", ("blobs_pending != 0"));
    m_blobs_pending= 0;

    DBUG_RETURN(0);
  }
}
8549
/**
  Unlock the last row read in an open scan.
  Rows are unlocked by default in ndb, but
  for SELECT FOR UPDATE and SELECT LOCK IN SHARE MODE
  locks are kept if unlock_row() is not called.
*/
8556
unlock_row()8557 void ha_ndbcluster::unlock_row()
8558 {
8559 DBUG_ENTER("unlock_row");
8560
8561 DBUG_PRINT("info", ("Unlocking row"));
8562 m_lock_tuple= FALSE;
8563 DBUG_VOID_RETURN;
8564 }
8565
/**
  Start statement, used when one of the tables is locked and also when
  a stored function is executed.

  start_stmt()
    thd        Thd object
    lock_type  Lock type on table

  RETURN VALUE
    0   Success
    >0  Error code

  DESCRIPTION
    This call indicates the start of a statement when one of the tables
    in the statement is locked. In this case we cannot call external_lock.
    It also implies that external_lock is not called at end of statement.
    Rather the handlerton call commit (ndbcluster_commit) is called to
    indicate end of transaction. Thus there are cases when the commit call
    actually doesn't refer to a commit but only to an end of statement.

    In the case of stored functions, one stored function is treated as one
    statement and the call to commit comes at the end of the stored function.
*/
8589
start_stmt(THD * thd,thr_lock_type lock_type)8590 int ha_ndbcluster::start_stmt(THD *thd, thr_lock_type lock_type)
8591 {
8592 int error=0;
8593 Thd_ndb *thd_ndb;
8594 DBUG_ENTER("start_stmt");
8595 assert(thd == table->in_use);
8596
8597 thd_ndb= get_thd_ndb(thd);
8598 if ((error= start_statement(thd, thd_ndb, thd_ndb->start_stmt_count++)))
8599 goto error;
8600 if ((error= init_handler_for_statement(thd)))
8601 goto error;
8602 DBUG_RETURN(0);
8603 error:
8604 thd_ndb->start_stmt_count--;
8605 DBUG_RETURN(error);
8606 }
8607
/**
  Start a new NDB transaction, using the key values in
  ndb_record/record as a placement hint for the transaction
  coordinator.

  @param ndb_record  NdbRecord describing the key layout of 'record'
  @param record      Row buffer holding the key values
  @param error [out] Set to the mapped NDB error code on failure

  @return the started transaction, or NULL on failure (error is set)
*/
NdbTransaction *
ha_ndbcluster::start_transaction_row(const NdbRecord *ndb_record,
                                     const uchar *record,
                                     int &error)
{
  NdbTransaction *trans;
  DBUG_ENTER("ha_ndbcluster::start_transaction_row");
  assert(m_thd_ndb);
  assert(m_thd_ndb->trans == NULL);

  m_thd_ndb->transaction_checks();

  Ndb *ndb= m_thd_ndb->ndb;

  /* Scratch buffer passed to startTransaction() for key processing */
  Uint64 tmp[(MAX_KEY_SIZE_IN_WORDS*MAX_XFRM_MULTIPLY) >> 1];
  char *buf= (char*)&tmp[0];
  trans= ndb->startTransaction(ndb_record,
                               (const char*)record,
                               buf, sizeof(tmp));

  if (trans)
  {
    /* Statistics: transaction started with a TC placement hint */
    m_thd_ndb->m_transaction_hint_count[trans->getConnectedNodeId()]++;
    DBUG_PRINT("info", ("Delayed allocation of TC"));
    DBUG_RETURN(m_thd_ndb->trans= trans);
  }

  ERR_SET(m_thd_ndb->ndb->getNdbError(), error);
  DBUG_RETURN(NULL);
}
8638
/**
  Start a new NDB transaction, using the value of unique index
  'inx_no' as a placement hint for the transaction coordinator.

  @param inx_no      Index number of the unique index
  @param key_data    Row buffer holding the key value
  @param error [out] Set to the mapped NDB error code on failure

  @return the started transaction, or NULL on failure (error is set)
*/
NdbTransaction *
ha_ndbcluster::start_transaction_key(uint inx_no,
                                     const uchar *key_data,
                                     int &error)
{
  NdbTransaction *trans;
  DBUG_ENTER("ha_ndbcluster::start_transaction_key");
  assert(m_thd_ndb);
  assert(m_thd_ndb->trans == NULL);

  m_thd_ndb->transaction_checks();

  Ndb *ndb= m_thd_ndb->ndb;
  const NdbRecord *key_rec= m_index[inx_no].ndb_unique_record_key;

  /* Scratch buffer passed to startTransaction() for key processing */
  Uint64 tmp[(MAX_KEY_SIZE_IN_WORDS*MAX_XFRM_MULTIPLY) >> 1];
  char *buf= (char*)&tmp[0];
  trans= ndb->startTransaction(key_rec,
                               (const char*)key_data,
                               buf, sizeof(tmp));

  if (trans)
  {
    /* Statistics: transaction started with a TC placement hint */
    m_thd_ndb->m_transaction_hint_count[trans->getConnectedNodeId()]++;
    DBUG_PRINT("info", ("Delayed allocation of TC"));
    DBUG_RETURN(m_thd_ndb->trans= trans);
  }

  ERR_SET(m_thd_ndb->ndb->getNdbError(), error);
  DBUG_RETURN(NULL);
}
8670
/**
  Start a new NDB transaction without any placement hint.

  Node selection is controlled by bit 0 of the session's
  optimized_node_selection variable.

  @param error [out] Set to the mapped NDB error code on failure
  @return the started transaction, or NULL on failure (error is set)
*/
NdbTransaction *
ha_ndbcluster::start_transaction(int &error)
{
  NdbTransaction *trans;
  DBUG_ENTER("ha_ndbcluster::start_transaction");

  assert(m_thd_ndb);
  assert(m_thd_ndb->trans == NULL);

  m_thd_ndb->transaction_checks();

  const uint opti_node_select= THDVAR(table->in_use, optimized_node_selection);
  m_thd_ndb->connection->set_optimized_node_selection(opti_node_select & 1);
  if ((trans= m_thd_ndb->ndb->startTransaction()))
  {
    /* Statistics: transaction started without a TC placement hint */
    m_thd_ndb->m_transaction_no_hint_count[trans->getConnectedNodeId()]++;
    DBUG_PRINT("info", ("Delayed allocation of TC"));
    DBUG_RETURN(m_thd_ndb->trans= trans);
  }

  ERR_SET(m_thd_ndb->ndb->getNdbError(), error);
  DBUG_RETURN(NULL);
}
8694
/**
  Start a new NDB transaction, using partition 'part_id' of this
  table as a placement hint for the transaction coordinator.

  @param part_id     Partition id used as hint
  @param error [out] Set to the mapped NDB error code on failure
  @return the started transaction, or NULL on failure (error is set)
*/
NdbTransaction *
ha_ndbcluster::start_transaction_part_id(Uint32 part_id, int &error)
{
  NdbTransaction *trans;
  DBUG_ENTER("ha_ndbcluster::start_transaction_part_id");

  assert(m_thd_ndb);
  assert(m_thd_ndb->trans == NULL);

  m_thd_ndb->transaction_checks();

  if ((trans= m_thd_ndb->ndb->startTransaction(m_table, part_id)))
  {
    /* Statistics: transaction started with a TC placement hint */
    m_thd_ndb->m_transaction_hint_count[trans->getConnectedNodeId()]++;
    DBUG_PRINT("info", ("Delayed allocation of TC"));
    DBUG_RETURN(m_thd_ndb->trans= trans);
  }

  ERR_SET(m_thd_ndb->ndb->getNdbError(), error);
  DBUG_RETURN(NULL);
}
8716
8717 /**
8718 Static error print function called from static handler method
8719 ndbcluster_commit and ndbcluster_rollback.
8720 */
8721 static void
ndbcluster_print_error(int error,const NdbOperation * error_op)8722 ndbcluster_print_error(int error, const NdbOperation *error_op)
8723 {
8724 DBUG_ENTER("ndbcluster_print_error");
8725 TABLE_SHARE share;
8726 const char *tab_name= (error_op) ? error_op->getTableName() : "";
8727 if (tab_name == NULL)
8728 {
8729 assert(tab_name != NULL);
8730 tab_name= "";
8731 }
8732 share.db.str= (char*) "";
8733 share.db.length= 0;
8734 share.table_name.str= (char *) tab_name;
8735 share.table_name.length= strlen(tab_name);
8736 ha_ndbcluster error_handler(ndbcluster_hton, &share);
8737 error_handler.print_error(error, MYF(0));
8738 DBUG_VOID_RETURN;
8739 }
8740
8741
8742 /**
8743 Commit a transaction started in NDB.
8744 */
8745
/**
  @param hton  NDB handlerton (unused directly)
  @param thd   Thread handle
  @param all   true for a real commit, false for end-of-statement
  @return 0 on success, mysql error code otherwise
*/
int ndbcluster_commit(handlerton *hton, THD *thd, bool all)
{
  int res= 0;
  Thd_ndb *thd_ndb= get_thd_ndb(thd);
  Ndb *ndb= thd_ndb->ndb;
  NdbTransaction *trans= thd_ndb->trans;
  bool retry_slave_trans = false;
  (void) retry_slave_trans;  /* only read under HAVE_NDB_BINLOG */

  DBUG_ENTER("ndbcluster_commit");
  assert(ndb);
  DBUG_PRINT("enter", ("Commit %s", (all ? "all" : "stmt")));
  thd_ndb->start_stmt_count= 0;
  if (trans == NULL)
  {
    /* Nothing was started in NDB: nothing to commit or clean up */
    DBUG_PRINT("info", ("trans == NULL"));
    DBUG_RETURN(0);
  }
  if (!all && (thd_options(thd) & (OPTION_NOT_AUTOCOMMIT | OPTION_BEGIN)))
  {
    /*
      An odditity in the handler interface is that commit on handlerton
      is called to indicate end of statement only in cases where
      autocommit isn't used and the all flag isn't set.

      We also leave quickly when a transaction haven't even been started,
      in this case we are safe that no clean up is needed. In this case
      the MySQL Server could handle the query without contacting the
      NDB kernel.
    */
    thd_ndb->save_point_count++;
    DBUG_PRINT("info", ("Commit before start or end-of-statement only"));
    DBUG_RETURN(0);
  }
  thd_ndb->save_point_count= 0;

#ifdef HAVE_NDB_BINLOG
  if (unlikely(thd_ndb->m_slow_path))
  {
    /* Keep the ndb_apply_status table in sync for slave transactions */
    if (thd->slave_thread)
      ndbcluster_update_apply_status
        (thd, thd_ndb->trans_options & TNTO_INJECTED_APPLY_STATUS);
  }
#endif /* HAVE_NDB_BINLOG */

  if (thd->slave_thread)
  {
#ifdef HAVE_NDB_BINLOG
    /* If this slave transaction has included conflict detecting ops
     * and some defined operations are not yet sent, then perform
     * an execute(NoCommit) before committing, as conflict op handling
     * is done by execute(NoCommit)
     */
    /* TODO : Add as function */
    if (g_ndb_slave_state.conflict_flags & SCS_OPS_DEFINED)
    {
      if (thd_ndb->m_unsent_bytes)
        res = execute_no_commit(thd_ndb, trans, TRUE);
    }

    if (likely(res == 0))
      res = g_ndb_slave_state.atConflictPreCommit(retry_slave_trans);
#endif /* HAVE_NDB_BINLOG */

    if (likely(res == 0))
      res= execute_commit(thd_ndb, trans, 1, TRUE);

    update_slave_api_stats(thd_ndb->ndb);
  }
  else
  {
    if (thd_ndb->m_handler &&
        thd_ndb->m_handler->m_read_before_write_removal_possible)
    {
      /*
        This is an autocommit involving only one table and
        rbwr is on, thus the transaction has already been
        committed in exec_bulk_update() or end_bulk_delete()
      */
      DBUG_PRINT("info", ("autocommit+rbwr, transaction already committed"));
      const NdbTransaction::CommitStatusType commitStatus = trans->commitStatus();

      if(commitStatus == NdbTransaction::Committed)
      {
        /* Already committed transaction to save roundtrip */
        assert(get_thd_ndb(current_thd)->m_error == FALSE);
      }
      else if(commitStatus == NdbTransaction::Aborted)
      {
        /* Commit failed before transaction was started */
        assert(get_thd_ndb(current_thd)->m_error == TRUE);
      }
      else if(commitStatus == NdbTransaction::NeedAbort)
      {
        /* Commit attempt failed and rollback is needed */
        res = -1;

      }
      else
      {
        /* Commit was never attempted - this should not be possible */
        assert(commitStatus == NdbTransaction::Started || commitStatus == NdbTransaction::NotStarted);
        sql_print_error("found uncommitted autocommit+rbwr transaction, "
                        "commit status: %d", commitStatus);
        abort();
      }
    }
    else
    {
      /* Normal commit path */
      const bool ignore_error= applying_binlog(thd);
      res= execute_commit(thd_ndb, trans,
                          THDVAR(thd, force_send),
                          ignore_error);
    }
  }

  if (res != 0)
  {
#ifdef HAVE_NDB_BINLOG
    if (retry_slave_trans)
    {
      if (st_ndb_slave_state::MAX_RETRY_TRANS_COUNT >
          g_ndb_slave_state.retry_trans_count++)
      {
        /*
          Warning is necessary to cause retry from slave.cc
          exec_relay_log_event()
        */
        push_warning(thd, Sql_condition::SL_WARNING,
                     ER_SLAVE_SILENT_RETRY_TRANSACTION,
                     "Slave transaction rollback requested");
        /*
          Set retry count to zero to:
          1) Avoid consuming slave-temp-error retry attempts
          2) Ensure no inter-attempt sleep

          Better fix : Save + restore retry count around transactional
          conflict handling
        */
        ndb_mi_set_relay_log_trans_retries(0);
      }
      else
      {
        /*
          Too many retries, print error and exit - normal
          too many retries mechanism will cause exit
        */
        sql_print_error("Ndb slave retried transaction %u time(s) in vain. Giving up.",
                        st_ndb_slave_state::MAX_RETRY_TRANS_COUNT);
      }
      res= ER_GET_TEMPORARY_ERRMSG;
    }
    else
#endif
    {
      /* Map the NDB error to a mysql error and print it */
      const NdbError err= trans->getNdbError();
      const NdbOperation *error_op= trans->getNdbErrorOperation();
      res= ndb_to_mysql_error(&err);
      if (res != -1)
        ndbcluster_print_error(res, error_op);
    }
  }
  else
  {
    /* Update shared statistics for tables inserted into / deleted from*/
    if (thd_ndb->m_handler && // Autocommit Txn
        thd_ndb->m_handler->m_share &&
        thd_ndb->m_handler->m_table_info)
    {
      modify_shared_stats(thd_ndb->m_handler->m_share, thd_ndb->m_handler->m_table_info);
    }

    /* Manual commit: Update all affected NDB_SHAREs found in 'open_tables' */
    for (uint i= 0; i<thd_ndb->open_tables.records; i++)
    {
      THD_NDB_SHARE *thd_share=
        (THD_NDB_SHARE*)my_hash_element(&thd_ndb->open_tables, i);
      modify_shared_stats((NDB_SHARE*)thd_share->key, &thd_share->stat);
    }
  }

  ndb->closeTransaction(trans);
  thd_ndb->trans= NULL;
  thd_ndb->m_handler= NULL;

  /* Clear commit_count for tables changed by transaction */
  NDB_SHARE* share;
  List_iterator_fast<NDB_SHARE> it(thd_ndb->changed_tables);
  while ((share= it++))
  {
    DBUG_PRINT("info", ("Remove share to list of changed tables, %p",
                        share));
    native_mutex_lock(&share->mutex);
    DBUG_PRINT("info", ("Invalidate commit_count for %s, share->commit_count: %lu",
                        share->table_name, (ulong) share->commit_count));
    share->commit_count= 0;
    share->commit_count_lock++;
    native_mutex_unlock(&share->mutex);
    free_share(&share);
  }
  thd_ndb->changed_tables.empty();

  DBUG_RETURN(res);
}
8950
8951
8952 /**
8953 Rollback a transaction started in NDB.
8954 */
8955
/**
  @param hton  NDB handlerton (unused directly)
  @param thd   Thread handle
  @param all   true for a full rollback, false for end-of-statement
  @return 0 on success, mysql error code otherwise
*/
static int ndbcluster_rollback(handlerton *hton, THD *thd, bool all)
{
  int res= 0;
  Thd_ndb *thd_ndb= get_thd_ndb(thd);
  Ndb *ndb= thd_ndb->ndb;
  NdbTransaction *trans= thd_ndb->trans;

  DBUG_ENTER("ndbcluster_rollback");
  DBUG_PRINT("enter", ("all: %d thd_ndb->save_point_count: %d",
                       all, thd_ndb->save_point_count));
  assert(ndb);
  thd_ndb->start_stmt_count= 0;
  if (trans == NULL)
  {
    /* Ignore end-of-statement until real rollback or commit is called */
    DBUG_PRINT("info", ("trans == NULL"));
    DBUG_RETURN(0);
  }
  if (!all && (thd_options(thd) & (OPTION_NOT_AUTOCOMMIT | OPTION_BEGIN)) &&
      (thd_ndb->save_point_count > 0))
  {
    /*
      Ignore end-of-statement until real rollback or commit is called
      as ndb does not support rollback statement
      - mark that rollback was unsuccessful, this will cause full rollback
      of the transaction
    */
    DBUG_PRINT("info", ("Rollback before start or end-of-statement only"));
    thd_mark_transaction_to_rollback(thd, 1);
    my_error(ER_WARN_ENGINE_TRANSACTION_ROLLBACK, MYF(0), "NDB");
    DBUG_RETURN(0);
  }
  thd_ndb->save_point_count= 0;
  if (thd->slave_thread)
    g_ndb_slave_state.atTransactionAbort();
  thd_ndb->m_unsent_bytes= 0;
  thd_ndb->m_execute_count++;
  DBUG_PRINT("info", ("execute_count: %u", thd_ndb->m_execute_count));
  /* Ask the NDB kernel to roll the transaction back */
  if (trans->execute(NdbTransaction::Rollback) != 0)
  {
    const NdbError err= trans->getNdbError();
    const NdbOperation *error_op= trans->getNdbErrorOperation();
    res= ndb_to_mysql_error(&err);
    if (res != -1)
      ndbcluster_print_error(res, error_op);
  }
  ndb->closeTransaction(trans);
  thd_ndb->trans= NULL;
  thd_ndb->m_handler= NULL;

  /* Clear list of tables changed by transaction */
  NDB_SHARE* share;
  List_iterator_fast<NDB_SHARE> it(thd_ndb->changed_tables);
  while ((share= it++))
  {
    DBUG_PRINT("info", ("Remove share to list of changed tables, %p",
                        share));
    free_share(&share);
  }
  thd_ndb->changed_tables.empty();

  if (thd->slave_thread)
    update_slave_api_stats(thd_ndb->ndb);

  DBUG_RETURN(res);
}
9022
9023 /**
9024 * Support for create table/column modifiers
9025 * by exploiting the comment field
9026 */
struct NDB_Modifier
{
  enum { M_BOOL } m_type;   /* value type (only booleans so far) */
  const char * m_name;      /* modifier name, e.g. "NOLOGGING" */
  size_t m_name_len;        /* length of m_name */
  bool m_found;             /* set once the modifier was seen while parsing */
  union {
    bool m_val_bool;        /* parsed value when m_type == M_BOOL */
#ifdef TODO__
    int m_val_int;
    struct {
      const char * str;
      size_t len;
    } m_val_str;
#endif
  };
};
9044
/* Modifiers recognized in a table comment;
   the entry with a NULL name terminates the list */
static const
struct NDB_Modifier ndb_table_modifiers[] =
{
  { NDB_Modifier::M_BOOL, STRING_WITH_LEN("NOLOGGING"), 0, {0} },
  { NDB_Modifier::M_BOOL, 0, 0, 0, {0} }
};
9051
/* Modifiers recognized in a column comment after "NDB_COLUMN=";
   the entry with a NULL name terminates the list */
static const
struct NDB_Modifier ndb_column_modifiers[] =
{
  { NDB_Modifier::M_BOOL, STRING_WITH_LEN("MAX_BLOB_PART_SIZE"), 0, {0} },
  { NDB_Modifier::M_BOOL, 0, 0, 0, {0} }
};
9058
9059 /**
9060 * NDB_Modifiers
9061 *
9062 * This class implements a simple parser for getting modifiers out
9063 * of a string (e.g a comment field)
9064 */
class NDB_Modifiers
{
public:
  NDB_Modifiers(const NDB_Modifier modifiers[]);
  ~NDB_Modifiers();

  /**
   * parse string-with length (not necessarily NULL terminated)
   */
  int parse(THD* thd, const char * prefix, const char * str, size_t strlen);

  /**
   * Get modifier...returns NULL if unknown
   */
  const NDB_Modifier * get(const char * name) const;
private:
  uint m_len;                         // number of entries in m_modifiers
  struct NDB_Modifier * m_modifiers;  // private writable copy of the table

  /* Parse the value part of a single modifier (helper for parse()) */
  int parse_modifier(THD *thd, const char * prefix,
                     struct NDB_Modifier* m, const char * str);
};
9087
/* A modifier token ends at NUL, space or comma */
static
bool
end_of_token(const char * str)
{
  const char ch = str[0];
  return (ch == '\0') || (ch == ' ') || (ch == ',');
}
9094
NDB_Modifiers(const NDB_Modifier modifiers[])9095 NDB_Modifiers::NDB_Modifiers(const NDB_Modifier modifiers[])
9096 {
9097 for (m_len = 0; modifiers[m_len].m_name != 0; m_len++)
9098 {}
9099 m_modifiers = new NDB_Modifier[m_len];
9100 memcpy(m_modifiers, modifiers, m_len * sizeof(NDB_Modifier));
9101 }
9102
NDB_Modifiers::~NDB_Modifiers()
{
  /* Free the private copy allocated in the constructor */
  delete [] m_modifiers;
}
9107
/**
 * Parse the value part of one modifier into 'm'; 'str' points just
 * past the modifier name.
 *
 * Accepted forms for an M_BOOL modifier:
 *   <name>      -> true
 *   <name>=1    -> true
 *   <name>=0    -> false
 *
 * @return 0 on success (and m->m_found is set),
 *         -1 on invalid value (a warning has been pushed)
 */
int
NDB_Modifiers::parse_modifier(THD *thd,
                              const char * prefix,
                              struct NDB_Modifier* m,
                              const char * str)
{
  if (m->m_found)
  {
    /* Duplicate specification: warn, but continue parsing */
    push_warning_printf(thd, Sql_condition::SL_WARNING,
                        ER_ILLEGAL_HA_CREATE_OPTION,
                        "%s : modifier %s specified twice",
                        prefix, m->m_name);
  }

  switch(m->m_type){
  case NDB_Modifier::M_BOOL:
    if (end_of_token(str))
    {
      /* Bare modifier name means "true" */
      m->m_val_bool = true;
      goto found;
    }
    if (str[0] != '=')
      break;  /* leaves the switch: reported as invalid value below */

    str++;
    if (str[0] == '1' && end_of_token(str+1))
    {
      m->m_val_bool = true;
      goto found;
    }

    if (str[0] == '0' && end_of_token(str+1))
    {
      m->m_val_bool = false;
      goto found;
    }
  }

  /* Invalid value: warn, quoting up to the next separator */
  {
    const char * end = strpbrk(str, " ,");
    if (end)
    {
      push_warning_printf(thd, Sql_condition::SL_WARNING,
                          ER_ILLEGAL_HA_CREATE_OPTION,
                          "%s : invalid value '%.*s' for %s",
                          prefix, (int)(end - str), str, m->m_name);
    }
    else
    {
      push_warning_printf(thd, Sql_condition::SL_WARNING,
                          ER_ILLEGAL_HA_CREATE_OPTION,
                          "%s : invalid value '%s' for %s",
                          prefix, str, m->m_name);
    }
  }
  return -1;
found:
  m->m_found = true;
  return 0;
}
9168
/**
 * Find 'prefix' in _source (a string of _source_len bytes, not
 * necessarily NUL-terminated) and parse the comma-separated modifier
 * list that follows it, recording results in m_modifiers.
 *
 * Unknown modifiers and invalid values only produce warnings;
 * parsing continues with the next modifier.
 *
 * @return 0 (also when prefix is absent), -1 on out-of-memory
 */
int
NDB_Modifiers::parse(THD *thd,
                     const char * prefix,
                     const char * _source,
                     size_t _source_len)
{
  if (_source == 0 || _source_len == 0)
    return 0;

  const char * source = 0;

  /**
   * Check if _source is NULL-terminated
   */
  for (size_t i = 0; i<_source_len; i++)
  {
    if (_source[i] == 0)
    {
      source = _source;
      break;
    }
  }

  if (source == 0)
  {
    /**
     * Make NULL terminated string so that strXXX-functions are safe
     */
    char * tmp = new char[_source_len+1];
    if (tmp == 0)
    {
      push_warning_printf(thd, Sql_condition::SL_WARNING,
                          ER_ILLEGAL_HA_CREATE_OPTION,
                          "%s : unable to parse due to out of memory",
                          prefix);
      return -1;
    }
    memcpy(tmp, _source, _source_len);
    tmp[_source_len] = 0;
    source = tmp;
  }

  const char * pos = source;
  if ((pos = strstr(pos, prefix)) == 0)
  {
    /* Prefix not present: free the local copy (if any) and succeed */
    if (source != _source)
      delete [] source;
    return 0;
  }

  pos += strlen(prefix);

  /* The modifier list ends at end-of-string or at the first space */
  while (pos && pos[0] != 0 && pos[0] != ' ')
  {
    const char * end = strpbrk(pos, " ,"); // end of current modifier

    for (uint i = 0; i < m_len; i++)
    {
      size_t l = m_modifiers[i].m_name_len;
      if (strncmp(pos, m_modifiers[i].m_name, l) == 0)
      {
        /**
         * Found modifier...
         */

        /* Name must be followed by a separator or '=' to match */
        if (! (end_of_token(pos + l) || pos[l] == '='))
          goto unknown;

        pos += l;
        int res = parse_modifier(thd, prefix, m_modifiers+i, pos);

        if (res == -1)
        {
          /**
           * We continue parsing even if modifier had error
           */
        }

        goto next;
      }
    }

    {
  unknown:
      /* Modifier name not in the table: warn and skip to the next one */
      if (end)
      {
        push_warning_printf(thd, Sql_condition::SL_WARNING,
                            ER_ILLEGAL_HA_CREATE_OPTION,
                            "%s : unknown modifier: %.*s",
                            prefix, (int)(end - pos), pos);
      }
      else
      {
        push_warning_printf(thd, Sql_condition::SL_WARNING,
                            ER_ILLEGAL_HA_CREATE_OPTION,
                            "%s : unknown modifier: %s",
                            prefix, pos);
      }
    }

  next:
    pos = end;
    if (pos && pos[0] == ',')
      pos++;
  }

  if (source != _source)
    delete [] source;

  return 0;
}
9280
9281 const NDB_Modifier *
get(const char * name) const9282 NDB_Modifiers::get(const char * name) const
9283 {
9284 for (uint i = 0; i < m_len; i++)
9285 {
9286 if (strcmp(name, m_modifiers[i].m_name) == 0)
9287 {
9288 return m_modifiers + i;
9289 }
9290 }
9291 return 0;
9292 }
9293
9294 /**
9295 Define NDB column based on Field.
9296
9297 Not member of ha_ndbcluster because NDBCOL cannot be declared.
9298
9299 MySQL text types with character set "binary" are mapped to true
9300 NDB binary types without a character set.
9301
9302 Blobs are V2 and striping from mysql level is not supported
9303 due to lack of syntax and lack of support for partitioning.
9304
9305 @return
9306 Returns 0 or mysql error code.
9307 */
9308
/**
  Debug-build helper: blob striping can be force-enabled via the
  NDB_BLOB_STRIPING environment variable (any non-empty value not
  starting with '0', 'n' or 'N').  Always false in release builds.
*/
static bool
ndb_blob_striping()
{
#ifndef NDEBUG
  const char* env= getenv("NDB_BLOB_STRIPING");
  const bool enabled= (env != 0 && env[0] != 0 &&
                       env[0] != '0' && env[0] != 'n' && env[0] != 'N');
  if (enabled)
    return true;
#endif
  return false;
}
9319
#if NDB_VERSION_D < NDB_MAKE_VERSION(7,2,0)
/* Max tuple size (in 32-bit words) when building against pre-7.2.0
   headers; equals the current NDB limit on 7.2.0 and later */
const Uint32 OLD_NDB_MAX_TUPLE_SIZE_IN_WORDS = 2013;
#else
const Uint32 OLD_NDB_MAX_TUPLE_SIZE_IN_WORDS = NDB_MAX_TUPLE_SIZE_IN_WORDS;
#endif
9325
static int
create_ndb_column(THD *thd,
                  NDBCOL &col,
                  Field *field,
                  HA_CREATE_INFO *create_info,
                  column_format_type default_format= COLUMN_FORMAT_TYPE_DEFAULT)
{
  NDBCOL::StorageType type= NDBCOL::StorageTypeMemory;
  bool dynamic= FALSE;

  char buf[MAX_ATTR_DEFAULT_VALUE_SIZE];
  DBUG_ENTER("create_ndb_column");
  // Set name
  if (col.setName(field->field_name))
  {
    set_my_errno(errno);
    DBUG_RETURN(errno);
  }
  // Get char set
  CHARSET_INFO *cs= const_cast<CHARSET_INFO*>(field->charset());
  // Set type and sizes
  const enum enum_field_types mysql_type= field->real_type();

  // Parse per-column modifiers given in the column COMMENT
  // (e.g. COMMENT 'NDB_COLUMN=MAX_BLOB_PART_SIZE')
  NDB_Modifiers column_modifiers(ndb_column_modifiers);
  column_modifiers.parse(thd, "NDB_COLUMN=",
                         field->comment.str,
                         field->comment.length);

  const NDB_Modifier * mod_maxblob = column_modifiers.get("MAX_BLOB_PART_SIZE");

  {
    /* Clear default value (col obj is reused for whole table def) */
    col.setDefaultValue(NULL, 0);

    /* If the data nodes are capable then set native
     * default.
     */
    bool nativeDefaults =
      ! (thd &&
         (! ndb_native_default_support(get_thd_ndb(thd)->
                                       ndb->getMinDbNodeVersion())));

    if (likely( nativeDefaults ))
    {
      /* Native defaults are only set for non-PK columns of types that
         support them, and only when the column has an explicit or
         implicit default value. */
      if ((!(field->flags & PRI_KEY_FLAG) ) &&
          type_supports_default_value(mysql_type))
      {
        if (!(field->flags & NO_DEFAULT_VALUE_FLAG))
        {
          my_ptrdiff_t src_offset= field->table->default_values_offset();
          if ((! field->is_real_null(src_offset)) ||
              ((field->flags & NOT_NULL_FLAG)))
          {
            /* Set a non-null native default */
            memset(buf, 0, MAX_ATTR_DEFAULT_VALUE_SIZE);
            get_default_value(buf, field);

            /* For bit columns, default length is rounded up to
               nearest word, ensuring all data sent
            */
            Uint32 defaultLen = field_used_length(field);
            if(field->type() == MYSQL_TYPE_BIT)
              defaultLen = ((defaultLen + 3) /4) * 4;
            col.setDefaultValue(buf, defaultLen);
          }
        }
      }
    }
  }
  /* Map the MySQL field type onto the corresponding NDB column type.
     Length is in units of the NDB type (1 for fixed-size scalars). */
  switch (mysql_type) {
  // Numeric types
  case MYSQL_TYPE_TINY:
    if (field->flags & UNSIGNED_FLAG)
      col.setType(NDBCOL::Tinyunsigned);
    else
      col.setType(NDBCOL::Tinyint);
    col.setLength(1);
    break;
  case MYSQL_TYPE_SHORT:
    if (field->flags & UNSIGNED_FLAG)
      col.setType(NDBCOL::Smallunsigned);
    else
      col.setType(NDBCOL::Smallint);
    col.setLength(1);
    break;
  case MYSQL_TYPE_LONG:
    if (field->flags & UNSIGNED_FLAG)
      col.setType(NDBCOL::Unsigned);
    else
      col.setType(NDBCOL::Int);
    col.setLength(1);
    break;
  case MYSQL_TYPE_INT24:
    if (field->flags & UNSIGNED_FLAG)
      col.setType(NDBCOL::Mediumunsigned);
    else
      col.setType(NDBCOL::Mediumint);
    col.setLength(1);
    break;
  case MYSQL_TYPE_LONGLONG:
    if (field->flags & UNSIGNED_FLAG)
      col.setType(NDBCOL::Bigunsigned);
    else
      col.setType(NDBCOL::Bigint);
    col.setLength(1);
    break;
  case MYSQL_TYPE_FLOAT:
    col.setType(NDBCOL::Float);
    col.setLength(1);
    break;
  case MYSQL_TYPE_DOUBLE:
    col.setType(NDBCOL::Double);
    col.setLength(1);
    break;
  case MYSQL_TYPE_DECIMAL:
    {
      /* Pre-5.0 decimal: precision derived from pack length, adjusted
         for the sign byte and decimal point which are stored inline. */
      Field_decimal *f= (Field_decimal*)field;
      uint precision= f->pack_length();
      uint scale= f->decimals();
      if (field->flags & UNSIGNED_FLAG)
      {
        col.setType(NDBCOL::Olddecimalunsigned);
        precision-= (scale > 0);
      }
      else
      {
        col.setType(NDBCOL::Olddecimal);
        precision-= 1 + (scale > 0);
      }
      col.setPrecision(precision);
      col.setScale(scale);
      col.setLength(1);
    }
    break;
  case MYSQL_TYPE_NEWDECIMAL:
    {
      Field_new_decimal *f= (Field_new_decimal*)field;
      uint precision= f->precision;
      uint scale= f->decimals();
      if (field->flags & UNSIGNED_FLAG)
      {
        col.setType(NDBCOL::Decimalunsigned);
      }
      else
      {
        col.setType(NDBCOL::Decimal);
      }
      col.setPrecision(precision);
      col.setScale(scale);
      col.setLength(1);
    }
    break;
  // Date types
  case MYSQL_TYPE_DATETIME:
    col.setType(NDBCOL::Datetime);
    col.setLength(1);
    break;
  case MYSQL_TYPE_DATETIME2:
    {
      /* Fractional-second precision is carried in the NDB precision */
      Field_datetimef *f= (Field_datetimef*)field;
      uint prec= f->decimals();
      col.setType(NDBCOL::Datetime2);
      col.setLength(1);
      col.setPrecision(prec);
    }
    break;
  case MYSQL_TYPE_DATE: // ?
    col.setType(NDBCOL::Char);
    col.setLength(field->pack_length());
    break;
  case MYSQL_TYPE_NEWDATE:
    col.setType(NDBCOL::Date);
    col.setLength(1);
    break;
  case MYSQL_TYPE_TIME:
    col.setType(NDBCOL::Time);
    col.setLength(1);
    break;
  case MYSQL_TYPE_TIME2:
    {
      Field_timef *f= (Field_timef*)field;
      uint prec= f->decimals();
      col.setType(NDBCOL::Time2);
      col.setLength(1);
      col.setPrecision(prec);
    }
    break;
  case MYSQL_TYPE_YEAR:
    col.setType(NDBCOL::Year);
    col.setLength(1);
    break;
  case MYSQL_TYPE_TIMESTAMP:
    col.setType(NDBCOL::Timestamp);
    col.setLength(1);
    break;
  case MYSQL_TYPE_TIMESTAMP2:
    {
      Field_timestampf *f= (Field_timestampf*)field;
      uint prec= f->decimals();
      col.setType(NDBCOL::Timestamp2);
      col.setLength(1);
      col.setPrecision(prec);
    }
    break;
  // Char types
  case MYSQL_TYPE_STRING:
    if (field->pack_length() == 0)
    {
      /* Zero-length CHAR is represented as a 1-bit Bit column */
      col.setType(NDBCOL::Bit);
      col.setLength(1);
    }
    else if ((field->flags & BINARY_FLAG) && cs == &my_charset_bin)
    {
      col.setType(NDBCOL::Binary);
      col.setLength(field->pack_length());
    }
    else
    {
      col.setType(NDBCOL::Char);
      col.setCharset(cs);
      col.setLength(field->pack_length());
    }
    break;
  case MYSQL_TYPE_VAR_STRING: // ?
  case MYSQL_TYPE_VARCHAR:
    {
      /* 1-byte length prefix maps to Varchar/Varbinary,
         2-byte prefix to the Long variants */
      Field_varstring* f= (Field_varstring*)field;
      if (f->length_bytes == 1)
      {
        if ((field->flags & BINARY_FLAG) && cs == &my_charset_bin)
          col.setType(NDBCOL::Varbinary);
        else {
          col.setType(NDBCOL::Varchar);
          col.setCharset(cs);
        }
      }
      else if (f->length_bytes == 2)
      {
        if ((field->flags & BINARY_FLAG) && cs == &my_charset_bin)
          col.setType(NDBCOL::Longvarbinary);
        else {
          col.setType(NDBCOL::Longvarchar);
          col.setCharset(cs);
        }
      }
      else
      {
        DBUG_RETURN(HA_ERR_UNSUPPORTED);
      }
      col.setLength(field->field_length);
    }
    break;
  // Blob types (all come in as MYSQL_TYPE_BLOB)
  mysql_type_tiny_blob:
  case MYSQL_TYPE_TINY_BLOB:
    if ((field->flags & BINARY_FLAG) && cs == &my_charset_bin)
      col.setType(NDBCOL::Blob);
    else {
      col.setType(NDBCOL::Text);
      col.setCharset(cs);
    }
    col.setInlineSize(256);
    // No parts
    col.setPartSize(0);
    // NOTE(review): both branches are 0, so striping is always off here;
    // kept as-is for symmetry with the other blob cases.
    col.setStripeSize(ndb_blob_striping() ? 0 : 0);
    break;
  //mysql_type_blob:
  case MYSQL_TYPE_GEOMETRY:
  case MYSQL_TYPE_BLOB:
    if ((field->flags & BINARY_FLAG) && cs == &my_charset_bin)
      col.setType(NDBCOL::Blob);
    else {
      col.setType(NDBCOL::Text);
      col.setCharset(cs);
    }
    {
      Field_blob *field_blob= (Field_blob *)field;
      /*
       * max_data_length is 2^8-1, 2^16-1, 2^24-1 for tiny, blob, medium.
       * Tinyblob gets no blob parts.  The other cases are just a crude
       * way to control part size and striping.
       *
       * In mysql blob(256) is promoted to blob(65535) so it does not
       * in fact fit "inline" in NDB.
       */
      if (field_blob->max_data_length() < (1 << 8))
        goto mysql_type_tiny_blob;
      else if (field_blob->max_data_length() < (1 << 16))
      {
        col.setInlineSize(256);
        col.setPartSize(2000);
        col.setStripeSize(ndb_blob_striping() ? 16 : 0);
        if (mod_maxblob->m_found)
        {
          col.setPartSize(4 * (NDB_MAX_TUPLE_SIZE_IN_WORDS - /* safety */ 13));
        }
      }
      else if (field_blob->max_data_length() < (1 << 24))
        goto mysql_type_medium_blob;
      else
        goto mysql_type_long_blob;
    }
    break;
  mysql_type_medium_blob:
  case MYSQL_TYPE_MEDIUM_BLOB:
    if ((field->flags & BINARY_FLAG) && cs == &my_charset_bin)
      col.setType(NDBCOL::Blob);
    else {
      col.setType(NDBCOL::Text);
      col.setCharset(cs);
    }
    col.setInlineSize(256);
    col.setPartSize(4000);
    col.setStripeSize(ndb_blob_striping() ? 8 : 0);
    if (mod_maxblob->m_found)
    {
      col.setPartSize(4 * (NDB_MAX_TUPLE_SIZE_IN_WORDS - /* safety */ 13));
    }
    break;
  mysql_type_long_blob:
  case MYSQL_TYPE_LONG_BLOB:
    if ((field->flags & BINARY_FLAG) && cs == &my_charset_bin)
      col.setType(NDBCOL::Blob);
    else {
      col.setType(NDBCOL::Text);
      col.setCharset(cs);
    }
    col.setInlineSize(256);
    /* Use the old (pre-7.2) max tuple size by default so tables stay
       upgrade/downgrade safe; MAX_BLOB_PART_SIZE overrides below. */
    col.setPartSize(4 * (OLD_NDB_MAX_TUPLE_SIZE_IN_WORDS - /* safety */ 13));
    col.setStripeSize(ndb_blob_striping() ? 4 : 0);
    if (mod_maxblob->m_found)
    {
      col.setPartSize(4 * (NDB_MAX_TUPLE_SIZE_IN_WORDS - /* safety */ 13));
    }
    break;

  // MySQL 5.7 binary-encoded JSON type
  case MYSQL_TYPE_JSON:
    {
      /*
        JSON columns are just like LONG BLOB columns except for inline size
        and part size. Inline size is chosen to accommodate a large number
        of embedded json documents without spilling over to the part table.
        The tradeoff is that only three JSON columns can be defined in a table
        due to the large inline size. Part size is chosen to optimize use of
        pages in the part table. Note that much of the JSON functionality is
        available by storing JSON documents in VARCHAR columns, including
        extracting keys from documents to be used as indexes.
      */
      const int NDB_JSON_INLINE_SIZE = 4000;
      const int NDB_JSON_PART_SIZE = 8100;

      col.setType(NDBCOL::Blob);
      col.setInlineSize(NDB_JSON_INLINE_SIZE);
      col.setPartSize(NDB_JSON_PART_SIZE);
      col.setStripeSize(ndb_blob_striping() ? 16 : 0);
      break;
    }

  // Other types
  case MYSQL_TYPE_ENUM:
    col.setType(NDBCOL::Char);
    col.setLength(field->pack_length());
    break;
  case MYSQL_TYPE_SET:
    col.setType(NDBCOL::Char);
    col.setLength(field->pack_length());
    break;
  case MYSQL_TYPE_BIT:
    {
      int no_of_bits= field->field_length;
      col.setType(NDBCOL::Bit);
      if (!no_of_bits)
        col.setLength(1);
      else
        col.setLength(no_of_bits);
      break;
    }
  case MYSQL_TYPE_NULL:
    goto mysql_type_unsupported;
  mysql_type_unsupported:
  default:
    DBUG_RETURN(HA_ERR_UNSUPPORTED);
  }
  // Set nullable and pk
  col.setNullable(field->maybe_null());
  col.setPrimaryKey(field->flags & PRI_KEY_FLAG);
  if ((field->flags & FIELD_IN_PART_FUNC_FLAG) != 0)
  {
    col.setPartitionKey(TRUE);
  }

  // Set autoincrement
  if (field->flags & AUTO_INCREMENT_FLAG)
  {
    col.setAutoIncrement(TRUE);
    ulonglong value= create_info->auto_increment_value ?
      create_info->auto_increment_value : (ulonglong) 1;
    DBUG_PRINT("info", ("Autoincrement key, initial: %llu", value));
    col.setAutoIncrementInitialValue(value);
  }
  else
    col.setAutoIncrement(FALSE);

  DBUG_PRINT("info", ("storage: %u  format: %u  ",
                      field->field_storage_type(),
                      field->column_format()));
  /* Resolve storage medium: explicit column setting wins, otherwise
     fall back to the table-level storage_media. */
  switch (field->field_storage_type()) {
  case(HA_SM_DEFAULT):
  default:
    if (create_info->storage_media == HA_SM_DISK)
      type= NDBCOL::StorageTypeDisk;
    else
      type= NDBCOL::StorageTypeMemory;
    break;
  case(HA_SM_DISK):
    type= NDBCOL::StorageTypeDisk;
    break;
  case(HA_SM_MEMORY):
    type= NDBCOL::StorageTypeMemory;
    break;
  }

  /* Resolve FIXED vs DYNAMIC column format; explicit column format wins,
     otherwise the table row type / caller-supplied default decides. */
  switch (field->column_format()) {
  case(COLUMN_FORMAT_TYPE_FIXED):
    dynamic= FALSE;
    break;
  case(COLUMN_FORMAT_TYPE_DYNAMIC):
    dynamic= TRUE;
    break;
  case(COLUMN_FORMAT_TYPE_DEFAULT):
  default:
    if (create_info->row_type == ROW_TYPE_DEFAULT)
      // NOTE(review): enum-to-bool conversion — any non-FIXED(0)
      // default_format value yields dynamic == TRUE.
      dynamic= default_format;
    else
      dynamic= (create_info->row_type == ROW_TYPE_DYNAMIC);
    break;
  }
  DBUG_PRINT("info", ("Column %s is declared %s", field->field_name,
                      (dynamic) ? "dynamic" : "static"));
  if (type == NDBCOL::StorageTypeDisk)
  {
    /* DYNAMIC is not supported for disk-stored columns; silently
       demote to FIXED and warn if the user asked for DYNAMIC. */
    if (dynamic)
    {
      DBUG_PRINT("info", ("Dynamic disk stored column %s changed to static",
                          field->field_name));
      dynamic= false;
    }

    if (thd && field->column_format() == COLUMN_FORMAT_TYPE_DYNAMIC)
    {
      push_warning_printf(thd, Sql_condition::SL_WARNING,
                          ER_ILLEGAL_HA_CREATE_OPTION,
                          "DYNAMIC column %s with "
                          "STORAGE DISK is not supported, "
                          "column will become FIXED",
                          field->field_name);
    }
  }

  switch (create_info->row_type) {
  case ROW_TYPE_FIXED:
    if (thd && (dynamic || field_type_forces_var_part(field->type())))
    {
      push_warning_printf(thd, Sql_condition::SL_WARNING,
                          ER_ILLEGAL_HA_CREATE_OPTION,
                          "Row format FIXED incompatible with "
                          "dynamic attribute %s",
                          field->field_name);
    }
    break;
  case ROW_TYPE_DYNAMIC:
    /*
      Future: make columns dynamic in this case
    */
    break;
  default:
    break;
  }

  DBUG_PRINT("info", ("Format %s, Storage %s", (dynamic)?"dynamic":"fixed",(type == NDBCOL::StorageTypeDisk)?"disk":"memory"));
  col.setStorageType(type);
  col.setDynamic(dynamic);

  DBUG_RETURN(0);
}
9812
/**
  Update create_info with table properties read back from NDB:
  the current auto_increment value (unless AUTO_INCREMENT was given
  explicitly) and the table's foreign key data.
*/
void ha_ndbcluster::update_create_info(HA_CREATE_INFO *create_info)
{
  DBUG_ENTER("ha_ndbcluster::update_create_info");
  THD *thd= current_thd;
  const NDBTAB *ndbtab= m_table;
  Ndb *ndb= check_ndb_in_thd(thd);

  if (!(create_info->used_fields & HA_CREATE_USED_AUTO))
  {
    /*
      Find any initial auto_increment value
    */
    for (uint i= 0; i < table->s->fields; i++)
    {
      Field *field= table->field[i];
      if (field->flags & AUTO_INCREMENT_FLAG)
      {
        ulonglong auto_value;
        uint retries= NDB_AUTO_INCREMENT_RETRIES;
        int retry_sleep= 30; /* 30 milliseconds, transaction */
        for (;;)
        {
          Ndb_tuple_id_range_guard g(m_share);
          if (ndb->readAutoIncrementValue(ndbtab, g.range, auto_value))
          {
            /* Retry only on temporary NDB errors, up to
               NDB_AUTO_INCREMENT_RETRIES attempts, unless killed. */
            if (--retries && !thd->killed &&
                ndb->getNdbError().status == NdbError::TemporaryError)
            {
              do_retry_sleep(retry_sleep);
              continue;
            }
            const NdbError err= ndb->getNdbError();
            sql_print_error("Error %lu in ::update_create_info(): %s",
                            (ulong) err.code, err.message);
            DBUG_VOID_RETURN;
          }
          break;
        }
        if (auto_value > 1)
        {
          create_info->auto_increment_value= auto_value;
        }
        /* Only one auto_increment column per table */
        break;
      }
    }
  }

  /*
    FK data is handled in get_metadata and release_metadata but
    for some reason it is not enough
  */
  if (1)
  {
    int error= get_fk_data(thd, ndb);
    if (error != 0)
    {
      sql_print_error("update_create_info: get FK data: error %d", error);
      DBUG_VOID_RETURN;
    }
  }

  DBUG_VOID_RETURN;
}
9876
9877 /*
9878 Create a table in NDB Cluster
9879 */
get_no_fragments(ulonglong max_rows)9880 static uint get_no_fragments(ulonglong max_rows)
9881 {
9882 ulonglong acc_row_size= 25 + /*safety margin*/ 2;
9883 ulonglong acc_fragment_size= 512*1024*1024;
9884 return uint((max_rows*acc_row_size)/acc_fragment_size)+1;
9885 }
9886
9887
9888 /*
9889 Routine to adjust default number of partitions to always be a multiple
9890 of number of nodes and never more than 4 times the number of nodes.
9891
9892 */
9893 static
9894 bool
adjusted_frag_count(Ndb * ndb,uint requested_frags,uint & reported_frags)9895 adjusted_frag_count(Ndb* ndb,
9896 uint requested_frags,
9897 uint &reported_frags)
9898 {
9899 unsigned no_nodes= g_ndb_cluster_connection->no_db_nodes();
9900 unsigned no_replicas= no_nodes == 1 ? 1 : 2;
9901
9902 unsigned no_threads= 1;
9903 const unsigned no_nodegroups= g_ndb_cluster_connection->max_nodegroup() + 1;
9904
9905 {
9906 /**
9907 * Use SYSTAB_0 to get #replicas, and to guess #threads
9908 */
9909 char dbname[FN_HEADLEN+1];
9910 dbname[FN_HEADLEN]= 0;
9911 my_stpnmov(dbname, ndb->getDatabaseName(), sizeof(dbname) - 1);
9912 ndb->setDatabaseName("sys");
9913 Ndb_table_guard ndbtab_g(ndb->getDictionary(), "SYSTAB_0");
9914 const NdbDictionary::Table * tab = ndbtab_g.get_table();
9915 if (tab)
9916 {
9917 no_replicas= ndbtab_g.get_table()->getReplicaCount();
9918
9919 /**
9920 * Guess #threads
9921 */
9922 {
9923 const Uint32 frags = tab->getFragmentCount();
9924 Uint32 node = 0;
9925 Uint32 cnt = 0;
9926 for (Uint32 i = 0; i<frags; i++)
9927 {
9928 Uint32 replicas[4];
9929 if (tab->getFragmentNodes(i, replicas, NDB_ARRAY_SIZE(replicas)))
9930 {
9931 if (node == replicas[0] || node == 0)
9932 {
9933 node = replicas[0];
9934 cnt ++;
9935 }
9936 }
9937 }
9938 no_threads = cnt; // No of primary replica on 1-node
9939 }
9940 }
9941 ndb->setDatabaseName(dbname);
9942 }
9943
9944 const unsigned usable_nodes = no_replicas * no_nodegroups;
9945 const uint max_replicas = 8 * usable_nodes * no_threads;
9946
9947 reported_frags = usable_nodes * no_threads; // Start with 1 frag per threads
9948 Uint32 replicas = reported_frags * no_replicas;
9949
9950 /**
9951 * Loop until requested replicas, and not exceed max-replicas
9952 */
9953 while (reported_frags < requested_frags &&
9954 (replicas + usable_nodes * no_threads * no_replicas) <= max_replicas)
9955 {
9956 reported_frags += usable_nodes * no_threads;
9957 replicas += usable_nodes * no_threads * no_replicas;
9958 }
9959
9960 return (reported_frags < requested_frags);
9961 }
9962
9963
9964 extern bool ndb_fk_util_truncate_allowed(THD* thd,
9965 NdbDictionary::Dictionary* dict,
9966 const char* db,
9967 const NdbDictionary::Table* tab,
9968 bool& allow);
9969
9970 /*
9971 Forward declaration of the utility functions used
9972 when creating partitioned tables
9973 */
9974 static int
9975 create_table_set_up_partition_info(HA_CREATE_INFO* create_info,
9976 partition_info *part_info,
9977 NdbDictionary::Table&);
9978 static int
9979 create_table_set_range_data(const partition_info* part_info,
9980 NdbDictionary::Table&);
9981 static int
9982 create_table_set_list_data(const partition_info* part_info,
9983 NdbDictionary::Table&);
9984
9985
9986 /**
9987 Create a table in NDB Cluster
9988 */
9989
create(const char * name,TABLE * form,HA_CREATE_INFO * create_info)9990 int ha_ndbcluster::create(const char *name,
9991 TABLE *form,
9992 HA_CREATE_INFO *create_info)
9993 {
9994 THD *thd= current_thd;
9995 NDBTAB tab;
9996 NDBCOL col;
9997 size_t pack_length, length;
9998 uint i, pk_length= 0;
9999 uchar *data= NULL, *pack_data= NULL;
10000 bool create_temporary= (create_info->options & HA_LEX_CREATE_TMP_TABLE);
10001 bool create_from_engine= (create_info->table_options & HA_OPTION_CREATE_FROM_ENGINE);
10002 bool is_alter= (thd->lex->sql_command == SQLCOM_ALTER_TABLE);
10003 bool is_truncate= (thd->lex->sql_command == SQLCOM_TRUNCATE);
10004 bool use_disk= FALSE;
10005 NdbDictionary::Table::SingleUserMode single_user_mode= NdbDictionary::Table::SingleUserModeLocked;
10006 bool ndb_sys_table= FALSE;
10007 int result= 0;
10008 NdbDictionary::ObjectId objId;
10009 Ndb_fk_list fk_list_for_truncate;
10010
10011 DBUG_ENTER("ha_ndbcluster::create");
10012 DBUG_PRINT("enter", ("name: %s", name));
10013
10014 if (create_temporary)
10015 {
10016 /*
10017 Ndb does not support temporary tables
10018 */
10019 set_my_errno(ER_ILLEGAL_HA_CREATE_OPTION);
10020 DBUG_PRINT("info", ("Ndb doesn't support temporary tables"));
10021 push_warning_printf(thd, Sql_condition::SL_WARNING,
10022 ER_ILLEGAL_HA_CREATE_OPTION,
10023 "Ndb doesn't support temporary tables");
10024 DBUG_RETURN(my_errno());
10025 }
10026
10027 assert(*fn_rext((char*)name) == 0);
10028 set_dbname(name);
10029 set_tabname(name);
10030
10031 /*
10032 Check that database name and table name will fit within limits
10033 */
10034 if (strlen(m_dbname) > NDB_MAX_DDL_NAME_BYTESIZE ||
10035 strlen(m_tabname) > NDB_MAX_DDL_NAME_BYTESIZE)
10036 {
10037 char *invalid_identifier=
10038 (strlen(m_dbname) > NDB_MAX_DDL_NAME_BYTESIZE)?m_dbname:m_tabname;
10039 push_warning_printf(thd, Sql_condition::SL_WARNING,
10040 ER_TOO_LONG_IDENT,
10041 "Ndb has an internal limit of %u bytes on the size of schema identifiers",
10042 NDB_MAX_DDL_NAME_BYTESIZE);
10043 my_error(ER_TOO_LONG_IDENT, MYF(0), invalid_identifier);
10044 DBUG_RETURN(HA_WRONG_CREATE_OPTION);
10045 }
10046
10047 set_my_errno(check_ndb_connection(thd));
10048 if (my_errno())
10049 DBUG_RETURN(my_errno());
10050
10051 Ndb *ndb= get_ndb(thd);
10052 NDBDICT *dict= ndb->getDictionary();
10053
10054 table= form;
10055 if (create_from_engine)
10056 {
10057 /*
10058 Table already exists in NDB and frm file has been created by
10059 caller.
10060 Do Ndb specific stuff, such as create a .ndb file
10061 */
10062 set_my_errno(write_ndb_file(name));
10063 if (my_errno())
10064 DBUG_RETURN(my_errno());
10065
10066 ndbcluster_create_binlog_setup(thd, ndb, name, (uint)strlen(name),
10067 m_dbname, m_tabname, form);
10068 if (my_errno() == HA_ERR_TABLE_EXIST)
10069 {
10070 push_warning_printf(thd, Sql_condition::SL_WARNING,
10071 ER_TABLE_EXISTS_ERROR,
10072 "Failed to setup replication of table %s.%s",
10073 m_dbname, m_tabname);
10074 set_my_errno(0);
10075 }
10076
10077
10078 DBUG_RETURN(my_errno());
10079 }
10080
10081 Thd_ndb *thd_ndb= get_thd_ndb(thd);
10082
10083 if (!((thd_ndb->options & TNO_NO_LOCK_SCHEMA_OP) ||
10084 thd_ndb->has_required_global_schema_lock("ha_ndbcluster::create")))
10085
10086 DBUG_RETURN(HA_ERR_NO_CONNECTION);
10087
10088
10089 if (!ndb_schema_dist_is_ready())
10090 {
10091 /*
10092 Don't allow table creation unless schema distribution is ready
10093 ( unless it is a creation of the schema dist table itself )
10094 */
10095 if (!(strcmp(m_dbname, NDB_REP_DB) == 0 &&
10096 strcmp(m_tabname, NDB_SCHEMA_TABLE) == 0))
10097 {
10098 DBUG_PRINT("info", ("Schema distribution table not setup"));
10099 DBUG_RETURN(HA_ERR_NO_CONNECTION);
10100 }
10101 single_user_mode = NdbDictionary::Table::SingleUserModeReadWrite;
10102 ndb_sys_table= TRUE;
10103 }
10104
10105 if (!ndb_apply_status_share)
10106 {
10107 if ((strcmp(m_dbname, NDB_REP_DB) == 0 &&
10108 strcmp(m_tabname, NDB_APPLY_TABLE) == 0))
10109 {
10110 ndb_sys_table= TRUE;
10111 }
10112 }
10113
10114 if (is_truncate)
10115 {
10116 Ndb_table_guard ndbtab_g(dict);
10117 ndbtab_g.init(m_tabname);
10118 if (!ndbtab_g.get_table())
10119 ERR_RETURN(dict->getNdbError());
10120
10121 /*
10122 Don't allow truncate on table which is foreign key parent.
10123 This is kind of a kludge to get legacy compatibility behaviour
10124 but it also reduces the complexity involved in rewriting
10125 fks during this "recreate".
10126 */
10127 bool allow;
10128 if (!ndb_fk_util_truncate_allowed(thd, dict, m_dbname,
10129 ndbtab_g.get_table(), allow))
10130 {
10131 DBUG_RETURN(HA_ERR_NO_CONNECTION);
10132 }
10133 if (!allow)
10134 {
10135 my_error(ER_TRUNCATE_ILLEGAL_FK, MYF(0), "");
10136 DBUG_RETURN(1);
10137 }
10138
10139 /* save the foreign key information in fk_list */
10140 int err;
10141 if ((err= get_fk_data_for_truncate(dict, ndbtab_g.get_table(),
10142 fk_list_for_truncate)))
10143 DBUG_RETURN(err);
10144
10145 DBUG_PRINT("info", ("Dropping and re-creating table for TRUNCATE"));
10146 if ((err= delete_table(name)))
10147 DBUG_RETURN(err);
10148 ndbtab_g.reinit();
10149 }
10150
10151 NDB_Modifiers table_modifiers(ndb_table_modifiers);
10152 table_modifiers.parse(thd, "NDB_TABLE=", create_info->comment.str,
10153 create_info->comment.length);
10154 const NDB_Modifier * mod_nologging = table_modifiers.get("NOLOGGING");
10155
10156 #ifdef HAVE_NDB_BINLOG
10157 /* Read ndb_replication entry for this table, if any */
10158 Uint32 binlog_flags;
10159 const st_conflict_fn_def* conflict_fn= NULL;
10160 st_conflict_fn_arg args[MAX_CONFLICT_ARGS];
10161 Uint32 num_args = MAX_CONFLICT_ARGS;
10162
10163 int rep_read_rc= ndbcluster_get_binlog_replication_info(thd,
10164 ndb,
10165 m_dbname,
10166 m_tabname,
10167 ::server_id,
10168 &binlog_flags,
10169 &conflict_fn,
10170 args,
10171 &num_args);
10172 if (rep_read_rc != 0)
10173 {
10174 DBUG_RETURN(rep_read_rc);
10175 }
10176
10177 /* Reset database name */
10178 ndb->setDatabaseName(m_dbname);
10179
10180 /* TODO : Add as per conflict function 'virtual' */
10181 /* Use ndb_replication information as required */
10182 if (conflict_fn != NULL)
10183 {
10184 switch(conflict_fn->type)
10185 {
10186 case CFT_NDB_EPOCH:
10187 case CFT_NDB_EPOCH_TRANS:
10188 case CFT_NDB_EPOCH2:
10189 case CFT_NDB_EPOCH2_TRANS:
10190 {
10191 /* Default 6 extra Gci bits allows 2^6 == 64
10192 * epochs / saveGCP, a comfortable default
10193 */
10194 Uint32 numExtraGciBits = 6;
10195 Uint32 numExtraAuthorBits = 1;
10196
10197 if ((num_args == 1) &&
10198 (args[0].type == CFAT_EXTRA_GCI_BITS))
10199 {
10200 numExtraGciBits = args[0].extraGciBits;
10201 }
10202 DBUG_PRINT("info", ("Setting ExtraRowGciBits to %u, "
10203 "ExtraAuthorBits to %u",
10204 numExtraGciBits,
10205 numExtraAuthorBits));
10206
10207 tab.setExtraRowGciBits(numExtraGciBits);
10208 tab.setExtraRowAuthorBits(numExtraAuthorBits);
10209 }
10210 default:
10211 break;
10212 }
10213 }
10214 #endif
10215
10216 if ((dict->beginSchemaTrans() == -1))
10217 {
10218 DBUG_PRINT("info", ("Failed to start schema transaction"));
10219 goto err_return;
10220 }
10221 DBUG_PRINT("info", ("Started schema transaction"));
10222
10223 DBUG_PRINT("table", ("name: %s", m_tabname));
10224 if (tab.setName(m_tabname))
10225 {
10226 set_my_errno(errno);
10227 goto abort;
10228 }
10229 if (!ndb_sys_table)
10230 {
10231 if (THDVAR(thd, table_temporary))
10232 {
10233 #ifdef DOES_NOT_WORK_CURRENTLY
10234 tab.setTemporary(TRUE);
10235 #endif
10236 tab.setLogging(FALSE);
10237 }
10238 else if (THDVAR(thd, table_no_logging))
10239 {
10240 tab.setLogging(FALSE);
10241 }
10242
10243 if (mod_nologging->m_found)
10244 {
10245 tab.setLogging(!mod_nologging->m_val_bool);
10246 }
10247 }
10248 tab.setSingleUserMode(single_user_mode);
10249
10250 // Save frm data for this table
10251 if (readfrm(name, &data, &length))
10252 {
10253 result= 1;
10254 goto abort_return;
10255 }
10256 if (packfrm(data, length, &pack_data, &pack_length))
10257 {
10258 my_free((char*)data, MYF(0));
10259 result= 2;
10260 goto abort_return;
10261 }
10262 DBUG_PRINT("info",
10263 ("setFrm data: 0x%lx len: %lu", (long) pack_data,
10264 (ulong) pack_length));
10265 tab.setFrm(pack_data, Uint32(pack_length));
10266 my_free((char*)data, MYF(0));
10267 my_free((char*)pack_data, MYF(0));
10268
10269 /*
10270 Handle table row type
10271
10272 Default is to let table rows have var part reference so that online
10273 add column can be performed in the future. Explicitly setting row
10274 type to fixed will omit var part reference, which will save data
10275 memory in ndb, but at the cost of not being able to online add
10276 column to this table
10277 */
10278 switch (create_info->row_type) {
10279 case ROW_TYPE_FIXED:
10280 tab.setForceVarPart(FALSE);
10281 break;
10282 case ROW_TYPE_DYNAMIC:
10283 /* fall through, treat as default */
10284 default:
10285 /* fall through, treat as default */
10286 case ROW_TYPE_DEFAULT:
10287 tab.setForceVarPart(TRUE);
10288 break;
10289 }
10290
10291 /*
10292 Setup columns
10293 */
10294 my_bitmap_map *old_map;
10295 {
10296 restore_record(form, s->default_values);
10297 old_map= tmp_use_all_columns(form, form->read_set);
10298 }
10299
10300 for (i= 0; i < form->s->fields; i++)
10301 {
10302 Field *field= form->field[i];
10303 DBUG_PRINT("info", ("name: %s, type: %u, pack_length: %d",
10304 field->field_name, field->real_type(),
10305 field->pack_length()));
10306 set_my_errno(create_ndb_column(thd, col, field, create_info));
10307 if (my_errno())
10308 goto abort;
10309
10310 if (!use_disk &&
10311 col.getStorageType() == NDBCOL::StorageTypeDisk)
10312 use_disk= TRUE;
10313
10314 if (tab.addColumn(col))
10315 {
10316 set_my_errno(errno);
10317 goto abort;
10318 }
10319 if (col.getPrimaryKey())
10320 pk_length += (field->pack_length() + 3) / 4;
10321 }
10322
10323 tmp_restore_column_map(form->read_set, old_map);
10324 if (use_disk)
10325 {
10326 tab.setLogging(TRUE);
10327 tab.setTemporary(FALSE);
10328 if (create_info->tablespace)
10329 tab.setTablespaceName(create_info->tablespace);
10330 else
10331 tab.setTablespaceName("DEFAULT-TS");
10332 }
10333
10334 // Save the table level storage media setting
10335 switch(create_info->storage_media)
10336 {
10337 case HA_SM_DISK:
10338 tab.setStorageType(NdbDictionary::Column::StorageTypeDisk);
10339 break;
10340 case HA_SM_DEFAULT:
10341 tab.setStorageType(NdbDictionary::Column::StorageTypeDefault);
10342 break;
10343 case HA_SM_MEMORY:
10344 tab.setStorageType(NdbDictionary::Column::StorageTypeMemory);
10345 break;
10346 }
10347
10348 DBUG_PRINT("info", ("Table %s is %s stored with tablespace %s",
10349 m_tabname,
10350 (use_disk) ? "disk" : "memory",
10351 (use_disk) ? tab.getTablespaceName() : "N/A"));
10352
10353 KEY* key_info;
10354 for (i= 0, key_info= form->key_info; i < form->s->keys; i++, key_info++)
10355 {
10356 KEY_PART_INFO *key_part= key_info->key_part;
10357 KEY_PART_INFO *end= key_part + key_info->user_defined_key_parts;
10358 for (; key_part != end; key_part++)
10359 {
10360 if (key_part->field->field_storage_type() == HA_SM_DISK)
10361 {
10362 push_warning_printf(thd, Sql_condition::SL_WARNING,
10363 ER_ILLEGAL_HA_CREATE_OPTION,
10364 ER(ER_ILLEGAL_HA_CREATE_OPTION),
10365 ndbcluster_hton_name,
10366 "Index on field "
10367 "declared with "
10368 "STORAGE DISK is not supported");
10369 result= HA_ERR_UNSUPPORTED;
10370 goto abort_return;
10371 }
10372 tab.getColumn(key_part->fieldnr-1)->setStorageType(
10373 NdbDictionary::Column::StorageTypeMemory);
10374 }
10375 }
10376
10377 // No primary key, create shadow key as 64 bit, auto increment
10378 if (form->s->primary_key == MAX_KEY)
10379 {
10380 DBUG_PRINT("info", ("Generating shadow key"));
10381 if (col.setName("$PK"))
10382 {
10383 set_my_errno(errno);
10384 goto abort;
10385 }
10386 col.setType(NdbDictionary::Column::Bigunsigned);
10387 col.setLength(1);
10388 col.setNullable(FALSE);
10389 col.setPrimaryKey(TRUE);
10390 col.setAutoIncrement(TRUE);
10391 col.setDefaultValue(NULL, 0);
10392 if (tab.addColumn(col))
10393 {
10394 set_my_errno(errno);
10395 goto abort;
10396 }
10397 pk_length += 2;
10398 }
10399
10400 // Make sure that blob tables don't have too big part size
10401 for (i= 0; i < form->s->fields; i++)
10402 {
10403 /**
10404 * The extra +7 concists
10405 * 2 - words from pk in blob table
10406 * 5 - from extra words added by tup/dict??
10407 */
10408
10409 // To be upgrade/downgrade safe...we currently use
10410 // old NDB_MAX_TUPLE_SIZE_IN_WORDS, unless MAX_BLOB_PART_SIZE is set
10411 switch (form->field[i]->real_type()) {
10412 case MYSQL_TYPE_GEOMETRY:
10413 case MYSQL_TYPE_BLOB:
10414 case MYSQL_TYPE_MEDIUM_BLOB:
10415 case MYSQL_TYPE_LONG_BLOB:
10416 case MYSQL_TYPE_JSON:
10417 {
10418 NdbDictionary::Column * column= tab.getColumn(i);
10419 unsigned size= pk_length + (column->getPartSize()+3)/4 + 7;
10420 unsigned ndb_max= OLD_NDB_MAX_TUPLE_SIZE_IN_WORDS;
10421 if (column->getPartSize() > (int)(4 * ndb_max))
10422 ndb_max= NDB_MAX_TUPLE_SIZE_IN_WORDS; // MAX_BLOB_PART_SIZE
10423
10424 if (size > ndb_max &&
10425 (pk_length+7) < ndb_max)
10426 {
10427 size= ndb_max - pk_length - 7;
10428 column->setPartSize(4*size);
10429 }
10430 /**
10431 * If size > NDB_MAX and pk_length+7 >= NDB_MAX
10432 * then the table can't be created anyway, so skip
10433 * changing part size, and have error later
10434 */
10435 }
10436 default:
10437 break;
10438 }
10439 }
10440
10441 // Assume that table_share->max/min_rows equals create_info->min/max
10442 // although this is create so create_info should be used
10443 assert(create_info->max_rows == table_share->max_rows);
10444 assert(create_info->min_rows == table_share->min_rows);
10445
10446 // Check partition info
10447 set_my_errno(create_table_set_up_partition_info(create_info,
10448 form->part_info,
10449 tab));
10450 if (my_errno())
10451 goto abort;
10452
10453 if (tab.getFragmentType() == NDBTAB::HashMapPartition &&
10454 tab.getDefaultNoPartitionsFlag() &&
10455 (create_info->max_rows != 0 || create_info->min_rows != 0))
10456 {
10457 ulonglong rows= create_info->max_rows >= create_info->min_rows ?
10458 create_info->max_rows :
10459 create_info->min_rows;
10460 uint no_fragments= get_no_fragments(rows);
10461 uint reported_frags= no_fragments;
10462 if (adjusted_frag_count(ndb, no_fragments, reported_frags))
10463 {
10464 push_warning(current_thd,
10465 Sql_condition::SL_WARNING, ER_UNKNOWN_ERROR,
10466 "Ndb might have problems storing the max amount "
10467 "of rows specified");
10468 }
10469 tab.setFragmentCount(reported_frags);
10470 tab.setDefaultNoPartitionsFlag(false);
10471 tab.setFragmentData(0, 0);
10472 }
10473
10474 // Check for HashMap
10475 if (tab.getFragmentType() == NDBTAB::HashMapPartition &&
10476 tab.getDefaultNoPartitionsFlag())
10477 {
10478 tab.setFragmentCount(0);
10479 tab.setFragmentData(0, 0);
10480 }
10481 else if (tab.getFragmentType() == NDBTAB::HashMapPartition)
10482 {
10483 NdbDictionary::HashMap hm;
10484 int res= dict->getDefaultHashMap(hm, tab.getFragmentCount());
10485 if (res == -1)
10486 {
10487 res= dict->initDefaultHashMap(hm, tab.getFragmentCount());
10488 if (res == -1)
10489 {
10490 const NdbError err= dict->getNdbError();
10491 set_my_errno(ndb_to_mysql_error(&err));
10492 goto abort;
10493 }
10494
10495 res= dict->createHashMap(hm);
10496 if (res == -1)
10497 {
10498 const NdbError err= dict->getNdbError();
10499 set_my_errno(ndb_to_mysql_error(&err));
10500 goto abort;
10501 }
10502 }
10503 }
10504
10505 // Create the table in NDB
10506 if (dict->createTable(tab, &objId) != 0)
10507 {
10508 const NdbError err= dict->getNdbError();
10509 set_my_errno(ndb_to_mysql_error(&err));
10510 goto abort;
10511 }
10512
10513 DBUG_PRINT("info", ("Table %s/%s created successfully",
10514 m_dbname, m_tabname));
10515
10516 // Create secondary indexes
10517 tab.assignObjId(objId);
10518 m_table= &tab;
10519 set_my_errno(create_indexes(thd, ndb, form));
10520
10521 if (!is_truncate && my_errno() == 0)
10522 {
10523 set_my_errno(create_fks(thd, ndb));
10524 }
10525
10526 if (is_alter && my_errno() == 0)
10527 {
10528 /**
10529 * mysql doesnt know/care about FK (buhhh)
10530 * so we need to copy the old ones ourselves
10531 */
10532 set_my_errno(copy_fk_for_offline_alter(thd, ndb, &tab));
10533 }
10534
10535 if (!fk_list_for_truncate.is_empty() && my_errno() == 0)
10536 {
10537 /*
10538 create FKs for the new table from the list got from old table.
10539 for truncate table.
10540 */
10541 set_my_errno(recreate_fk_for_truncate(thd, ndb, tab.getName(),
10542 fk_list_for_truncate));
10543 }
10544
10545 m_table= 0;
10546
10547 if (!my_errno())
10548 {
10549 /*
10550 * All steps have succeeded, try and commit schema transaction
10551 */
10552 if (dict->endSchemaTrans() == -1)
10553 goto err_return;
10554 set_my_errno(write_ndb_file(name));
10555 }
10556 else
10557 {
10558 abort:
10559 /*
10560 * Some step during table creation failed, abort schema transaction
10561 */
10562 DBUG_PRINT("info", ("Aborting schema transaction due to error %i",
10563 my_errno()));
10564 if (dict->endSchemaTrans(NdbDictionary::Dictionary::SchemaTransAbort)
10565 == -1)
10566 DBUG_PRINT("info", ("Failed to abort schema transaction, %i",
10567 dict->getNdbError().code));
10568 m_table= 0;
10569
10570 {
10571 // Flush the table out of ndbapi's dictionary cache
10572 Ndb_table_guard ndbtab_g(dict);
10573 ndbtab_g.init(m_tabname);
10574 ndbtab_g.invalidate();
10575 }
10576
10577 DBUG_RETURN(my_errno());
10578 abort_return:
10579 DBUG_PRINT("info", ("Aborting schema transaction"));
10580 if (dict->endSchemaTrans(NdbDictionary::Dictionary::SchemaTransAbort)
10581 == -1)
10582 DBUG_PRINT("info", ("Failed to abort schema transaction, %i",
10583 dict->getNdbError().code));
10584 DBUG_RETURN(result);
10585 err_return:
10586 m_table= 0;
10587 ERR_RETURN(dict->getNdbError());
10588 }
10589
10590 /**
10591 * createTable/index schema transaction OK
10592 */
10593 Ndb_table_guard ndbtab_g(dict, m_tabname);
10594 m_table= ndbtab_g.get_table();
10595
10596 if (my_errno())
10597 {
10598 /*
10599 Failed to create an index,
10600 drop the table (and all it's indexes)
10601 */
10602 while (!thd->killed)
10603 {
10604 if (dict->beginSchemaTrans() == -1)
10605 goto cleanup_failed;
10606 if (dict->dropTableGlobal(*m_table))
10607 {
10608 switch (dict->getNdbError().status)
10609 {
10610 case NdbError::TemporaryError:
10611 if (!thd->killed)
10612 {
10613 if (dict->endSchemaTrans(NdbDictionary::Dictionary::SchemaTransAbort)
10614 == -1)
10615 DBUG_PRINT("info", ("Failed to abort schema transaction, %i",
10616 dict->getNdbError().code));
10617 goto cleanup_failed;
10618 }
10619 break;
10620 default:
10621 break;
10622 }
10623 }
10624 if (dict->endSchemaTrans() == -1)
10625 {
10626 cleanup_failed:
10627 DBUG_PRINT("info", ("Could not cleanup failed create %i",
10628 dict->getNdbError().code));
10629 continue; // retry indefinitly
10630 }
10631 break;
10632 }
10633 m_table = 0;
10634 DBUG_RETURN(my_errno());
10635 }
10636 else // if (!my_errno)
10637 {
10638 NDB_SHARE *share= 0;
10639 native_mutex_lock(&ndbcluster_mutex);
10640 /*
10641 First make sure we get a "fresh" share here, not an old trailing one...
10642 */
10643 {
10644 uint length= (uint) strlen(name);
10645 if ((share= (NDB_SHARE*) my_hash_search(&ndbcluster_open_tables,
10646 (const uchar*) name, length)))
10647 handle_trailing_share(thd, share);
10648 }
10649 /*
10650 get a new share
10651 */
10652
10653 /* ndb_share reference create */
10654 if (!(share= get_share(name, form, TRUE, TRUE)))
10655 {
10656 sql_print_error("NDB: allocating table share for %s failed", name);
10657 /* my_errno is set */
10658 }
10659 else
10660 {
10661 DBUG_PRINT("NDB_SHARE", ("%s binlog create use_count: %u",
10662 share->key_string(), share->use_count));
10663 }
10664 native_mutex_unlock(&ndbcluster_mutex);
10665
10666 while (!IS_TMP_PREFIX(m_tabname))
10667 {
10668 #ifdef HAVE_NDB_BINLOG
10669 if (share)
10670 {
10671 /* Set the Binlogging information we retrieved above */
10672 ndbcluster_apply_binlog_replication_info(thd,
10673 share,
10674 m_table,
10675 conflict_fn,
10676 args,
10677 num_args,
10678 TRUE, /* Do set binlog flags */
10679 binlog_flags);
10680 }
10681 #endif
10682 String event_name(INJECTOR_EVENT_LEN);
10683 ndb_rep_event_name(&event_name, m_dbname, m_tabname,
10684 get_binlog_full(share));
10685 int do_event_op= ndb_binlog_running;
10686
10687 if (!ndb_schema_dist_is_ready() &&
10688 strcmp(share->db, NDB_REP_DB) == 0 &&
10689 strcmp(share->table_name, NDB_SCHEMA_TABLE) == 0)
10690 do_event_op= 1;
10691
10692 /*
10693 Always create an event for the table, as other mysql servers
10694 expect it to be there.
10695 */
10696 if (!Ndb_dist_priv_util::is_distributed_priv_table(m_dbname, m_tabname) &&
10697 !ndbcluster_create_event(thd, ndb, m_table, event_name.c_ptr(), share,
10698 do_event_op ? 2 : 1/* push warning */))
10699 {
10700 if (opt_ndb_extra_logging)
10701 sql_print_information("NDB Binlog: CREATE TABLE Event: %s",
10702 event_name.c_ptr());
10703
10704 if (ndbcluster_create_event_ops(thd, share,
10705 m_table, event_name.c_ptr()))
10706 {
10707 sql_print_error("NDB Binlog: FAILED CREATE TABLE event operations."
10708 " Event: %s", name);
10709 /* a warning has been issued to the client */
10710 }
10711 }
10712 /*
10713 warning has been issued if ndbcluster_create_event failed
10714 and (share && do_event_op)
10715 */
10716 if (share && !do_event_op)
10717 set_binlog_nologging(share);
10718 ndbcluster_log_schema_op(thd,
10719 thd->query().str, thd->query().length,
10720 share->db, share->table_name,
10721 m_table->getObjectId(),
10722 m_table->getObjectVersion(),
10723 (is_truncate) ?
10724 SOT_TRUNCATE_TABLE : SOT_CREATE_TABLE,
10725 NULL, NULL);
10726 break;
10727 }
10728 }
10729
10730 m_table= 0;
10731 DBUG_RETURN(my_errno());
10732 }
10733
10734
create_index(THD * thd,const char * name,KEY * key_info,NDB_INDEX_TYPE idx_type,uint idx_no) const10735 int ha_ndbcluster::create_index(THD *thd, const char *name, KEY *key_info,
10736 NDB_INDEX_TYPE idx_type, uint idx_no) const
10737 {
10738 int error= 0;
10739 char unique_name[FN_LEN + 1];
10740 static const char* unique_suffix= "$unique";
10741 DBUG_ENTER("ha_ndbcluster::create_index");
10742 DBUG_PRINT("info", ("Creating index %u: %s", idx_no, name));
10743
10744 if (idx_type == UNIQUE_ORDERED_INDEX || idx_type == UNIQUE_INDEX)
10745 {
10746 strxnmov(unique_name, FN_LEN, name, unique_suffix, NullS);
10747 DBUG_PRINT("info", ("Created unique index name \'%s\' for index %d",
10748 unique_name, idx_no));
10749 }
10750
10751 switch (idx_type){
10752 case PRIMARY_KEY_INDEX:
10753 // Do nothing, already created
10754 break;
10755 case PRIMARY_KEY_ORDERED_INDEX:
10756 error= create_ordered_index(thd, name, key_info);
10757 break;
10758 case UNIQUE_ORDERED_INDEX:
10759 if (!(error= create_ordered_index(thd, name, key_info)))
10760 error= create_unique_index(thd, unique_name, key_info);
10761 break;
10762 case UNIQUE_INDEX:
10763 if (check_index_fields_not_null(key_info))
10764 {
10765 push_warning_printf(thd, Sql_condition::SL_WARNING,
10766 ER_NULL_COLUMN_IN_INDEX,
10767 "Ndb does not support unique index on NULL valued attributes, index access with NULL value will become full table scan");
10768 }
10769 error= create_unique_index(thd, unique_name, key_info);
10770 break;
10771 case ORDERED_INDEX:
10772 if (key_info->algorithm == HA_KEY_ALG_HASH)
10773 {
10774 push_warning_printf(thd, Sql_condition::SL_WARNING,
10775 ER_ILLEGAL_HA_CREATE_OPTION,
10776 ER(ER_ILLEGAL_HA_CREATE_OPTION),
10777 ndbcluster_hton_name,
10778 "Ndb does not support non-unique "
10779 "hash based indexes");
10780 error= HA_ERR_UNSUPPORTED;
10781 break;
10782 }
10783 error= create_ordered_index(thd, name, key_info);
10784 break;
10785 default:
10786 assert(FALSE);
10787 break;
10788 }
10789
10790 DBUG_RETURN(error);
10791 }
10792
create_ordered_index(THD * thd,const char * name,KEY * key_info) const10793 int ha_ndbcluster::create_ordered_index(THD *thd, const char *name,
10794 KEY *key_info) const
10795 {
10796 DBUG_ENTER("ha_ndbcluster::create_ordered_index");
10797 DBUG_RETURN(create_ndb_index(thd, name, key_info, FALSE));
10798 }
10799
create_unique_index(THD * thd,const char * name,KEY * key_info) const10800 int ha_ndbcluster::create_unique_index(THD *thd, const char *name,
10801 KEY *key_info) const
10802 {
10803
10804 DBUG_ENTER("ha_ndbcluster::create_unique_index");
10805 DBUG_RETURN(create_ndb_index(thd, name, key_info, TRUE));
10806 }
10807
10808
10809 /**
10810 Create an index in NDB Cluster.
10811
10812 @todo
10813 Only temporary ordered indexes supported
10814 */
10815
create_ndb_index(THD * thd,const char * name,KEY * key_info,bool unique) const10816 int ha_ndbcluster::create_ndb_index(THD *thd, const char *name,
10817 KEY *key_info,
10818 bool unique) const
10819 {
10820 char index_name[FN_LEN + 1];
10821 Ndb *ndb= get_ndb(thd);
10822 NdbDictionary::Dictionary *dict= ndb->getDictionary();
10823 KEY_PART_INFO *key_part= key_info->key_part;
10824 KEY_PART_INFO *end= key_part + key_info->user_defined_key_parts;
10825
10826 DBUG_ENTER("ha_ndbcluster::create_index");
10827 DBUG_PRINT("enter", ("name: %s ", name));
10828
10829 ndb_protect_char(name, index_name, sizeof(index_name) - 1, '/');
10830 DBUG_PRINT("info", ("index name: %s ", index_name));
10831
10832 NdbDictionary::Index ndb_index(index_name);
10833 if (unique)
10834 ndb_index.setType(NdbDictionary::Index::UniqueHashIndex);
10835 else
10836 {
10837 ndb_index.setType(NdbDictionary::Index::OrderedIndex);
10838 // TODO Only temporary ordered indexes supported
10839 ndb_index.setLogging(FALSE);
10840 }
10841 if (!m_table->getLogging())
10842 ndb_index.setLogging(FALSE);
10843 if (((NDBTAB*)m_table)->getTemporary())
10844 ndb_index.setTemporary(TRUE);
10845 if (ndb_index.setTable(m_tabname))
10846 {
10847 set_my_errno(errno);
10848 DBUG_RETURN(errno);
10849 }
10850
10851 for (; key_part != end; key_part++)
10852 {
10853 Field *field= key_part->field;
10854 if (field->field_storage_type() == HA_SM_DISK)
10855 {
10856 push_warning_printf(thd, Sql_condition::SL_WARNING,
10857 ER_ILLEGAL_HA_CREATE_OPTION,
10858 ER(ER_ILLEGAL_HA_CREATE_OPTION),
10859 ndbcluster_hton_name,
10860 "Index on field "
10861 "declared with "
10862 "STORAGE DISK is not supported");
10863 DBUG_RETURN(HA_ERR_UNSUPPORTED);
10864 }
10865 DBUG_PRINT("info", ("attr: %s", field->field_name));
10866 if (ndb_index.addColumnName(field->field_name))
10867 {
10868 set_my_errno(errno);
10869 DBUG_RETURN(errno);
10870 }
10871 }
10872
10873 if (dict->createIndex(ndb_index, *m_table))
10874 ERR_RETURN(dict->getNdbError());
10875
10876 // Success
10877 DBUG_PRINT("info", ("Created index %s", name));
10878 DBUG_RETURN(0);
10879 }
10880
10881 /*
10882 Prepare for an on-line alter table
10883 */
// Take an extra reference on the share and mark it as being altered,
// so concurrent users see the on-line alter in progress.
void ha_ndbcluster::prepare_for_alter()
{
  /* ndb_share reference schema */
  ndbcluster_get_share(m_share); // Increase ref_count
  DBUG_PRINT("NDB_SHARE", ("%s binlog schema use_count: %u",
                           m_share->key_string(), m_share->use_count));
  // Flag the share state; the matching state reset/release happens
  // elsewhere in the alter flow (not visible in this file chunk)
  set_ndb_share_state(m_share, NSS_ALTERED);
}
10892
10893 /*
10894 Add an index on-line to a table
10895 */
10896 /*
10897 int ha_ndbcluster::add_index(TABLE *table_arg,
10898 KEY *key_info, uint num_of_keys,
10899 handler_add_index **add)
10900 {
10901 // TODO: As we don't yet implement ::final_add_index(),
10902 // we don't need a handler_add_index object either..?
10903 *add= NULL; // new handler_add_index(table_arg, key_info, num_of_keys);
10904 return add_index_impl(current_thd, table_arg, key_info, num_of_keys);
10905 }
10906 */
10907
/*
  Create each of the num_of_keys indexes described by key_info in NDB.
  Stops at the first failing index and returns its error code; 0 on success.
  Requires that prepare_for_alter() has marked the share NSS_ALTERED.
*/
int ha_ndbcluster::add_index_impl(THD *thd, TABLE *table_arg,
                                  KEY *key_info, uint num_of_keys)
{
  int error= 0;
  uint idx;
  // Historical trace label kept from when this code lived in ::add_index
  DBUG_ENTER("ha_ndbcluster::add_index");
  DBUG_PRINT("enter", ("table %s", table_arg->s->table_name.str));
  assert(m_share->state == NSS_ALTERED);

  for (idx= 0; idx < num_of_keys; idx++)
  {
    KEY *key= key_info + idx;
    KEY_PART_INFO *key_part= key->key_part;
    KEY_PART_INFO *end= key_part + key->user_defined_key_parts;
    NDB_INDEX_TYPE idx_type= get_index_type_from_key(idx, key_info, false);
    DBUG_PRINT("info", ("Adding index: '%s'", key_info[idx].name));
    // Add fields to key_part struct
    // NOTE(review): fields are resolved via the handler's 'table', not the
    // 'table_arg' parameter — presumably both describe the same table in
    // this code path; confirm against callers.
    for (; key_part != end; key_part++)
      key_part->field= table->field[key_part->fieldnr];
    // Check index type
    // Create index in ndb
    if((error= create_index(thd, key_info[idx].name, key, idx_type, idx)))
      break;
  }
  DBUG_RETURN(error);
}
10934
10935 /*
10936 Mark one or several indexes for deletion. and
10937 renumber the remaining indexes
10938 */
prepare_drop_index(TABLE * table_arg,uint * key_num,uint num_of_keys)10939 int ha_ndbcluster::prepare_drop_index(TABLE *table_arg,
10940 uint *key_num, uint num_of_keys)
10941 {
10942 DBUG_ENTER("ha_ndbcluster::prepare_drop_index");
10943 assert(m_share->state == NSS_ALTERED);
10944 // Mark indexes for deletion
10945 uint idx;
10946 for (idx= 0; idx < num_of_keys; idx++)
10947 {
10948 DBUG_PRINT("info", ("ha_ndbcluster::prepare_drop_index %u", *key_num));
10949 uint i = *key_num++;
10950 m_index[i].status= TO_BE_DROPPED;
10951 // Prepare delete of index stat entry
10952 if (m_index[i].type == PRIMARY_KEY_ORDERED_INDEX ||
10953 m_index[i].type == UNIQUE_ORDERED_INDEX ||
10954 m_index[i].type == ORDERED_INDEX)
10955 {
10956 const NdbDictionary::Index *index= m_index[i].index;
10957 if (index) // safety
10958 {
10959 int index_id= index->getObjectId();
10960 int index_version= index->getObjectVersion();
10961 ndb_index_stat_free(m_share, index_id, index_version);
10962 }
10963 }
10964 }
10965 // Renumber indexes
10966 THD *thd= current_thd;
10967 Thd_ndb *thd_ndb= get_thd_ndb(thd);
10968 Ndb *ndb= thd_ndb->ndb;
10969 renumber_indexes(ndb, table_arg);
10970 DBUG_RETURN(0);
10971 }
10972
10973 /*
10974 Really drop all indexes marked for deletion
10975 */
final_drop_index(TABLE * table_arg)10976 int ha_ndbcluster::final_drop_index(TABLE *table_arg)
10977 {
10978 int error;
10979 DBUG_ENTER("ha_ndbcluster::final_drop_index");
10980 // Really drop indexes
10981 THD *thd= current_thd;
10982 Thd_ndb *thd_ndb= get_thd_ndb(thd);
10983 Ndb *ndb= thd_ndb->ndb;
10984 error= drop_indexes(ndb, table_arg);
10985 DBUG_RETURN(error);
10986 }
10987
10988
10989 extern void ndb_fk_util_resolve_mock_tables(THD* thd,
10990 NdbDictionary::Dictionary* dict,
10991 const char* new_parent_db,
10992 const char* new_parent_name);
10993
10994
/**
  Perform a rename of a table in NDB and distribute it to the other
  participating mysqld(s) via the schema distribution protocol.

  @param thd          Thread context.
  @param ndb          Ndb object of this connection.
  @param orig_tab     NDB dictionary object of the table being renamed.
  @param from         Old path of the table (as passed by the server).
  @param to           New path of the table.
  @param old_dbname   Database name before the rename.
  @param old_tabname  Table name before the rename.
  @param new_dbname   Database name after the rename.
  @param new_tabname  Table name after the rename.
  @param real_rename  True when this call is the user-visible rename (as
                      opposed to a temp-name step of a copying ALTER).
  @param real_rename_db    Original db name used when talking to the
                           participants (NULL unless real_rename).
  @param real_rename_name  Original table name used when talking to the
                           participants (NULL unless real_rename).
  @param real_rename_log_on_participant  Whether participants should write
                           the rename query to their binlog.
  @param drop_events   Drop the binlog events of the old table.
  @param create_events Create binlog events for the new table.
  @param commit_alter  Log the final commit phase of an offline ALTER.

  @return 0 on success, otherwise a MySQL error code.
*/
int
ha_ndbcluster::rename_table_impl(THD* thd, Ndb* ndb,
                                 const NdbDictionary::Table* orig_tab,
                                 const char* from, const char* to,
                                 const char* old_dbname,
                                 const char* old_tabname,
                                 const char* new_dbname,
                                 const char* new_tabname,
                                 bool real_rename,
                                 const char* real_rename_db,
                                 const char* real_rename_name,
                                 bool real_rename_log_on_participant,
                                 bool drop_events,
                                 bool create_events,
                                 bool commit_alter)
{
  DBUG_ENTER("ha_ndbcluster::rename_table_impl");
  DBUG_PRINT("info", ("real_rename: %d", real_rename));
  DBUG_PRINT("info", ("real_rename_db: '%s'", real_rename_db));
  DBUG_PRINT("info", ("real_rename_name: '%s'", real_rename_name));
  DBUG_PRINT("info", ("real_rename_log_on_participant: %d",
                      real_rename_log_on_participant));
  // Verify default values of real_rename related parameters
  assert(real_rename ||
         (real_rename_db == NULL &&
          real_rename_name == NULL &&
          real_rename_log_on_participant == false));

  DBUG_PRINT("info", ("drop_events: %d", drop_events));
  DBUG_PRINT("info", ("create_events: %d", create_events));
  DBUG_PRINT("info", ("commit_alter: %d", commit_alter));

  NDBDICT* dict = ndb->getDictionary();
  NDBDICT::List index_list;
  if (my_strcasecmp(system_charset_info, new_dbname, old_dbname))
  {
    // When moving tables between databases the indexes need to be
    // recreated, save list of indexes before rename to allow
    // them to be recreated afterwards
    dict->listIndexes(index_list, *orig_tab);
  }

  // Change current database to that of target table
  if (ndb->setDatabaseName(new_dbname))
  {
    ERR_RETURN(ndb->getNdbError());
  }

  // Save id/version before the rename; used to identify the table
  // towards the participants in the schema distribution below
  const int ndb_table_id= orig_tab->getObjectId();
  const int ndb_table_version= orig_tab->getObjectVersion();

  // Temporary reference on the NDB_SHARE, released when 'share' leaves scope
  Ndb_share_temp_ref share(from);
  if (real_rename)
  {
    /*
      Prepare the rename on the participant, i.e make the participant
      save the final table name in the NDB_SHARE of the table to be renamed.

      NOTE! The tricky thing here is that the NDB_SHARE haven't yet been
      renamed on the participant and thus you have to use the original
      table name when communicating with the participant, otherwise it
      will not find the share where to stash the final table name.

      Also note that the main reason for doing this prepare phase
      (which the participant can't refuse) is due to lack of placeholders
      available in the schema dist protocol. There are simply not
      enough placeholders available to transfer all required parameters
      at once.
   */
    ndbcluster_log_schema_op(thd, to, (int)strlen(to),
                             real_rename_db, real_rename_name,
                             ndb_table_id, ndb_table_version,
                             SOT_RENAME_TABLE_PREPARE,
                             new_dbname /* unused */,
                             new_tabname /* unused */);
  }
  // Rename the local NDB_SHARE before attempting the rename in NDB;
  // rolled back below if the dictionary rename fails
  NDB_SHARE_KEY* old_key = share->key; // Save current key
  NDB_SHARE_KEY* new_key = NDB_SHARE::create_key(to);
  (void)ndbcluster_rename_share(thd, share, new_key);

  NdbDictionary::Table new_tab= *orig_tab;
  new_tab.setName(new_tabname);
  if (dict->alterTableGlobal(*orig_tab, new_tab) != 0)
  {
    const NdbError ndb_error= dict->getNdbError();
    // Rename the share back to old_key
    (void)ndbcluster_rename_share(thd, share, old_key);
    // Release the unused new_key
    NDB_SHARE::free_key(new_key);
    ERR_RETURN(ndb_error);
  }
  // Release the unused old_key
  NDB_SHARE::free_key(old_key);

  // Let any mock tables (stand-ins for missing FK parents) pick up
  // the new table name
  ndb_fk_util_resolve_mock_tables(thd, ndb->getDictionary(),
                                  new_dbname, new_tabname);

  {
    // Rename .ndb file
    int result;
    if ((result= handler::rename_table(from, to)))
    {
      // ToDo in 4.1 should rollback alter table...

      DBUG_RETURN(result);
    }
  }

  /* handle old table */
  if (drop_events)
  {
    ndbcluster_drop_event(thd, ndb, share,
                          old_dbname, old_tabname);
  }

  if (create_events)
  {
    Ndb_table_guard ndbtab_g2(dict, new_tabname);
    const NDBTAB *ndbtab= ndbtab_g2.get_table();
#ifdef HAVE_NDB_BINLOG
    ndbcluster_read_binlog_replication(thd, ndb, share, ndbtab,
                                       ::server_id, TRUE);
#endif
    /* always create an event for the table */
    String event_name(INJECTOR_EVENT_LEN);
    ndb_rep_event_name(&event_name, new_dbname, new_tabname,
                       get_binlog_full(share));

    if (!Ndb_dist_priv_util::is_distributed_priv_table(new_dbname,
                                                       new_tabname) &&
        !ndbcluster_create_event(thd, ndb, ndbtab, event_name.c_ptr(), share,
                                 ndb_binlog_running ? 2 : 1/* push warning */))
    {
      if (opt_ndb_extra_logging)
        sql_print_information("NDB Binlog: RENAME Event: %s",
                              event_name.c_ptr());
      if (share->op == 0 &&
          ndbcluster_create_event_ops(thd, share, ndbtab, event_name.c_ptr()))
      {
        sql_print_error("NDB Binlog: FAILED create event operations "
                        "during RENAME. Event %s", event_name.c_ptr());
        /* a warning has been issued to the client */
      }
    }
    /*
      warning has been issued if ndbcluster_create_event failed
      and ndb_binlog_running
    */
  }

  if (real_rename)
  {
    /*
      Commit of "real" rename table on participant i.e make the participant
      extract the original table name which it got in prepare.

      NOTE! The tricky thing also here is that the NDB_SHARE haven't yet been
      renamed on the participant and thus you have to use the original
      table name when communicating with the participant, otherwise it
      will not find the share where the final table name has been stashed.

      Also note the special flag which control wheter or not this
      query is written to binlog or not on the participants.
    */
    ndbcluster_log_schema_op(thd, thd->query().str, thd->query().length,
                             real_rename_db, real_rename_name,
                             ndb_table_id, ndb_table_version,
                             SOT_RENAME_TABLE,
                             new_dbname, new_tabname,
                             real_rename_log_on_participant);
  }

  if (commit_alter)
  {
    /* final phase of offline alter table */
    ndbcluster_log_schema_op(thd, thd->query().str, thd->query().length,
                             new_dbname, new_tabname,
                             ndb_table_id, ndb_table_version,
                             SOT_ALTER_TABLE_COMMIT,
                             NULL, NULL);
  }

  // Recreate the saved indexes on the renamed table (only populated when
  // the table moved between databases, see listIndexes() above)
  for (unsigned i = 0; i < index_list.count; i++)
  {
    NDBDICT::List::Element& index_el = index_list.elements[i];
    // Recreate any indexes not stored in the system database
    if (my_strcasecmp(system_charset_info,
                      index_el.database, NDB_SYSTEM_DATABASE))
    {
      // Get old index
      ndb->setDatabaseName(old_dbname);
      // NOTE(review): getIndexGlobal() result is used without a NULL/error
      // check before the dereference below — confirm it cannot fail here
      const NDBINDEX * index= dict->getIndexGlobal(index_el.name, new_tab);
      DBUG_PRINT("info", ("Creating index %s/%s",
                          index_el.database, index->getName()));
      // Create the same "old" index on new tab
      dict->createIndex(*index, new_tab);
      DBUG_PRINT("info", ("Dropping index %s/%s",
                          index_el.database, index->getName()));
      // Drop old index
      ndb->setDatabaseName(old_dbname);
      dict->dropIndexGlobal(*index);
    }
  }
  DBUG_RETURN(0);
}
11200
11201
11202 /**
11203 Rename a table in NDB and on the participating mysqld(s)
11204 */
11205
/**
  Rename a table in NDB and on the participating mysqld(s).

  Classifies the rename (simple RENAME, copying ALTER step 3 or 5,
  RENAME TABLE) from the SQL command and the temp-name status of the
  old/new names, then delegates to rename_table_impl() with the flags
  appropriate for that case.

  @param from  Old path of the table.
  @param to    New path of the table.

  @return 0 on success, otherwise a MySQL error code.
*/
int ha_ndbcluster::rename_table(const char *from, const char *to)
{
  THD *thd= current_thd;
  char old_dbname[FN_HEADLEN];
  char new_dbname[FN_HEADLEN];
  char new_tabname[FN_HEADLEN];

  DBUG_ENTER("ha_ndbcluster::rename_table");
  DBUG_PRINT("info", ("Renaming %s to %s", from, to));

  /*
    ALTER RENAME with some more change is currently not supported
    by Ndb due to
    Bug #16021021 ALTER ... RENAME FAILS TO RENAME ON PARTICIPANT MYSQLD

    Check if command is not RENAME and some more alter_flag
    except ALTER_RENAME is set.
  */
  if (thd->lex->sql_command == SQLCOM_ALTER_TABLE)
  {
    Alter_info *alter_info= &(thd->lex->alter_info);
    uint flags= alter_info->flags;

    if (flags & Alter_info::ALTER_RENAME && flags & ~Alter_info::ALTER_RENAME)
    {
      my_error(ER_NOT_SUPPORTED_YET, MYF(0), thd->query().str);
      DBUG_RETURN(ER_NOT_SUPPORTED_YET);
    }
  }

  // Parse db/table names out of the paths; set_tabname(from) fills m_tabname
  set_dbname(from, old_dbname);
  set_dbname(to, new_dbname);
  set_tabname(from);
  set_tabname(to, new_tabname);

  DBUG_PRINT("info", ("old_tabname: '%s'", m_tabname));
  DBUG_PRINT("info", ("new_tabname: '%s'", new_tabname));

  /* Check that the new table or database name does not exceed max limit */
  if (strlen(new_dbname) > NDB_MAX_DDL_NAME_BYTESIZE ||
      strlen(new_tabname) > NDB_MAX_DDL_NAME_BYTESIZE)
  {
    char *invalid_identifier=
        (strlen(new_dbname) > NDB_MAX_DDL_NAME_BYTESIZE) ?
          new_dbname : new_tabname;
    push_warning_printf(thd, Sql_condition::SL_WARNING,
                        ER_TOO_LONG_IDENT,
                        "Ndb has an internal limit of %u bytes on the "\
                        "size of schema identifiers",
                        NDB_MAX_DDL_NAME_BYTESIZE);
    my_error(ER_TOO_LONG_IDENT, MYF(0), invalid_identifier);
    DBUG_RETURN(HA_WRONG_CREATE_OPTION);
  }

  if (check_ndb_connection(thd))
    DBUG_RETURN(HA_ERR_NO_CONNECTION);

  // The global schema lock must already be held for DDL
  Thd_ndb *thd_ndb= thd_get_thd_ndb(thd);
  if (!thd_ndb->has_required_global_schema_lock("ha_ndbcluster::rename_table"))
    DBUG_RETURN(HA_ERR_NO_CONNECTION);

  // Open the table which is to be renamed(aka. the old)
  Ndb *ndb= get_ndb(thd);
  ndb->setDatabaseName(old_dbname);
  NDBDICT *dict= ndb->getDictionary();
  Ndb_table_guard ndbtab_g(dict, m_tabname);
  const NDBTAB *orig_tab;
  if (!(orig_tab= ndbtab_g.get_table()))
    ERR_RETURN(dict->getNdbError());
  DBUG_PRINT("info", ("NDB table name: '%s'", orig_tab->getName()));

  // Magically detect if this is a rename or some form of alter
  // and decide which actions need to be performed
  const bool old_is_temp = IS_TMP_PREFIX(m_tabname);
  const bool new_is_temp = IS_TMP_PREFIX(new_tabname);
  switch (thd_sql_command(thd))
  {
  case SQLCOM_DROP_INDEX:
  case SQLCOM_CREATE_INDEX:
    DBUG_PRINT("info", ("CREATE or DROP INDEX as copying ALTER"));
    // fallthrough
  case SQLCOM_ALTER_TABLE:
    DBUG_PRINT("info", ("SQLCOM_ALTER_TABLE"));

    if (!new_is_temp && !old_is_temp)
    {
      /*
        This is a rename directly from real to real which occurs:
        1) when the ALTER is "simple" RENAME i.e only consists of RENAME
           and/or enable/disable indexes
        2) as part of inplace ALTER .. RENAME
       */
      DBUG_PRINT("info", ("simple rename detected"));
      DBUG_RETURN(rename_table_impl(thd, ndb, orig_tab, from, to,
                                    old_dbname, m_tabname,
                                    new_dbname, new_tabname,
                                    true, // real_rename
                                    old_dbname, // real_rename_db
                                    m_tabname, // real_rename_name
                                    true, // real_rename_log_on_participants
                                    true, // drop_events
                                    true, // create events
                                    false)); // commit_alter
    }

    // Make sure that inplace was not requested
    assert(thd->lex->alter_info.requested_algorithm !=
           Alter_info::ALTER_TABLE_ALGORITHM_INPLACE);

    /*
      This is a copying alter table which is implemented as
      1) Create destination table with temporary name
          -> ha_ndbcluster::create_table('#sql_75636-87')
          There are now the source table and one with temporary name:
             [t1] + [#sql_75636-87]
      2) Copy data from source table to destination table.
      3) Backup the source table by renaming it to another temporary name.
          -> ha_ndbcluster::rename_table('t1', '#sql_86545-98')
          There are now two temporary named tables:
            [#sql_86545-98] + [#sql_75636-87]
      4) Rename the destination table to it's real name.
          ->  ha_ndbcluster::rename_table('#sql_75636-87', 't1')
      5) Drop the source table


    */

    if (new_is_temp)
    {
      /*
        This is an alter table which renames real name to temp name.
        ie. step 3) per above and is the first of
        two rename_table() calls. Drop events from the table.
      */
      DBUG_PRINT("info", ("real -> temp"));
      DBUG_RETURN(rename_table_impl(thd, ndb, orig_tab, from, to,
                                    old_dbname, m_tabname,
                                    new_dbname, new_tabname,
                                    false, // real_rename
                                    NULL, // real_rename_db
                                    NULL, // real_rename_name
                                    false, // real_rename_log_on_participants
                                    true, // drop_events
                                    false, // create events
                                    false)); // commit_alter
    }

    if (old_is_temp)
    {
      /*
        This is an alter table which renames temp name to real name.
        ie. step 5) per above and is the second call to rename_table().
        Create new events and commit the alter so that participant are
        made aware that the table changed and can reopen the table.
      */
      DBUG_PRINT("info", ("temp -> real"));

      /*
        Detect if this is the special case which occurs when
        the table is both altered and renamed.

        Important here is to remeber to rename the table also
        on all partiticipants so they will find the table when
        the alter is completed. This is slightly problematic since
        their table is renamed directly from real to real name, while
        the mysqld who performs the alter renames from temp to real
        name. Fortunately it's possible to lookup the original table
        name via THD.
      */
      const char* orig_name = thd->lex->select_lex->table_list.first->table_name;
      const char* orig_db = thd->lex->select_lex->table_list.first->db;
      if (thd->lex->alter_info.flags & Alter_info::ALTER_RENAME &&
          (my_strcasecmp(system_charset_info, orig_db, new_dbname) ||
           my_strcasecmp(system_charset_info, orig_name, new_tabname)))
      {
        DBUG_PRINT("info", ("ALTER with RENAME detected"));
        /*
          Use the original table name when communicating with participant
        */
        const char* real_rename_db = orig_db;
        const char* real_rename_name = orig_name;

        /*
          Don't log the rename query on participant since that would
          cause both an ALTER TABLE RENAME and RENAME TABLE to appear in
          the binlog
        */
        const bool real_rename_log_on_participant = false;
        DBUG_RETURN(rename_table_impl(thd, ndb, orig_tab,from,to,
                                      old_dbname, m_tabname,
                                      new_dbname, new_tabname,
                                      true, // real_rename
                                      real_rename_db,
                                      real_rename_name,
                                      real_rename_log_on_participant,
                                      false, // drop_events
                                      true, // create events
                                      true)); // commit_alter
      }

      DBUG_RETURN(rename_table_impl(thd, ndb, orig_tab,from,to,
                                    old_dbname, m_tabname,
                                    new_dbname, new_tabname,
                                    false, // real_rename
                                    NULL, // real_rename_db
                                    NULL, // real_rename_name
                                    false, // real_rename_log_on_participants
                                    false, // drop_events
                                    true, // create events
                                    true)); // commit_alter
    }
    break;

  case SQLCOM_RENAME_TABLE:
    DBUG_PRINT("info", ("SQLCOM_RENAME_TABLE"));

    DBUG_RETURN(rename_table_impl(thd, ndb, orig_tab, from, to,
                                  old_dbname, m_tabname,
                                  new_dbname, new_tabname,
                                  true, // real_rename
                                  old_dbname, // real_rename_db
                                  m_tabname, // real_rename_name
                                  true, // real_rename_log_on_participants
                                  true, // drop_events
                                  true, // create events
                                  false)); // commit_alter
    break;

  default:
    sql_print_error("Unexpected rename case detected, sql_command: %d",
                    thd_sql_command(thd));
    abort();
    break;
  }

  // Never reached
  DBUG_RETURN(HA_ERR_UNSUPPORTED);
}
11444
11445
11446 /**
11447 Delete table from NDB Cluster.
11448 */
11449
/**
  Release NDB_SHARE reference(s) for a table being deleted.

  Marks the share as dropped (if not already) and releases both the
  reference kept by the server and the temporary reference taken during
  the drop. When no share is passed in, it is looked up by path first.

  @param share  already-referenced share, or NULL
  @param path   table path used to look up the share when share is NULL

  NOTE(review): the do_drop label is entered from two places; in both
  cases ndbcluster_mutex is held and released at the end of the branch.
*/
static
void
delete_table_drop_share(NDB_SHARE* share, const char * path)
{
  DBUG_ENTER("delete_table_drop_share");
  if (share)
  {
    native_mutex_lock(&ndbcluster_mutex);
do_drop:
    if (share->state != NSS_DROPPED)
    {
      /*
        The share kept by the server has not been freed, free it
      */
      ndbcluster_mark_share_dropped(share);
      /* ndb_share reference create free */
      DBUG_PRINT("NDB_SHARE", ("%s create free use_count: %u",
                               share->key_string(), share->use_count));
      free_share(&share, TRUE);
    }
    /* ndb_share reference temporary free */
    DBUG_PRINT("NDB_SHARE", ("%s temporary free use_count: %u",
                             share->key_string(), share->use_count));
    free_share(&share, TRUE);
    native_mutex_unlock(&ndbcluster_mutex);
  }
  else if (path)
  {
    /* Caller had no share reference - look the share up by path while
       holding the mutex and, if found, drop it via the same code path */
    native_mutex_lock(&ndbcluster_mutex);
    share= get_share(path, 0, FALSE, TRUE);
    if (share)
    {
      goto do_drop;
    }
    native_mutex_unlock(&ndbcluster_mutex);
  }
  DBUG_VOID_RETURN;
}
11488
11489
11490 // Declare adapter functions for Dummy_table_util function
11491 extern bool ndb_fk_util_build_list(THD*, NdbDictionary::Dictionary*,
11492 const NdbDictionary::Table*, List<char>&);
11493 extern void ndb_fk_util_drop_list(THD*, Ndb* ndb, NdbDictionary::Dictionary*, List<char>&);
11494 extern bool ndb_fk_util_drop_table(THD*, Ndb* ndb, NdbDictionary::Dictionary*,
11495 const NdbDictionary::Table*);
11496 extern bool ndb_fk_util_is_mock_name(const char* table_name);
11497
11498 bool
drop_table_and_related(THD * thd,Ndb * ndb,NdbDictionary::Dictionary * dict,const NdbDictionary::Table * table,int drop_flags,bool skip_related)11499 ha_ndbcluster::drop_table_and_related(THD* thd, Ndb* ndb, NdbDictionary::Dictionary* dict,
11500 const NdbDictionary::Table* table,
11501 int drop_flags, bool skip_related)
11502 {
11503 DBUG_ENTER("drop_table_and_related");
11504 DBUG_PRINT("enter", ("cascade_constraints: %d dropdb: %d skip_related: %d",
11505 MY_TEST(drop_flags & NDBDICT::DropTableCascadeConstraints),
11506 MY_TEST(drop_flags & NDBDICT::DropTableCascadeConstraintsDropDB),
11507 skip_related));
11508
11509 /*
11510 Build list of objects which should be dropped after the table
11511 unless the caller ask to skip dropping related
11512 */
11513 List<char> drop_list;
11514 if (!skip_related &&
11515 !ndb_fk_util_build_list(thd, dict, table, drop_list))
11516 {
11517 DBUG_RETURN(false);
11518 }
11519
11520 // Drop the table
11521 if (dict->dropTableGlobal(*table, drop_flags) != 0)
11522 {
11523 const NdbError& ndb_err = dict->getNdbError();
11524 if (ndb_err.code == 21080 &&
11525 thd_test_options(thd, OPTION_NO_FOREIGN_KEY_CHECKS))
11526 {
11527 /*
11528 Drop was not allowed because table is still referenced by
11529 foreign key(s). Since foreign_key_checks=0 the problem is
11530 worked around by creating a mock table, recreating the foreign
11531 key(s) to point at the mock table and finally dropping
11532 the requested table.
11533 */
11534 if (!ndb_fk_util_drop_table(thd, ndb, dict, table))
11535 {
11536 DBUG_RETURN(false);
11537 }
11538 }
11539 else
11540 {
11541 DBUG_RETURN(false);
11542 }
11543 }
11544
11545 // Drop objects which should be dropped after table
11546 ndb_fk_util_drop_list(thd, ndb, dict, drop_list);
11547
11548 DBUG_RETURN(true);
11549 }
11550
11551
/**
  Drop a table in NDB - static version which does not need a handler.

  Drops the table (either via an open handler's cached table object or
  by dictionary lookup), retrying indefinitely on temporary errors,
  then stops binlog injection for the table, distributes the drop via
  schema distribution and releases the share.

  @param thd         thread context
  @param h           open handler for the table, or NULL
  @param ndb         Ndb object for this thread
  @param path        table path, used to look up the NDB_SHARE
  @param db          database name
  @param table_name  table name

  @return 0 on success, otherwise a mysql error code
*/
int
ha_ndbcluster::drop_table_impl(THD *thd, ha_ndbcluster *h, Ndb *ndb,
                               const char *path,
                               const char *db,
                               const char *table_name)
{
  DBUG_ENTER("ha_ndbcluster::drop_table_impl");
  NDBDICT *dict= ndb->getDictionary();
  int ndb_table_id= 0;
  int ndb_table_version= 0;

  if (!ndb_schema_dist_is_ready())
  {
    /* Don't allow drop table unless schema distribution is ready */
    DBUG_RETURN(HA_ERR_NO_CONNECTION);
  }
  /* ndb_share reference temporary */
  NDB_SHARE *share= get_share(path, 0, FALSE);
  if (share)
  {
    DBUG_PRINT("NDB_SHARE", ("%s temporary use_count: %u",
                             share->key_string(), share->use_count));
  }

  /* Decide how related objects (FKs, events) should be handled */
  bool skip_related= false;
  int drop_flags = 0;
  /* Copying alter can leave #sql table which is parent of old FKs */
  if (thd->lex->sql_command == SQLCOM_ALTER_TABLE &&
      strncmp(table_name, "#sql", 4) == 0)
  {
    DBUG_PRINT("info", ("Using cascade constraints for ALTER of temp table"));
    drop_flags |= NDBDICT::DropTableCascadeConstraints;
    // Cascade constraint is used and related will be dropped anyway
    skip_related = true;
  }

  if (thd->lex->sql_command == SQLCOM_DROP_DB)
  {
    DBUG_PRINT("info", ("Using cascade constraints DB for drop database"));
    drop_flags |= NDBDICT::DropTableCascadeConstraintsDropDB;
  }

  if (thd->lex->sql_command == SQLCOM_TRUNCATE)
  {
    DBUG_PRINT("info", ("Deleting table for TRUNCATE, skip dropping related"));
    skip_related= true;
  }

  /* Drop the table from NDB */
  int res= 0;
  if (h && h->m_table)
  {
    /* Handler is open - use its cached table object directly */
retry_temporary_error1:
    if (drop_table_and_related(thd, ndb, dict, h->m_table,
                               drop_flags, skip_related))
    {
      /* Remember id/version for schema distribution below */
      ndb_table_id= h->m_table->getObjectId();
      ndb_table_version= h->m_table->getObjectVersion();
      DBUG_PRINT("info", ("success 1"));
    }
    else
    {
      switch (dict->getNdbError().status)
      {
        case NdbError::TemporaryError:
          if (!thd->killed)
            goto retry_temporary_error1; // retry indefinitely
          break;
        default:
          break;
      }
      res= ndb_to_mysql_error(&dict->getNdbError());
      DBUG_PRINT("info", ("error(1) %u", res));
    }
    h->release_metadata(thd, ndb);
  }
  else
  {
    /* No open handler - look the table up in the dictionary.
       The loop handles stale cached table objects (invalidate + retry) */
    ndb->setDatabaseName(db);
    while (1)
    {
      Ndb_table_guard ndbtab_g(dict, table_name);
      if (ndbtab_g.get_table())
      {
retry_temporary_error2:
        if (drop_table_and_related(thd, ndb, dict, ndbtab_g.get_table(),
                                   drop_flags, skip_related))
        {
          ndb_table_id= ndbtab_g.get_table()->getObjectId();
          ndb_table_version= ndbtab_g.get_table()->getObjectVersion();
          DBUG_PRINT("info", ("success 2"));
          break;
        }
        else
        {
          switch (dict->getNdbError().status)
          {
            case NdbError::TemporaryError:
              if (!thd->killed)
                goto retry_temporary_error2; // retry indefinitely
              break;
            default:
              if (dict->getNdbError().code == NDB_INVALID_SCHEMA_OBJECT)
              {
                /* Cached table object was stale - refetch and retry */
                ndbtab_g.invalidate();
                continue;
              }
              break;
          }
        }
      }
      res= ndb_to_mysql_error(&dict->getNdbError());
      DBUG_PRINT("info", ("error(2) %u", res));
      break;
    }
  }

  if (res)
  {
    /* the drop table failed for some reason, drop the share anyways */
    delete_table_drop_share(share, 0);
    DBUG_RETURN(res);
  }

  /* stop the logging of the dropped table, and cleanup */

  /*
    drop table is successful even if table does not exist in ndb
    and in case table was actually not dropped, there is no need
    to force a gcp, and setting the event_name to null will indicate
    that there is no event to be dropped
  */
  int table_dropped= dict->getNdbError().code != 709;

  {
    if (table_dropped)
    {
      ndbcluster_handle_drop_table(thd, ndb, share, "delete table",
                                   db, table_name);
    }
    else
    {
      /**
       * Setting 0,0 will cause ndbcluster_drop_event *not* to be called
       */
      ndbcluster_handle_drop_table(thd, ndb, share, "delete table",
                                   0, 0);
    }
  }

  /* Distribute the drop to other mysqld's, except for temporary
     tables and TRUNCATE (which re-creates the table afterwards) */
  if (!IS_TMP_PREFIX(table_name) && share &&
      thd->lex->sql_command != SQLCOM_TRUNCATE)
  {
    ndbcluster_log_schema_op(thd,
                             thd->query().str, thd->query().length,
                             share->db, share->table_name,
                             ndb_table_id, ndb_table_version,
                             SOT_DROP_TABLE, NULL, NULL);
  }

  delete_table_drop_share(share, 0);
  DBUG_RETURN(0);
}
11717
delete_table(const char * name)11718 int ha_ndbcluster::delete_table(const char *name)
11719 {
11720 THD *thd= current_thd;
11721
11722 DBUG_ENTER("ha_ndbcluster::delete_table");
11723 DBUG_PRINT("enter", ("name: %s", name));
11724
11725 if (thd == injector_thd)
11726 {
11727 /*
11728 Table was dropped remotely is already
11729 dropped inside ndb.
11730 Just drop local files.
11731 */
11732 DBUG_PRINT("info", ("Table is already dropped in NDB"));
11733 delete_table_drop_share(0, name);
11734 DBUG_RETURN(handler::delete_table(name));
11735 }
11736
11737 set_dbname(name);
11738 set_tabname(name);
11739
11740 if (!ndb_schema_dist_is_ready())
11741 {
11742 /* Don't allow drop table unless schema distribution is ready */
11743 DBUG_RETURN(HA_ERR_NO_CONNECTION);
11744 }
11745
11746 if (check_ndb_connection(thd))
11747 {
11748 DBUG_RETURN(HA_ERR_NO_CONNECTION);
11749 }
11750
11751 Thd_ndb *thd_ndb= get_thd_ndb(thd);
11752 if (!thd_ndb->has_required_global_schema_lock("ha_ndbcluster::delete_table"))
11753 {
11754 DBUG_RETURN(HA_ERR_NO_CONNECTION);
11755 }
11756
11757 /*
11758 Drop table in ndb.
11759 If it was already gone it might have been dropped
11760 remotely, give a warning and then drop .ndb file.
11761 */
11762 int error;
11763 Ndb* ndb= thd_ndb->ndb;
11764 if (!(error= drop_table_impl(thd, this, ndb, name,
11765 m_dbname, m_tabname)) ||
11766 error == HA_ERR_NO_SUCH_TABLE)
11767 {
11768 /* Call ancestor function to delete .ndb file */
11769 int error1= handler::delete_table(name);
11770 if (!error)
11771 error= error1;
11772 }
11773
11774 DBUG_RETURN(error);
11775 }
11776
11777
/**
  Reserve auto increment value(s) from NDB.

  Retries on temporary NDB errors (up to NDB_AUTO_INCREMENT_RETRIES
  attempts with a short sleep in between). On permanent failure the
  error is logged and *first_value is set to ~0 to signal failure.

  @param offset              auto increment offset
  @param increment           auto increment step
  @param nb_desired_values   number of values the caller would like
  @param[out] first_value    first reserved value, ~0 on error
  @param[out] nb_reserved_values  always 1 (NDB hands out one at a time)
*/
void ha_ndbcluster::get_auto_increment(ulonglong offset, ulonglong increment,
                                       ulonglong nb_desired_values,
                                       ulonglong *first_value,
                                       ulonglong *nb_reserved_values)
{
  Uint64 auto_value;
  THD *thd= current_thd;
  DBUG_ENTER("get_auto_increment");
  DBUG_PRINT("enter", ("m_tabname: %s", m_tabname));
  Ndb *ndb= get_ndb(table->in_use);
  uint retries= NDB_AUTO_INCREMENT_RETRIES;
  int retry_sleep= 30; /* 30 milliseconds, transaction */
  for (;;)
  {
    Ndb_tuple_id_range_guard g(m_share);
    /* When the value is supplied by the user (m_skip_auto_increment)
       only read the current value, otherwise fetch/advance it */
    if ((m_skip_auto_increment &&
         ndb->readAutoIncrementValue(m_table, g.range, auto_value)) ||
        ndb->getAutoIncrementValue(m_table, g.range, auto_value,
                                   Uint32(m_autoincrement_prefetch),
                                   increment, offset))
    {
      /* Retry only on temporary errors, and only while not killed */
      if (--retries && !thd->killed &&
          ndb->getNdbError().status == NdbError::TemporaryError)
      {
        do_retry_sleep(retry_sleep);
        continue;
      }
      const NdbError err= ndb->getNdbError();
      sql_print_error("Error %lu in ::get_auto_increment(): %s",
                      (ulong) err.code, err.message);
      *first_value= ~(ulonglong) 0;
      DBUG_VOID_RETURN;
    }
    break;
  }
  *first_value= (longlong)auto_value;
  /* From the point of view of MySQL, NDB reserves one row at a time */
  *nb_reserved_values= 1;
  DBUG_VOID_RETURN;
}
11818
11819
11820 /**
11821 Constructor for the NDB Cluster table handler .
11822 */
11823
ha_ndbcluster::ha_ndbcluster(handlerton *hton, TABLE_SHARE *table_arg):
  handler(hton, table_arg),
  /* per-statement Thd_ndb and open cursor state */
  m_thd_ndb(NULL),
  m_active_cursor(NULL),
  /* NDB table metadata, set up by open()/get_metadata() */
  m_table(NULL),
  m_ndb_record(0),
  m_ndb_hidden_key_record(0),
  m_table_info(NULL),
  m_share(0),
  m_key_fields(NULL),
  /* partitioning state, filled in by set_part_info() */
  m_part_info(NULL),
  m_user_defined_partitioning(FALSE),
  m_use_partition_pruning(FALSE),
  m_sorted(FALSE),
  m_use_write(FALSE),
  m_ignore_dup_key(FALSE),
  m_has_unique_index(FALSE),
  m_ignore_no_key(FALSE),
  m_read_before_write_removal_possible(FALSE),
  m_read_before_write_removal_used(FALSE),
  /* row counters used for statistics and batching decisions */
  m_rows_updated(0),
  m_rows_deleted(0),
  m_rows_to_insert((ha_rows) 1),
  m_rows_inserted((ha_rows) 0),
  m_rows_changed((ha_rows) 0),
  m_delete_cannot_batch(FALSE),
  m_update_cannot_batch(FALSE),
  m_skip_auto_increment(TRUE),
  /* blob handling buffers */
  m_blobs_pending(0),
  m_is_bulk_delete(false),
  m_blobs_row_total_size(0),
  m_blobs_buffer(0),
  m_blobs_buffer_size(0),
  m_dupkey((uint) -1),
  m_autoincrement_prefetch(DEFAULT_AUTO_PREFETCH),
  /* pushed join (SPJ) state */
  m_pushed_join_member(NULL),
  m_pushed_join_operation(-1),
  m_disable_pushed_join(FALSE),
  m_active_query(NULL),
  m_pushed_operation(NULL),
  m_cond(NULL),
  m_multi_cursor(NULL)
{
  uint i;

  DBUG_ENTER("ha_ndbcluster");

  m_tabname[0]= '\0';
  m_dbname[0]= '\0';

  /* Mark statistics as not yet fetched from NDB */
  stats.records= ~(ha_rows)0; // uninitialized
  stats.block_size= 1024;

  for (i= 0; i < MAX_KEY; i++)
    ndb_init_index(m_index[i]);

  // make sure is initialized
  init_alloc_root(PSI_INSTRUMENT_ME, &m_fk_mem_root, fk_root_block_size, 0);
  m_fk_data= NULL;

  DBUG_VOID_RETURN;
}
11886
11887
11888 /**
11889 Destructor for NDB Cluster table handler.
11890 */
11891
~ha_ndbcluster()11892 ha_ndbcluster::~ha_ndbcluster()
11893 {
11894 THD *thd= current_thd;
11895 Ndb *ndb= thd ? check_ndb_in_thd(thd) : g_ndb;
11896 DBUG_ENTER("~ha_ndbcluster");
11897
11898 if (m_share)
11899 {
11900 /* ndb_share reference handler free */
11901 DBUG_PRINT("NDB_SHARE", ("%s handler free use_count: %u",
11902 m_share->key_string(), m_share->use_count));
11903 free_share(&m_share);
11904 }
11905 release_metadata(thd, ndb);
11906 release_blobs_buffer();
11907
11908 // Check for open cursor/transaction
11909 assert(m_thd_ndb == NULL);
11910
11911 // Discard any generated condition
11912 DBUG_PRINT("info", ("Deleting generated condition"));
11913 if (m_cond)
11914 {
11915 delete m_cond;
11916 m_cond= NULL;
11917 }
11918 DBUG_PRINT("info", ("Deleting pushed joins"));
11919 assert(m_active_query == NULL);
11920 assert(m_active_cursor == NULL);
11921 if (m_pushed_join_operation==PUSHED_ROOT)
11922 {
11923 delete m_pushed_join_member; // Also delete QueryDef
11924 }
11925 m_pushed_join_member= NULL;
11926
11927 // make sure is released
11928 free_root(&m_fk_mem_root, 0);
11929 m_fk_data= NULL;
11930 DBUG_VOID_RETURN;
11931 }
11932
11933
11934 /**
11935 Open a table for further use
11936 - fetch metadata for this table from NDB
11937 - check that table exists
11938
11939 @retval
11940 0 ok
11941 @retval
11942 < 0 Table has changed
11943 */
11944
int ha_ndbcluster::open(const char *name, int mode, uint test_if_locked)
{
  THD *thd= current_thd;
  int res;
  KEY *key;
  KEY_PART_INFO *key_part_info;
  uint key_parts, i, j;
  DBUG_ENTER("ha_ndbcluster::open");
  DBUG_PRINT("enter", ("name: %s mode: %d test_if_locked: %d",
                       name, mode, test_if_locked));

  if (table_share->primary_key != MAX_KEY)
  {
    /*
      Setup ref_length to make room for the whole
      primary key to be written in the ref variable
    */
    key= table->key_info+table_share->primary_key;
    ref_length= key->key_length;
  }
  else
  {
    /* No primary key - NDB uses a hidden key; the ref may also need
       to carry the partition id for user-defined partitioning */
    if (m_user_defined_partitioning)
    {
      /* Add space for partid in ref */
      ref_length+= sizeof(m_part_id);
    }
  }
  DBUG_PRINT("info", ("ref_length: %d", ref_length));

  {
    /*
      Allocate one bitmap per key (plus one for the hidden key when
      there is no primary key) in a single allocation: first the
      NULL-terminated pointer array, then the MY_BITMAP structs.
    */
    char* bitmap_array;
    uint extra_hidden_keys= table_share->primary_key != MAX_KEY ? 0 : 1;
    uint n_keys= table_share->keys + extra_hidden_keys;
    uint ptr_size= sizeof(MY_BITMAP*) * (n_keys + 1 /* null termination */);
    uint map_size= sizeof(MY_BITMAP) * n_keys;
    m_key_fields= (MY_BITMAP**)my_malloc(PSI_INSTRUMENT_ME,
                                         ptr_size + map_size,
                                         MYF(MY_WME + MY_ZEROFILL));
    if (!m_key_fields)
    {
      local_close(thd, FALSE);
      DBUG_RETURN(1);
    }
    bitmap_array= ((char*)m_key_fields) + ptr_size;
    for (i= 0; i < n_keys; i++)
    {
      my_bitmap_map *bitbuf= NULL;
      bool is_hidden_key= (i == table_share->keys);
      m_key_fields[i]= (MY_BITMAP*)bitmap_array;
      /* The primary key (or hidden key) bitmap uses the statically
         allocated buffer m_pk_bitmap_buf */
      if (is_hidden_key || (i == table_share->primary_key))
      {
        m_pk_bitmap_p= m_key_fields[i];
        bitbuf= m_pk_bitmap_buf;
      }
      if (bitmap_init(m_key_fields[i], bitbuf,
                      table_share->fields, FALSE))
      {
        m_key_fields[i]= NULL;
        local_close(thd, FALSE);
        DBUG_RETURN(1);
      }
      if (!is_hidden_key)
      {
        /* Mark each field that is part of this key */
        key= table->key_info + i;
        key_part_info= key->key_part;
        key_parts= key->user_defined_key_parts;
        for (j= 0; j < key_parts; j++, key_part_info++)
          bitmap_set_bit(m_key_fields[i], key_part_info->fieldnr-1);
      }
      else
      {
        /* The hidden key is represented by the bit just past the last
           real field in the primary key bitmap buffer */
        uint field_no= table_share->fields;
        ((uchar *)m_pk_bitmap_buf)[field_no>>3]|= (1 << (field_no & 7));
      }
      bitmap_array+= sizeof(MY_BITMAP);
    }
    m_key_fields[i]= NULL;   /* NULL-terminate the pointer array */
  }

  set_dbname(name);
  set_tabname(name);

  if ((res= check_ndb_connection(thd)) != 0)
  {
    local_close(thd, FALSE);
    DBUG_RETURN(res);
  }

  // Init table lock structure
  /* ndb_share reference handler */
  if ((m_share=get_share(name, table, FALSE)) == 0)
  {
    /**
     * No share present...we must create one
     */
    if (opt_ndb_extra_logging > 19)
    {
      sql_print_information("Calling ndbcluster_create_binlog_setup(%s) in ::open",
                            name);
    }
    Ndb* ndb= check_ndb_in_thd(thd);
    ndbcluster_create_binlog_setup(thd, ndb, name, (uint)strlen(name),
                                   m_dbname, m_tabname, table);
    if ((m_share=get_share(name, table, FALSE)) == 0)
    {
      local_close(thd, FALSE);
      DBUG_RETURN(1);
    }
  }

  DBUG_PRINT("NDB_SHARE", ("%s handler use_count: %u",
                           m_share->key_string(), m_share->use_count));
  thr_lock_data_init(&m_share->lock,&m_lock,(void*) 0);

  /* Fetch table/index metadata from NDB */
  if ((res= get_metadata(thd, name)))
  {
    local_close(thd, FALSE);
    DBUG_RETURN(res);
  }

  if ((res= update_stats(thd, 1)) ||
      (res= info(HA_STATUS_CONST)))
  {
    local_close(thd, TRUE);
    DBUG_RETURN(res);
  }
  if (ndb_binlog_is_read_only())
  {
    /* Binlog setup not complete yet - open the table read only */
    table->db_stat|= HA_READ_ONLY;
    sql_print_information("table '%s' opened read only", name);
  }
  DBUG_RETURN(0);
}
12079
12080 /*
12081 * Support for OPTIMIZE TABLE
12082 * reclaims unused space of deleted rows
12083 * and updates index statistics
12084 */
optimize(THD * thd,HA_CHECK_OPT * check_opt)12085 int ha_ndbcluster::optimize(THD* thd, HA_CHECK_OPT* check_opt)
12086 {
12087 ulong error, stats_error= 0;
12088 const uint delay= (uint)THDVAR(thd, optimization_delay);
12089
12090 error= ndb_optimize_table(thd, delay);
12091 stats_error= update_stats(thd, 1);
12092 return (error) ? error : stats_error;
12093 }
12094
ndb_optimize_table(THD * thd,uint delay)12095 int ha_ndbcluster::ndb_optimize_table(THD* thd, uint delay)
12096 {
12097 Thd_ndb *thd_ndb= get_thd_ndb(thd);
12098 Ndb *ndb= thd_ndb->ndb;
12099 NDBDICT *dict= ndb->getDictionary();
12100 int result=0, error= 0;
12101 uint i;
12102 NdbDictionary::OptimizeTableHandle th;
12103 NdbDictionary::OptimizeIndexHandle ih;
12104
12105 DBUG_ENTER("ndb_optimize_table");
12106 if ((error= dict->optimizeTable(*m_table, th)))
12107 {
12108 DBUG_PRINT("info",
12109 ("Optimze table %s returned %d", m_tabname, error));
12110 ERR_RETURN(ndb->getNdbError());
12111 }
12112 while((result= th.next()) == 1)
12113 {
12114 if (thd->killed)
12115 DBUG_RETURN(-1);
12116 my_sleep(1000*delay);
12117 }
12118 if (result == -1 || th.close() == -1)
12119 {
12120 DBUG_PRINT("info",
12121 ("Optimize table %s did not complete", m_tabname));
12122 ERR_RETURN(ndb->getNdbError());
12123 };
12124 for (i= 0; i < MAX_KEY; i++)
12125 {
12126 if (thd->killed)
12127 DBUG_RETURN(-1);
12128 if (m_index[i].status == ACTIVE)
12129 {
12130 const NdbDictionary::Index *index= m_index[i].index;
12131 const NdbDictionary::Index *unique_index= m_index[i].unique_index;
12132
12133 if (index)
12134 {
12135 if ((error= dict->optimizeIndex(*index, ih)))
12136 {
12137 DBUG_PRINT("info",
12138 ("Optimze index %s returned %d",
12139 index->getName(), error));
12140 ERR_RETURN(ndb->getNdbError());
12141
12142 }
12143 while((result= ih.next()) == 1)
12144 {
12145 if (thd->killed)
12146 DBUG_RETURN(-1);
12147 my_sleep(1000*delay);
12148 }
12149 if (result == -1 || ih.close() == -1)
12150 {
12151 DBUG_PRINT("info",
12152 ("Optimize index %s did not complete", index->getName()));
12153 ERR_RETURN(ndb->getNdbError());
12154 }
12155 }
12156 if (unique_index)
12157 {
12158 if ((error= dict->optimizeIndex(*unique_index, ih)))
12159 {
12160 DBUG_PRINT("info",
12161 ("Optimze unique index %s returned %d",
12162 unique_index->getName(), error));
12163 ERR_RETURN(ndb->getNdbError());
12164 }
12165 while((result= ih.next()) == 1)
12166 {
12167 if (thd->killed)
12168 DBUG_RETURN(-1);
12169 my_sleep(1000*delay);
12170 }
12171 if (result == -1 || ih.close() == -1)
12172 {
12173 DBUG_PRINT("info",
12174 ("Optimize index %s did not complete", index->getName()));
12175 ERR_RETURN(ndb->getNdbError());
12176 }
12177 }
12178 }
12179 }
12180 DBUG_RETURN(0);
12181 }
12182
analyze(THD * thd,HA_CHECK_OPT * check_opt)12183 int ha_ndbcluster::analyze(THD* thd, HA_CHECK_OPT* check_opt)
12184 {
12185 int err;
12186 if ((err= update_stats(thd, 1)) != 0)
12187 return err;
12188 const bool index_stat_enable= THDVAR(NULL, index_stat_enable) &&
12189 THDVAR(thd, index_stat_enable);
12190 if (index_stat_enable)
12191 {
12192 if ((err= analyze_index(thd)) != 0)
12193 return err;
12194 }
12195 return 0;
12196 }
12197
12198 int
analyze_index(THD * thd)12199 ha_ndbcluster::analyze_index(THD *thd)
12200 {
12201 DBUG_ENTER("ha_ndbcluster::analyze_index");
12202
12203 Thd_ndb *thd_ndb= get_thd_ndb(thd);
12204 Ndb *ndb= thd_ndb->ndb;
12205
12206 uint inx_list[MAX_INDEXES];
12207 uint inx_count= 0;
12208
12209 uint inx;
12210 for (inx= 0; inx < table_share->keys; inx++)
12211 {
12212 NDB_INDEX_TYPE idx_type= get_index_type(inx);
12213
12214 if ((idx_type == PRIMARY_KEY_ORDERED_INDEX ||
12215 idx_type == UNIQUE_ORDERED_INDEX ||
12216 idx_type == ORDERED_INDEX))
12217 {
12218 if (inx_count < MAX_INDEXES)
12219 inx_list[inx_count++]= inx;
12220 }
12221 }
12222
12223 if (inx_count != 0)
12224 {
12225 int err= ndb_index_stat_analyze(ndb, inx_list, inx_count);
12226 if (err != 0)
12227 DBUG_RETURN(err);
12228 }
12229 DBUG_RETURN(0);
12230 }
12231
12232 /*
12233 Set partition info
12234
12235 SYNOPSIS
12236 set_part_info()
12237 part_info
12238
12239 RETURN VALUE
12240 NONE
12241
12242 DESCRIPTION
12243 Set up partition info when handler object created
12244 */
12245
set_part_info(partition_info * part_info,bool early)12246 void ha_ndbcluster::set_part_info(partition_info *part_info, bool early)
12247 {
12248 DBUG_ENTER("ha_ndbcluster::set_part_info");
12249 m_part_info= part_info;
12250 if (!early)
12251 {
12252 m_use_partition_pruning= FALSE;
12253 if (!(m_part_info->part_type == HASH_PARTITION &&
12254 m_part_info->list_of_part_fields &&
12255 !m_part_info->is_sub_partitioned()))
12256 {
12257 /*
12258 PARTITION BY HASH, RANGE and LIST plus all subpartitioning variants
12259 all use MySQL defined partitioning. PARTITION BY KEY uses NDB native
12260 partitioning scheme.
12261 */
12262 m_use_partition_pruning= TRUE;
12263 m_user_defined_partitioning= TRUE;
12264 }
12265 if (m_part_info->part_type == HASH_PARTITION &&
12266 m_part_info->list_of_part_fields &&
12267 m_part_info->num_full_part_fields == 0)
12268 {
12269 /*
12270 CREATE TABLE t (....) ENGINE NDB PARTITON BY KEY();
12271 where no primary key is defined uses a hidden key as partition field
12272 and this makes it impossible to use any partition pruning. Partition
12273 pruning requires partitioning based on real fields, also the lack of
12274 a primary key means that all accesses to tables are based on either
12275 full table scans or index scans and they can never be pruned those
12276 scans given that the hidden key is unknown. In write_row, update_row,
12277 and delete_row the normal hidden key handling will fix things.
12278 */
12279 m_use_partition_pruning= FALSE;
12280 }
12281 DBUG_PRINT("info", ("m_use_partition_pruning = %d",
12282 m_use_partition_pruning));
12283 }
12284 DBUG_VOID_RETURN;
12285 }
12286
12287 /**
12288 Close the table
12289 - release resources setup by open()
12290 */
12291
local_close(THD * thd,bool release_metadata_flag)12292 void ha_ndbcluster::local_close(THD *thd, bool release_metadata_flag)
12293 {
12294 Ndb *ndb;
12295 DBUG_ENTER("ha_ndbcluster::local_close");
12296 if (m_key_fields)
12297 {
12298 MY_BITMAP **inx_bitmap;
12299 for (inx_bitmap= m_key_fields;
12300 (inx_bitmap != NULL) && ((*inx_bitmap) != NULL);
12301 inx_bitmap++)
12302 if ((*inx_bitmap)->bitmap != m_pk_bitmap_buf)
12303 bitmap_free(*inx_bitmap);
12304 my_free((char*)m_key_fields, MYF(0));
12305 m_key_fields= NULL;
12306 }
12307 if (m_share)
12308 {
12309 /* ndb_share reference handler free */
12310 DBUG_PRINT("NDB_SHARE", ("%s handler free use_count: %u",
12311 m_share->key_string(), m_share->use_count));
12312 free_share(&m_share);
12313 }
12314 m_share= 0;
12315 if (release_metadata_flag)
12316 {
12317 ndb= thd ? check_ndb_in_thd(thd) : g_ndb;
12318 release_metadata(thd, ndb);
12319 }
12320 DBUG_VOID_RETURN;
12321 }
12322
close(void)12323 int ha_ndbcluster::close(void)
12324 {
12325 DBUG_ENTER("close");
12326 THD *thd= table->in_use;
12327 local_close(thd, TRUE);
12328 DBUG_RETURN(0);
12329 }
12330
12331
check_ndb_connection(THD * thd) const12332 int ha_ndbcluster::check_ndb_connection(THD* thd) const
12333 {
12334 Ndb *ndb;
12335 DBUG_ENTER("check_ndb_connection");
12336
12337 if (!(ndb= check_ndb_in_thd(thd, true)))
12338 DBUG_RETURN(HA_ERR_NO_CONNECTION);
12339 if (ndb->setDatabaseName(m_dbname))
12340 {
12341 ERR_RETURN(ndb->getNdbError());
12342 }
12343 DBUG_RETURN(0);
12344 }
12345
12346
ndbcluster_close_connection(handlerton * hton,THD * thd)12347 static int ndbcluster_close_connection(handlerton *hton, THD *thd)
12348 {
12349 Thd_ndb *thd_ndb= get_thd_ndb(thd);
12350 DBUG_ENTER("ndbcluster_close_connection");
12351 if (thd_ndb)
12352 {
12353 Thd_ndb::release(thd_ndb);
12354 thd_set_thd_ndb(thd, NULL);
12355 }
12356 DBUG_RETURN(0);
12357 }
12358
12359
12360 /**
12361 Try to discover one table from NDB.
12362 */
static
int ndbcluster_discover(handlerton *hton, THD* thd, const char *db,
                        const char *name,
                        uchar **frmblob,
                        size_t *frmlen)
{
  int error= 0;
  NdbError ndb_error;
  size_t len;
  uchar* data= NULL;
  Ndb* ndb;
  char key[FN_REFLEN + 1];
  DBUG_ENTER("ndbcluster_discover");
  DBUG_PRINT("enter", ("db: %s, name: %s", db, name));

  // Check if the database directory for the table to discover exists
  // as otherwise there is no place to put the discovered .frm file.
  build_table_filename(key, sizeof(key) - 1, db, "", "", 0);
  const int database_exists= !my_access(key, F_OK);
  if (!database_exists)
  {
    sql_print_information("NDB: Could not find database directory '%s' "
                          "while trying to discover table '%s'", db, name);
    // Can't discover table when database directory does not exist
    DBUG_RETURN(1);
  }

  if (!(ndb= check_ndb_in_thd(thd)))
    DBUG_RETURN(HA_ERR_NO_CONNECTION);
  if (ndb->setDatabaseName(db))
  {
    ERR_RETURN(ndb->getNdbError());
  }

  build_table_filename(key, sizeof(key) - 1, db, name, "", 0);
  /* ndb_share reference temporary */
  NDB_SHARE* share= get_share(key, 0, FALSE);
  if (share)
  {
    DBUG_PRINT("NDB_SHARE", ("%s temporary use_count: %u",
                             share->key_string(), share->use_count));
  }
  if (share && get_ndb_share_state(share) == NSS_ALTERED)
  {
    // Frm has been altered on disk, but not yet written to ndb
    // - use the local copy instead of the one stored in NDB
    if (readfrm(key, &data, &len))
    {
      DBUG_PRINT("error", ("Could not read frm"));
      error= 1;
      goto err;
    }
  }
  else
  {
    /* Fetch the table definition from NDB and unpack its frm blob */
    NDBDICT* dict= ndb->getDictionary();
    Ndb_table_guard ndbtab_g(dict, name);
    const NDBTAB *tab= ndbtab_g.get_table();
    if (!tab)
    {
      const NdbError err= dict->getNdbError();
      if (err.code == 709 || err.code == 723)
      {
        /* Table or database not found - report failure without
           propagating an NDB error (ndb_error stays cleared) */
        error= -1;
        DBUG_PRINT("info", ("ndb_error.code: %u", ndb_error.code));
      }
      else
      {
        error= -1;
        ndb_error= err;
        DBUG_PRINT("info", ("ndb_error.code: %u", ndb_error.code));
      }
      goto err;
    }
    DBUG_PRINT("info", ("Found table %s", tab->getName()));

    len= tab->getFrmLength();
    if (len == 0 || tab->getFrmData() == NULL)
    {
      DBUG_PRINT("error", ("No frm data found."));
      error= 1;
      goto err;
    }

    if (unpackfrm(&data, &len, (uchar*) tab->getFrmData()))
    {
      DBUG_PRINT("error", ("Could not unpack table"));
      error= 1;
      goto err;
    }
  }
#ifdef HAVE_NDB_BINLOG
  /* A locally defined table shadows the one in NDB - refuse discovery
     (except for the distributed privilege tables) */
  if (ndbcluster_check_if_local_table(db, name) &&
      !Ndb_dist_priv_util::is_distributed_priv_table(db, name))
  {
    DBUG_PRINT("info", ("ndbcluster_discover: Skipping locally defined table '%s.%s'",
                        db, name));
    sql_print_error("ndbcluster_discover: Skipping locally defined table '%s.%s'",
                    db, name);
    error= 1;
    goto err;
  }
#endif
  /* Hand the unpacked frm blob to the caller (caller takes ownership) */
  *frmlen= len;
  *frmblob= data;

  if (share)
  {
    /* ndb_share reference temporary free */
    DBUG_PRINT("NDB_SHARE", ("%s temporary free use_count: %u",
                             share->key_string(), share->use_count));
    free_share(&share);
  }

  DBUG_RETURN(0);
err:
  /* Error path: release the frm buffer and the temporary share */
  my_free((char*)data, MYF(MY_ALLOW_ZERO_PTR));
  if (share)
  {
    /* ndb_share reference temporary free */
    DBUG_PRINT("NDB_SHARE", ("%s temporary free use_count: %u",
                             share->key_string(), share->use_count));
    free_share(&share);
  }

  if (ndb_error.code)
  {
    ERR_RETURN(ndb_error);
  }
  DBUG_RETURN(error);
}
12493
12494 /**
12495 Check if a table exists in NDB.
12496 */
12497 static
ndbcluster_table_exists_in_engine(handlerton * hton,THD * thd,const char * db,const char * name)12498 int ndbcluster_table_exists_in_engine(handlerton *hton, THD* thd,
12499 const char *db,
12500 const char *name)
12501 {
12502 Ndb* ndb;
12503 DBUG_ENTER("ndbcluster_table_exists_in_engine");
12504 DBUG_PRINT("enter", ("db: %s name: %s", db, name));
12505
12506 if (!(ndb= check_ndb_in_thd(thd)))
12507 DBUG_RETURN(HA_ERR_NO_CONNECTION);
12508 NDBDICT* dict= ndb->getDictionary();
12509 NdbDictionary::Dictionary::List list;
12510 if (dict->listObjects(list, NdbDictionary::Object::UserTable) != 0)
12511 {
12512 ERR_RETURN(dict->getNdbError());
12513 }
12514 for (uint i= 0 ; i < list.count ; i++)
12515 {
12516 NdbDictionary::Dictionary::List::Element& elmt= list.elements[i];
12517 if (my_strcasecmp(table_alias_charset, elmt.database, db))
12518 continue;
12519 if (my_strcasecmp(table_alias_charset, elmt.name, name))
12520 continue;
12521 DBUG_PRINT("info", ("Found table"));
12522 DBUG_RETURN(HA_ERR_TABLE_EXIST);
12523 }
12524 DBUG_RETURN(HA_ERR_NO_SUCH_TABLE);
12525 }
12526
12527
tables_get_key(const char * entry,size_t * length,my_bool not_used MY_ATTRIBUTE ((unused)))12528 extern "C" uchar* tables_get_key(const char *entry, size_t *length,
12529 my_bool not_used MY_ATTRIBUTE((unused)))
12530 {
12531 *length= strlen(entry);
12532 return (uchar*) entry;
12533 }
12534
12535
/**
  Drop a database in NDB Cluster.

  Lists all user tables in NDB, collects those belonging to the given
  database and drops them one by one, then invalidates the database in
  the NdbApi dictionary cache.

  @param thd   Thread context
  @param path  Path to the database directory, used to extract the
               database name

  @return 0 on success, non-zero MySQL error code otherwise

  @note
    The handlerton's drop_database hook returns void instead of int, so
    this int-returning implementation exists separately and is called
    from the void wrapper registered with the handlerton.
*/
int ndbcluster_drop_database_impl(THD *thd, const char *path)
{
  DBUG_ENTER("ndbcluster_drop_database");
  char dbname[FN_HEADLEN];
  Ndb* ndb;
  NdbDictionary::Dictionary::List list;
  uint i;
  char *tabname;
  List<char> drop_list;
  int ret= 0;
  ha_ndbcluster::set_dbname(path, (char *)&dbname);
  DBUG_PRINT("enter", ("db: %s", dbname));

  if (!(ndb= check_ndb_in_thd(thd)))
    DBUG_RETURN(-1);

  // List tables in NDB
  NDBDICT *dict= ndb->getDictionary();
  if (dict->listObjects(list,
                        NdbDictionary::Object::UserTable) != 0)
    DBUG_RETURN(-1);
  for (i= 0 ; i < list.count ; i++)
  {
    NdbDictionary::Dictionary::List::Element& elmt= list.elements[i];
    DBUG_PRINT("info", ("Found %s/%s in NDB", elmt.database, elmt.name));

    // Add only tables that belongs to db
    // Ignore Blob part tables - they are deleted when their table
    // is deleted.
    // Also ignore mock tables used by the foreign key implementation.
    if (my_strcasecmp(system_charset_info, elmt.database, dbname) ||
        IS_NDB_BLOB_PREFIX(elmt.name) ||
        ndb_fk_util_is_mock_name(elmt.name))
      continue;
    DBUG_PRINT("info", ("%s must be dropped", elmt.name));
    drop_list.push_back(thd->mem_strdup(elmt.name));
  }
  // Drop any tables belonging to database
  char full_path[FN_REFLEN + 1];
  char *tmp= full_path +
    build_table_filename(full_path, sizeof(full_path) - 1, dbname, "", "", 0);
  if (ndb->setDatabaseName(dbname))
  {
    ERR_RETURN(ndb->getNdbError());
  }
  List_iterator_fast<char> it(drop_list);
  while ((tabname=it++))
  {
    // Append the filename-encoded table name after the db part of the path
    tablename_to_filename(tabname, tmp, (uint)(FN_REFLEN - (tmp - full_path)-1));
    if (ha_ndbcluster::drop_table_impl(thd, 0, ndb, full_path, dbname, tabname))
    {
      const NdbError err= dict->getNdbError();
      // NOTE(review): error codes 709 and 723 are treated as
      // "table already gone" and thus ignored - verify against the
      // NDB error code documentation
      if (err.code != 709 && err.code != 723)
      {
        ret= ndb_to_mysql_error(&err);
      }
    }
  }

  // Flush all tables of this database from the NdbApi dictionary cache
  dict->invalidateDbGlobal(dbname);
  DBUG_RETURN(ret);
}
12603
12604
12605 /**
12606 @brief Check the given directory for any remaining NDB related
12607 leftovers and try to remove them.
12608
12609 @param path The path of the directory to check
12610
12611 @note This function is called only when all tables which mysqld or NDB
12612 knew about has been removed. Thus anything left behind can be
12613 safely removed.
12614 */
12615
12616 static void
ndbcluster_drop_database_leftovers(const char * path)12617 ndbcluster_drop_database_leftovers(const char* path)
12618 {
12619 DBUG_ENTER("ndbcluster_drop_database_leftovers");
12620 MY_DIR* dirp;
12621 if (!(dirp= my_dir(path,MYF(MY_DONT_SORT))))
12622 {
12623 // The database directory didn't exist, crash in debug since
12624 // something is obviously wrong
12625 assert(false);
12626 DBUG_VOID_RETURN;
12627 }
12628
12629 for (uint i= 0; i < dirp->number_off_files; i++)
12630 {
12631 FILEINFO* file= dirp->dir_entry + i;
12632 DBUG_PRINT("info", ("found: '%s'", file->name));
12633
12634 char* extension= fn_ext(file->name);
12635 DBUG_PRINT("info", ("extension: '%s'", extension));
12636 if (strcmp(extension, ha_ndb_ext))
12637 continue;
12638
12639 char file_path[FN_REFLEN];
12640 strxmov(file_path, path, "/", file->name, NullS);
12641 DBUG_PRINT("info", ("Found leftover .ndb file '%s'! Try to delete it.",
12642 file_path));
12643 if (my_delete_with_symlink(file_path, MYF(0)))
12644 {
12645 // Failed to delete the file. Ignore it since the DROP DATABASE
12646 // will report an error later when it tries to delete the directory
12647 DBUG_PRINT("error", ("Delete of of '%s' failed, my_errno: %d",
12648 file_path, my_errno()));
12649 }
12650 }
12651
12652 my_dirend(dirp);
12653 DBUG_VOID_RETURN;
12654 }
12655
12656
/**
  Drop a database, handlerton hook called by the MySQL Server
  during DROP DATABASE.

  Drops any remaining NDB tables in the database, removes leftover
  .ndb files and distributes the schema change to other mysqld servers.

  @param hton  Handlerton for ndbcluster
  @param path  Path to the database directory
*/
static void ndbcluster_drop_database(handlerton *hton, char *path)
{
  THD *thd= current_thd;
  DBUG_ENTER("ndbcluster_drop_database");

  if (!ndb_schema_dist_is_ready())
  {
    /* Don't allow drop database unless schema distribution is ready */
    DBUG_VOID_RETURN;
  }

  ndbcluster_drop_database_impl(thd, path);

  /*
    At this point the mysqld has looped over all the tables it knew
    about in the database and dropped them one by one. The above call
    to 'ndbcluster_drop_database_impl' has dropped any NDB tables in
    the database which mysqld didn't know about(this could potentially
    happen if there was a "local" table with same name). This means that
    the database directory should be free of anything NDB related.
    Double check to make sure nothing is left behind and remove any
    leftovers(which according to BUG#44529 could happen after for
    example a failed ALTER TABLE).
  */
  ndbcluster_drop_database_leftovers(path);

  char db[FN_REFLEN];
  ha_ndbcluster::set_dbname(path, db);
  uint32 table_id= 0, table_version= 0;
  /*
    Since databases aren't real ndb schema object
    they don't have any id/version

    But since that id/version is used to make sure that event's on SCHEMA_TABLE
    is correct, we set random numbers
  */
  table_id = (uint32)rand();
  table_version = (uint32)rand();
  // Distribute the DROP DATABASE statement to the other mysqld servers
  ndbcluster_log_schema_op(thd,
                           thd->query().str, thd->query().length,
                           db, "", table_id, table_version,
                           SOT_DROP_DB, NULL, NULL);
  DBUG_VOID_RETURN;
}
12701
ndb_create_table_from_engine(THD * thd,const char * db,const char * table_name)12702 int ndb_create_table_from_engine(THD *thd, const char *db,
12703 const char *table_name)
12704 {
12705 // Copy db and table_name to stack buffers since functions used by
12706 // ha_create_table_from_engine may convert to lowercase on some platforms
12707 char db_buf[FN_REFLEN + 1];
12708 char table_name_buf[FN_REFLEN + 1];
12709 my_stpnmov(db_buf, db, sizeof(db_buf));
12710 my_stpnmov(table_name_buf, table_name, sizeof(table_name_buf));
12711
12712 LEX *old_lex= thd->lex, newlex;
12713 thd->lex= &newlex;
12714 newlex.set_current_select(NULL);
12715 lex_start(thd);
12716 int res= ha_create_table_from_engine(thd, db_buf, table_name_buf);
12717 thd->lex= old_lex;
12718 return res;
12719 }
12720
12721
/**
  Find files for the given database, handlerton hook used to keep the
  list of tables known by the MySQL Server in sync with NDB.

   - tables which exist in NDB but have no .frm on disk are discovered
   - local files for tables no longer in NDB are deleted
   - local (non-NDB) tables shadowing NDB tables produce a warning
   - the mysql.ndb_schema utility table is hidden from the result

  @param hton   Handlerton for ndbcluster
  @param thd    Thread context
  @param db     Name of database to scan
  @param path   Path of the database directory (not referenced here)
  @param wild   Optional wildcard pattern to filter table names
  @param dir    TRUE when asked to list databases (not supported)
  @param files  [in,out] List of table names found on disk, updated
                with discovered tables and stripped of removed ones

  @return 0 on success, error code otherwise
*/
static int
ndbcluster_find_files(handlerton *hton, THD *thd,
                      const char *db, const char *path,
                      const char *wild, bool dir, List<LEX_STRING> *files)
{
  DBUG_ENTER("ndbcluster_find_files");
  DBUG_PRINT("enter", ("db: %s", db));
  { // extra bracket to avoid gcc 2.95.3 warning
    uint i;
    Thd_ndb *thd_ndb;
    Ndb* ndb;
    char name[FN_REFLEN + 1];
    HASH ndb_tables, ok_tables;
    NDBDICT::List list;

    if (!(ndb= check_ndb_in_thd(thd)))
      DBUG_RETURN(HA_ERR_NO_CONNECTION);
    thd_ndb= get_thd_ndb(thd);

    if (dir)
      DBUG_RETURN(0); // Discover of databases not yet supported

    // Hold the global schema lock for the duration of the scan
    Ndb_global_schema_lock_guard ndb_global_schema_lock_guard(thd);
    if (ndb_global_schema_lock_guard.lock())
      DBUG_RETURN(HA_ERR_NO_CONNECTION);

    // List tables in NDB
    NDBDICT *dict= ndb->getDictionary();
    if (dict->listObjects(list,
                          NdbDictionary::Object::UserTable) != 0)
      ERR_RETURN(dict->getNdbError());

    // ndb_tables: tables in NDB belonging to 'db' (after wildcard filter)
    if (my_hash_init(&ndb_tables, table_alias_charset,list.count,0,0,
                     (my_hash_get_key)tables_get_key,0,0,
                     PSI_INSTRUMENT_ME))
    {
      DBUG_PRINT("error", ("Failed to init HASH ndb_tables"));
      DBUG_RETURN(-1);
    }

    // ok_tables: tables which exist both in NDB and on disk (frm + .ndb)
    if (my_hash_init(&ok_tables, system_charset_info,32,0,0,
                     (my_hash_get_key)tables_get_key,0,0,
                     PSI_INSTRUMENT_ME))
    {
      DBUG_PRINT("error", ("Failed to init HASH ok_tables"));
      my_hash_free(&ndb_tables);
      DBUG_RETURN(-1);
    }

    for (i= 0 ; i < list.count ; i++)
    {
      NDBDICT::List::Element& elmt= list.elements[i];
      // Skip temporary tables and blob part tables
      if (IS_TMP_PREFIX(elmt.name) || IS_NDB_BLOB_PREFIX(elmt.name))
      {
        DBUG_PRINT("info", ("Skipping %s.%s in NDB", elmt.database, elmt.name));
        continue;
      }
      DBUG_PRINT("info", ("Found %s/%s in NDB", elmt.database, elmt.name));

      // Add only tables that belongs to db
      if (my_strcasecmp(system_charset_info, elmt.database, db))
        continue;

      // Apply wildcard to list of tables in NDB
      if (wild)
      {
        if (lower_case_table_names)
        {
          if (wild_case_compare(files_charset_info, elmt.name, wild))
            continue;
        }
        else if (wild_compare(elmt.name,wild,0))
          continue;
      }
      DBUG_PRINT("info", ("Inserting %s into ndb_tables hash", elmt.name));
      my_hash_insert(&ndb_tables, (uchar*)thd->mem_strdup(elmt.name));
    }

    // Compare the tables found on disk ('files') with the tables in NDB
    LEX_STRING *file_name;
    List_iterator<LEX_STRING> it(*files);
    List<char> delete_list;
    char *file_name_str;
    while ((file_name=it++))
    {
      bool file_on_disk= FALSE;
      DBUG_PRINT("info", ("File : %s", file_name->str));
      if (my_hash_search(&ndb_tables,
                         (const uchar*)file_name->str, file_name->length))
      {
        build_table_filename(name, sizeof(name) - 1, db,
                             file_name->str, reg_ext, 0);
        if (my_access(name, F_OK))
        {
          /* No frm for database, table name combination, but
           * Cluster says the table with that combination exists.
           * Assume frm was deleted, re-discover from engine.
           */
          DBUG_PRINT("info", ("Table %s listed and need discovery",
                              file_name->str));
          if (ndb_create_table_from_engine(thd, db, file_name->str))
          {
            push_warning_printf(thd, Sql_condition::SL_WARNING,
                                ER_TABLE_EXISTS_ERROR,
                                "Discover of table %s.%s failed",
                                db, file_name->str);
            continue;
          }
        }
        DBUG_PRINT("info", ("%s existed in NDB _and_ on disk ", file_name->str));
        file_on_disk= TRUE;
      }

      // Check for .ndb file with this name
      build_table_filename(name, sizeof(name) - 1, db,
                           file_name->str, ha_ndb_ext, 0);
      DBUG_PRINT("info", ("Check access for %s", name));
      if (my_access(name, F_OK))
      {
        DBUG_PRINT("info", ("%s did not exist on disk", name));
        // .ndb file did not exist on disk, another table type
        if (file_on_disk)
        {
          // Cluster table and an frm file exist, but no .ndb file
          // Assume this means the frm is for a local table, and is
          // hiding the cluster table in its shadow.
          // Ignore this ndb table
          uchar *record= my_hash_search(&ndb_tables,
                                        (const uchar*) file_name->str,
                                        file_name->length);
          assert(record);
          my_hash_delete(&ndb_tables, record);
          push_warning_printf(thd, Sql_condition::SL_WARNING,
                              ER_TABLE_EXISTS_ERROR,
                              "Local table %s.%s shadows ndb table",
                              db, file_name->str);
        }
        continue;
      }

      /* .ndb file exists */
      if (file_on_disk)
      {
        // File existed in Cluster and has both frm and .ndb files,
        // Put in ok_tables list
        my_hash_insert(&ok_tables, (uchar*) file_name->str);
        continue;
      }
      DBUG_PRINT("info", ("%s existed on disk", name));
      // The .ndb file exists on disk, but it's not in list of tables in cluster
      // Verify that handler agrees table is gone.
      if (ndbcluster_table_exists_in_engine(hton, thd, db, file_name->str) ==
          HA_ERR_NO_SUCH_TABLE)
      {
        DBUG_PRINT("info", ("NDB says %s does not exists", file_name->str));
        it.remove();
        if (thd == injector_thd)
        {
          /*
            Don't delete anything when called from
            the binlog thread. This is a kludge to avoid
            that something is deleted when "Ndb schema dist"
            uses find_files() to check for "local tables in db"
          */
        }
        else
          // Put in list of tables to remove from disk
          delete_list.push_back(thd->mem_strdup(file_name->str));
      }
    }

    if (!thd_ndb->skip_binlog_setup_in_find_files())
    {
      /* setup logging to binlog for all discovered tables */
      char *end, *end1= name +
        build_table_filename(name, sizeof(name) - 1, db, "", "", 0);
      for (i= 0; i < ok_tables.records; i++)
      {
        file_name_str= (char*)my_hash_element(&ok_tables, i);
        end= end1 +
          tablename_to_filename(file_name_str, end1, (uint)(sizeof(name) - (end1 - name)));
        ndbcluster_create_binlog_setup(thd, ndb, name, (uint)(end-name),
                                       db, file_name_str, 0);
      }
    }

    // Check for new files to discover
    DBUG_PRINT("info", ("Checking for new files to discover"));
    List<char> create_list;
    for (i= 0 ; i < ndb_tables.records ; i++)
    {
      file_name_str= (char*) my_hash_element(&ndb_tables, i);
      if (!my_hash_search(&ok_tables,
                          (const uchar*) file_name_str, strlen(file_name_str)))
      {
        /* Table in Cluster did not have frm or .ndb */
        build_table_filename(name, sizeof(name) - 1,
                             db, file_name_str, reg_ext, 0);
        if (my_access(name, F_OK))
        {
          DBUG_PRINT("info", ("%s must be discovered", file_name_str));
          // File is in list of ndb tables and not in ok_tables.
          // It is missing an frm file.
          // This table need to be created
          create_list.push_back(thd->mem_strdup(file_name_str));
        }
      }
    }

    if (thd == injector_thd)
    {
      /*
        Don't delete anything when called from
        the binlog thread. This is a kludge to avoid
        that something is deleted when "Ndb schema dist"
        uses find_files() to check for "local tables in db"
      */
    }
    else
    {
      /*
        Delete old files
        (.frm files with corresponding .ndb + does not exists in NDB)
      */
      List_iterator_fast<char> it3(delete_list);
      while ((file_name_str= it3++))
      {
        DBUG_PRINT("info", ("Deleting local files for table '%s.%s'",
                            db, file_name_str));

        // Delete the table and its related files from disk
        Ndb_local_schema::Table local_table(thd, db, file_name_str);
        local_table.remove_table();

        // Flush the table out of ndbapi's dictionary cache
        Ndb_table_guard ndbtab_g(ndb->getDictionary(), file_name_str);
        ndbtab_g.invalidate();

        // Flush the table from table def. cache.
        ndb_tdc_close_cached_table(thd, db, file_name_str);

        assert(!thd->is_error());
      }
    }

    // Create new files
    List_iterator_fast<char> it2(create_list);
    while ((file_name_str=it2++))
    {
      DBUG_PRINT("info", ("Table %s need discovery", file_name_str));
      if (ndb_create_table_from_engine(thd, db, file_name_str) == 0)
      {
        // Discovery succeeded, add the table to the returned list
        LEX_STRING *tmp_file_name= 0;
        tmp_file_name= thd->make_lex_string(tmp_file_name, file_name_str,
                                            (uint)strlen(file_name_str), TRUE);
        files->push_back(tmp_file_name);
      }
    }

    my_hash_free(&ok_tables);
    my_hash_free(&ndb_tables);

    /* Hide mysql.ndb_schema table */
    if (!strcmp(db, NDB_REP_DB))
    {
      LEX_STRING* file_name;
      List_iterator<LEX_STRING> it(*files);
      while ((file_name= it++))
      {
        if (!strcmp(file_name->str, NDB_SCHEMA_TABLE))
        {
          DBUG_PRINT("info", ("Hiding table '%s.%s'", db, file_name->str));
          it.remove();
        }
      }
    }
  } // extra bracket to avoid gcc 2.95.3 warning
  DBUG_RETURN(0);
}
13000
13001
13002 /**
13003 Check if the given table is a system table which is
13004 supported to store in NDB
13005
13006 */
is_supported_system_table(const char * db,const char * table_name,bool is_sql_layer_system_table)13007 static bool is_supported_system_table(const char *db,
13008 const char *table_name,
13009 bool is_sql_layer_system_table)
13010 {
13011 if (!is_sql_layer_system_table)
13012 {
13013 // No need to check tables which MySQL Server does not
13014 // consider as system tables
13015 return false;
13016 }
13017
13018 if (Ndb_dist_priv_util::is_distributed_priv_table(db, table_name))
13019 {
13020 // Table is supported as distributed system table and should be allowed
13021 // to be stored in NDB
13022 return true;
13023 }
13024
13025 return false;
13026 }
13027
13028
/* Call back after cluster connect */
static int connect_callback()
{
  // Refresh the global ndb status variables and broadcast
  // ndb_util_thread.COND to notify any thread waiting on it,
  // all under ndb_util_thread.LOCK
  native_mutex_lock(&ndb_util_thread.LOCK);
  update_status_variables(NULL, &g_ndb_status,
                          g_ndb_cluster_connection);
  native_cond_broadcast(&ndb_util_thread.COND);
  native_mutex_unlock(&ndb_util_thread.LOCK);
  return 0;
}
13039
/**
 * Components
 */
Ndb_util_thread ndb_util_thread;              // Background utility thread
Ndb_index_stat_thread ndb_index_stat_thread;  // Index statistics thread

// Create a THD for internal use, defined elsewhere
extern THD * ndb_create_thd(char * stackptr);
13047
#ifndef NDB_NO_WAIT_SETUP
/**
  Wait until ndb binlog setup and index stat setup have completed,
  or until 'max_wait' seconds have elapsed.

  @param max_wait  Maximum number of seconds to wait

  @return 0 when setup completed, 1 on timeout or failure
*/
static int ndb_wait_setup_func_impl(ulong max_wait)
{
  DBUG_ENTER("ndb_wait_setup_func_impl");

  native_mutex_lock(&ndbcluster_mutex);

  struct timespec abstime;
  set_timespec(&abstime, 1);

  // Wait in one second intervals, decrementing max_wait at each timeout
  while (max_wait &&
         (!ndb_setup_complete || !ndb_index_stat_thread.is_setup_complete()))
  {
    int rc= native_cond_timedwait(&COND_ndb_setup_complete,
                                  &ndbcluster_mutex,
                                  &abstime);
    if (rc)
    {
      if (rc == ETIMEDOUT)
      {
        DBUG_PRINT("info", ("1s elapsed waiting"));
        max_wait--;
        set_timespec(&abstime, 1); /* 1 second from now*/
      }
      else
      {
        // Unexpected error from the condition wait, stop waiting
        DBUG_PRINT("info", ("Bad native_cond_timedwait rc : %u",
                            rc));
        assert(false);
        break;
      }
    }
  }

  native_mutex_unlock(&ndbcluster_mutex);

#ifndef NDB_WITHOUT_DIST_PRIV
  do
  {
    /**
     * Check if we (might) need a flush privileges
     */
    THD* thd= current_thd;
    bool own_thd= thd == NULL;
    if (own_thd)
    {
      // No THD in this thread, create a temporary one
      thd= ndb_create_thd((char*)&thd);
      if (thd == 0)
        break;
    }

    if (Ndb_dist_priv_util::priv_tables_are_in_ndb(thd))
    {
      Ndb_local_connection mysqld(thd);
      mysqld.raw_run_query("FLUSH PRIVILEGES", sizeof("FLUSH PRIVILEGES"), 0);
    }

    if (own_thd)
    {
      // TLS variables should not point to thd anymore.
      thd->restore_globals();
      delete thd;
    }
  } while (0);
#endif

  DBUG_RETURN((ndb_setup_complete == 1)? 0 : 1);
}

// Function pointer installed by ndbcluster_init()
int(*ndb_wait_setup_func)(ulong) = 0;
#endif
13119
// Forward declaration, implemented further down in this file
static int
ndbcluster_make_pushed_join(handlerton *, THD*, const AQP::Join_plan*);

/* Version in composite numerical format */
static Uint32 ndb_version = NDB_VERSION_D;
static MYSQL_SYSVAR_UINT(
  version, /* name */
  ndb_version, /* var */
  PLUGIN_VAR_NOCMDOPT | PLUGIN_VAR_READONLY,
  "Compile version for ndbcluster",
  NULL, /* check func. */
  NULL, /* update func. */
  0, /* default */
  0, /* min */
  0, /* max */
  0 /* block */
);

/* Version in ndb-Y.Y.Y[-status] format */
static char* ndb_version_string = (char*)NDB_NDB_VERSION_STRING;
static MYSQL_SYSVAR_STR(
  version_string, /* name */
  ndb_version_string, /* var */
  PLUGIN_VAR_NOCMDOPT | PLUGIN_VAR_READONLY,
  "Compile version string for ndbcluster",
  NULL, /* check func. */
  NULL, /* update func. */
  NULL /* default */
);

extern int ndb_dictionary_is_mysqld;

// Set by ndb_recv_thread_cpu_mask_check_str(), presumably the number
// of CPUs in the configured recv thread cpu mask - see usage in
// ndbcluster_init()
Uint32 recv_thread_num_cpus;
static int ndb_recv_thread_cpu_mask_check_str(const char *str);
static void ndb_recv_thread_cpu_mask_update();
handlerton* ndbcluster_hton;
13156
13157
/*
  Handle failure from ndbcluster_init() by printing error
  message(s) and exit the MySQL Server.

  NOTE! This is done to avoid the current undefined behaviour which occurs
  when an error return code from plugin's init() function just disables
  the plugin.
*/

static
void ndbcluster_init_abort(const char* error)
{
  // Log the specific error followed by generic advice, then
  // terminate the server process - this function does not return
  ndb_log_error("%s", error);
  ndb_log_error("Failed to initialize ndbcluster, aborting!");
  ndb_log_error("Use --skip-ndbcluster to start without ndbcluster.");
  exit(1);
}
13175
13176
/*
  Initialize the ndbcluster storage engine

  Called by the plugin framework with 'p' pointing at the handlerton
  to fill in. Any failure aborts the server via ndbcluster_init_abort().
*/

static
int ndbcluster_init(void* p)
{
  DBUG_ENTER("ndbcluster_init");

  assert(!ndbcluster_inited);

#ifdef HAVE_NDB_BINLOG
  /* Check const alignment */
  assert(DependencyTracker::InvalidTransactionId ==
         Ndb_binlog_extra_row_info::InvalidTransactionId);

  if (global_system_variables.binlog_format == BINLOG_FORMAT_STMT)
  {
    /* Set global to mixed - note that this is not the default,
     * but the current global value
     */
    global_system_variables.binlog_format = BINLOG_FORMAT_MIXED;
    sql_print_information("NDB: Changed global value of binlog_format from STATEMENT to MIXED");

  }
#endif
  if (ndb_util_thread.init() ||
      DBUG_EVALUATE_IF("ndbcluster_init_fail1", true, false))
  {
    ndbcluster_init_abort("Failed to initialize NDB Util");
  }

  if (ndb_index_stat_thread.init())
  {
    ndbcluster_init_abort("Failed to initialize NDB Index Stat");
  }

  // Initialize the global mutex/condition used for "wait setup"
  native_mutex_init(&ndbcluster_mutex,MY_MUTEX_INIT_FAST);
  native_cond_init(&COND_ndb_setup_complete);
  ndb_dictionary_is_mysqld= 1;
  ndb_setup_complete= 0;
  ndbcluster_hton= (handlerton *)p;
  ndbcluster_global_schema_lock_init(ndbcluster_hton);

  {
    // Fill in the handlerton with the ndbcluster entry points
    handlerton *h= ndbcluster_hton;
    h->state= SHOW_OPTION_YES;
    h->db_type= DB_TYPE_NDBCLUSTER;
    h->close_connection= ndbcluster_close_connection;
    h->commit= ndbcluster_commit;
    h->rollback= ndbcluster_rollback;
    h->create= ndbcluster_create_handler; /* Create a new handler */
    h->drop_database= ndbcluster_drop_database; /* Drop a database */
    h->panic= ndbcluster_end; /* Panic call */
    h->show_status= ndbcluster_show_status; /* Show status */
    h->get_tablespace= ndbcluster_get_tablespace; /* Get ts for old ver */
    h->alter_tablespace= ndbcluster_alter_tablespace; /* Show status */
    h->partition_flags= ndbcluster_partition_flags; /* Partition flags */
#if MYSQL_VERSION_ID >= 50501
    h->fill_is_table= ndbcluster_fill_is_table;
#else
    h->fill_files_table= ndbcluster_fill_files_table;
#endif
    ndbcluster_binlog_init(h);
    h->flags= HTON_CAN_RECREATE | HTON_TEMPORARY_NOT_SUPPORTED |
      HTON_NO_BINLOG_ROW_OPT;
    h->discover= ndbcluster_discover;
    h->find_files= ndbcluster_find_files;
    h->table_exists_in_engine= ndbcluster_table_exists_in_engine;
    h->make_pushed_join= ndbcluster_make_pushed_join;
    h->is_supported_system_table = is_supported_system_table;
  }

  // Initialize NdbApi
  ndb_init_internal();

  /* allocate connection resources and connect to cluster */
  const uint global_opti_node_select= THDVAR(NULL, optimized_node_selection);
  if (ndbcluster_connect(connect_callback, opt_ndb_wait_connected,
                         opt_ndb_cluster_connection_pool,
                         (global_opti_node_select & 1),
                         opt_ndb_connectstring,
                         opt_ndb_nodeid,
                         opt_ndb_recv_thread_activation_threshold))
  {
    ndbcluster_init_abort("Failed to initialize connection(s)");
  }

  /* Translate recv thread cpu mask if set */
  if (ndb_recv_thread_cpu_mask_check_str(opt_ndb_recv_thread_cpu_mask) == 0)
  {
    if (recv_thread_num_cpus)
    {
      ndb_recv_thread_cpu_mask_update();
    }
  }

  // Initialize the hashes holding open and dropped NDB_SHARE's
  (void) my_hash_init(&ndbcluster_open_tables,table_alias_charset,32,0,0,
                      (my_hash_get_key) ndbcluster_get_key,0,0,
                      PSI_INSTRUMENT_ME);
  (void) my_hash_init(&ndbcluster_dropped_tables,table_alias_charset,32,0,0,
                      (my_hash_get_key) ndbcluster_get_key,0,0,
                      PSI_INSTRUMENT_ME);
  /* start the ndb injector thread */
  if (ndbcluster_binlog_start())
  {
    ndbcluster_init_abort("Failed to start NDB Binlog");
  }

  // Create utility thread
  if (ndb_util_thread.start())
  {
    ndbcluster_init_abort("Failed to start NDB Util");
  }

  // Create index statistics thread
  if (ndb_index_stat_thread.start() ||
      DBUG_EVALUATE_IF("ndbcluster_init_fail2", true, false))
  {
    ndbcluster_init_abort("Failed to start NDB Index Stat");
  }

#ifndef NDB_NO_WAIT_SETUP
  ndb_wait_setup_func= ndb_wait_setup_func_impl;
#endif

  memset(&g_slave_api_client_stats, 0, sizeof(g_slave_api_client_stats));

  ndbcluster_inited= 1;

  DBUG_RETURN(0); // OK
}
13309
13310 #ifndef NDEBUG
13311 static
13312 const char*
get_share_state_string(NDB_SHARE_STATE s)13313 get_share_state_string(NDB_SHARE_STATE s)
13314 {
13315 switch(s) {
13316 case NSS_INITIAL:
13317 return "NSS_INITIAL";
13318 case NSS_ALTERED:
13319 return "NSS_ALTERED";
13320 case NSS_DROPPED:
13321 return "NSS_DROPPED";
13322 }
13323 assert(false);
13324 return "<unknown>";
13325 }
13326 #endif
13327
13328 int ndbcluster_binlog_end(THD *thd);
13329
/**
  End the ndbcluster storage engine: stop the background threads,
  release remaining NDB_SHARE's, disconnect from the cluster and
  release all resources acquired in ndbcluster_init().

  @param hton  Handlerton for ndbcluster (unused)
  @param type  Panic type (unused)

  @return 0
*/
static int ndbcluster_end(handlerton *hton, ha_panic_function type)
{
  DBUG_ENTER("ndbcluster_end");

  if (!ndbcluster_inited)
    DBUG_RETURN(0);
  ndbcluster_inited= 0;

  /* Stop index stat thread */
  ndb_index_stat_thread.stop();

  /* wait for util and binlog thread to finish */
  ndbcluster_binlog_end(NULL);

  // Release any NDB_SHARE still in the open tables hash,
  // printing a debug note for each since they should have been freed
  {
    native_mutex_lock(&ndbcluster_mutex);
    uint save = ndbcluster_open_tables.records; (void)save;
    while (ndbcluster_open_tables.records)
    {
      NDB_SHARE *share=
        (NDB_SHARE*) my_hash_element(&ndbcluster_open_tables, 0);
#ifndef NDEBUG
      fprintf(stderr,
              "NDB: table share %s with use_count %d state: %s(%u) not freed\n",
              share->key_string(), share->use_count,
              get_share_state_string(share->state),
              (uint)share->state);
#endif
      ndbcluster_real_free_share(&share);
    }
    native_mutex_unlock(&ndbcluster_mutex);
    assert(save == 0);
  }
  my_hash_free(&ndbcluster_open_tables);

  // Release any NDB_SHARE still in the dropped tables hash
  {
    native_mutex_lock(&ndbcluster_mutex);
    uint save = ndbcluster_dropped_tables.records; (void)save;
    while (ndbcluster_dropped_tables.records)
    {
      NDB_SHARE *share=
        (NDB_SHARE*) my_hash_element(&ndbcluster_dropped_tables, 0);
#ifndef NDEBUG
      fprintf(stderr,
              "NDB: table share %s with use_count %d state: %s(%u) not freed\n",
              share->key_string(), share->use_count,
              get_share_state_string(share->state),
              (uint)share->state);
      /**
       * For unknown reasons...the dist-priv tables linger here
       * TODO investigate why
       */
      if (Ndb_dist_priv_util::is_distributed_priv_table(share->db,
                                                        share->table_name))
      {
        save--;
      }
#endif
      ndbcluster_real_free_share(&share);
    }
    native_mutex_unlock(&ndbcluster_mutex);
    assert(save == 0);
  }
  my_hash_free(&ndbcluster_dropped_tables);

  ndb_index_stat_end();
  ndbcluster_disconnect();

  ndbcluster_global_schema_lock_deinit();
  ndb_util_thread.deinit();
  ndb_index_stat_thread.deinit();

  native_mutex_destroy(&ndbcluster_mutex);
  native_cond_destroy(&COND_ndb_setup_complete);

  // Cleanup NdbApi
  ndb_end_internal();

  DBUG_RETURN(0);
}
13410
/**
  Print an error to the client, handling a couple of error codes
  which need special treatment for ndbcluster before delegating
  to handler::print_error().

  @param error    Error code to report
  @param errflag  Flags forwarded to the error reporting functions
*/
void ha_ndbcluster::print_error(int error, myf errflag)
{
  DBUG_ENTER("ha_ndbcluster::print_error");
  DBUG_PRINT("enter", ("error: %d", error));

  if (error == HA_ERR_NO_PARTITION_FOUND)
    m_part_info->print_no_partition_found(table);
  else
  {
    if (error == HA_ERR_FOUND_DUPP_KEY &&
        (table == NULL || table->file == NULL))
    {
      /*
        This is a sideffect of 'ndbcluster_print_error' (called from
        'ndbcluster_commit' and 'ndbcluster_rollback') which realises
        that it "knows nothing" and creates a brand new ha_ndbcluster
        in order to be able to call the print_error() function.
        Unfortunately the new ha_ndbcluster hasn't been open()ed
        and thus table pointer etc. is not set. Since handler::print_error()
        will use that pointer without checking for NULL(it naturally
        assumes an error can only be returned when the handler is open)
        this would crash the mysqld unless it's handled here.
      */
      my_error(ER_DUP_KEY, errflag, table_share->table_name.str, error);
      DBUG_VOID_RETURN;
    }
    if (error == ER_CANT_DROP_FIELD_OR_KEY)
    {
      /*
        Called on drop unknown FK by server when algorithm=copy or
        by handler when algorithm=inplace. In both cases the error
        was already printed in ha_ndb_ddl_fk.cc.
      */
      THD* thd= NULL;
      if (table != NULL &&
          (thd= table->in_use) != NULL &&
          thd->lex != NULL &&
          thd->lex->sql_command == SQLCOM_ALTER_TABLE)
      {
        DBUG_VOID_RETURN;
      }
      // Unexpected context for this error, crash in debug
      assert(false);
    }

    handler::print_error(error, errflag);
  }
  DBUG_VOID_RETURN;
}
13459
13460
/**
  Set a given location from full pathname to database name.

  @param path_name  Full path to a table file
  @param dbname     [out] Buffer which receives the database name
*/

void ha_ndbcluster::set_dbname(const char *path_name, char *dbname)
{
  ndb_set_dbname(path_name, dbname);
}
13469
/**
  Set m_dbname from full pathname to table file.

  @param path_name  Full path to a table file
*/

void ha_ndbcluster::set_dbname(const char *path_name)
{
  ndb_set_dbname(path_name, m_dbname);
}
13478
/**
  Set a given location from full pathname to table file.

  @param path_name  Full path to a table file
  @param tabname    [out] Buffer which receives the table name
*/

void
ha_ndbcluster::set_tabname(const char *path_name, char * tabname)
{
  ndb_set_tabname(path_name, tabname);
}
13488
/**
  Set m_tabname from full pathname to table file.

  @param path_name  Full path to a table file
*/

void ha_ndbcluster::set_tabname(const char *path_name)
{
  ndb_set_tabname(path_name, m_tabname);
}
13497
13498
/*
  If there are no stored stats, should we do a tree-dive on all db
  nodes. The result is fairly good but does mean a round-trip.
  NOTE: compile-time constant, currently disabled; records_in_range()
  falls back to heuristics when index statistics are unavailable.
*/
static const bool g_ndb_records_in_range_tree_dive= false;
13504
13505 /* Determine roughly how many records are in the range specified */
13506 ha_rows
records_in_range(uint inx,key_range * min_key,key_range * max_key)13507 ha_ndbcluster::records_in_range(uint inx, key_range *min_key,
13508 key_range *max_key)
13509 {
13510 KEY *key_info= table->key_info + inx;
13511 uint key_length= key_info->key_length;
13512 NDB_INDEX_TYPE idx_type= get_index_type(inx);
13513
13514 DBUG_ENTER("records_in_range");
13515 // Prevent partial read of hash indexes by returning HA_POS_ERROR
13516 if ((idx_type == UNIQUE_INDEX || idx_type == PRIMARY_KEY_INDEX) &&
13517 ((min_key && min_key->length < key_length) ||
13518 (max_key && max_key->length < key_length)))
13519 DBUG_RETURN(HA_POS_ERROR);
13520
13521 // Read from hash index with full key
13522 // This is a "const" table which returns only one record!
13523 if ((idx_type != ORDERED_INDEX) &&
13524 ((min_key && min_key->length == key_length) &&
13525 (max_key && max_key->length == key_length) &&
13526 (min_key->key==max_key->key ||
13527 memcmp(min_key->key, max_key->key, key_length)==0)))
13528 DBUG_RETURN(1);
13529
13530 // XXX why this if
13531 if ((idx_type == PRIMARY_KEY_ORDERED_INDEX ||
13532 idx_type == UNIQUE_ORDERED_INDEX ||
13533 idx_type == ORDERED_INDEX))
13534 {
13535 THD *thd= current_thd;
13536 const bool index_stat_enable= THDVAR(NULL, index_stat_enable) &&
13537 THDVAR(thd, index_stat_enable);
13538
13539 if (index_stat_enable)
13540 {
13541 ha_rows rows= HA_POS_ERROR;
13542 int err= ndb_index_stat_get_rir(inx, min_key, max_key, &rows);
13543 if (err == 0)
13544 {
13545 /**
13546 * optmizer thinks that all values < 2 are exact...but
13547 * but we don't provide exact statistics
13548 */
13549 if (rows < 2)
13550 rows = 2;
13551 DBUG_RETURN(rows);
13552 }
13553 if (err != 0 &&
13554 /* no stats is not unexpected error */
13555 err != NdbIndexStat::NoIndexStats &&
13556 /* warning was printed at first error */
13557 err != NdbIndexStat::MyHasError &&
13558 /* stats thread aborted request */
13559 err != NdbIndexStat::MyAbortReq)
13560 {
13561 push_warning_printf(thd, Sql_condition::SL_WARNING,
13562 ER_CANT_GET_STAT, /* pun? */
13563 "index stats (RIR) for key %s:"
13564 " unexpected error %d",
13565 key_info->name, err);
13566 }
13567 /*fall through*/
13568 }
13569
13570 if (g_ndb_records_in_range_tree_dive)
13571 {
13572 NDB_INDEX_DATA& d=m_index[inx];
13573 const NDBINDEX* index= d.index;
13574 Ndb *ndb= get_ndb(thd);
13575 NdbTransaction* active_trans= m_thd_ndb ? m_thd_ndb->trans : 0;
13576 NdbTransaction* trans=NULL;
13577 int res=0;
13578 Uint64 rows;
13579
13580 do
13581 {
13582 if ((trans=active_trans) == NULL ||
13583 trans->commitStatus() != NdbTransaction::Started)
13584 {
13585 DBUG_PRINT("info", ("no active trans"));
13586 if (! (trans=ndb->startTransaction()))
13587 ERR_BREAK(ndb->getNdbError(), res);
13588 }
13589
13590 /* Create an IndexBound struct for the keys */
13591 NdbIndexScanOperation::IndexBound ib;
13592 compute_index_bounds(ib,
13593 key_info,
13594 min_key,
13595 max_key,
13596 0);
13597
13598 ib.range_no= 0;
13599
13600 NdbIndexStat is;
13601 if (is.records_in_range(index,
13602 trans,
13603 d.ndb_record_key,
13604 m_ndb_record,
13605 &ib,
13606 0,
13607 &rows,
13608 0) == -1)
13609 ERR_BREAK(is.getNdbError(), res);
13610 } while (0);
13611
13612 if (trans != active_trans && rows == 0)
13613 rows = 1;
13614 if (trans != active_trans && trans != NULL)
13615 ndb->closeTransaction(trans);
13616 if (res == 0)
13617 DBUG_RETURN(rows);
13618 /*fall through*/
13619 }
13620 }
13621
13622 /* Use simple heuristics to estimate fraction
13623 of 'stats.record' returned from range.
13624 */
13625 do
13626 {
13627 if (stats.records == ~(ha_rows)0 || stats.records == 0)
13628 {
13629 /* Refresh statistics, only read from datanodes if 'use_exact_count' */
13630 THD *thd= current_thd;
13631 if (update_stats(thd, THDVAR(thd, use_exact_count)))
13632 break;
13633 }
13634
13635 Uint64 rows;
13636 Uint64 table_rows= stats.records;
13637 size_t eq_bound_len= 0;
13638 size_t min_key_length= (min_key) ? min_key->length : 0;
13639 size_t max_key_length= (max_key) ? max_key->length : 0;
13640
13641 // Might have an closed/open range bound:
13642 // Low range open
13643 if (!min_key_length)
13644 {
13645 rows= (!max_key_length)
13646 ? table_rows // No range was specified
13647 : table_rows/10; // -oo .. <high range> -> 10% selectivity
13648 }
13649 // High range open
13650 else if (!max_key_length)
13651 {
13652 rows= table_rows/10; // <low range>..oo -> 10% selectivity
13653 }
13654 else
13655 {
13656 size_t bounds_len= MIN(min_key_length,max_key_length);
13657 uint eq_bound_len= 0;
13658 uint eq_bound_offs= 0;
13659
13660 KEY_PART_INFO* key_part= key_info->key_part;
13661 KEY_PART_INFO* end= key_part+key_info->user_defined_key_parts;
13662 for (; key_part != end; key_part++)
13663 {
13664 uint part_length= key_part->store_length;
13665 if (eq_bound_offs+part_length > bounds_len ||
13666 memcmp(&min_key->key[eq_bound_offs],
13667 &max_key->key[eq_bound_offs],
13668 part_length))
13669 {
13670 break;
13671 }
13672 eq_bound_len+= key_part->length;
13673 eq_bound_offs+= part_length;
13674 }
13675
13676 if (!eq_bound_len)
13677 {
13678 rows= table_rows/20; // <low range>..<high range> -> 5%
13679 }
13680 else
13681 {
13682 // Has an equality range on a leading part of 'key_length':
13683 // - Assume reduced selectivity for non-unique indexes
13684 // by decreasing 'eq_fraction' by 20%
13685 // - Assume equal selectivity for all eq_parts in key.
13686
13687 double eq_fraction = (double)(eq_bound_len) / key_length;
13688 if (idx_type == ORDERED_INDEX) // Non-unique index -> less selectivity
13689 eq_fraction/= 1.20;
13690 if (eq_fraction >= 1.0) // Exact match -> 1 row
13691 DBUG_RETURN(1);
13692
13693 rows = (Uint64)((double)table_rows / pow((double)table_rows, eq_fraction));
13694 if (rows > (table_rows/50)) // EQ-range: Max 2% of rows
13695 rows= (table_rows/50);
13696
13697 if (min_key_length > eq_bound_offs)
13698 rows/= 2;
13699 if (max_key_length > eq_bound_offs)
13700 rows/= 2;
13701 }
13702 }
13703
13704 // Make sure that EQ is preferred even if row-count is low
13705 if (eq_bound_len && rows < 2) // At least 2 rows as not exact
13706 rows= 2;
13707 else if (rows < 3)
13708 rows= 3;
13709 DBUG_RETURN(MIN(rows,table_rows));
13710 } while (0);
13711
13712 DBUG_RETURN(10); /* Poor guess when you don't know anything */
13713 }
13714
table_flags(void) const13715 ulonglong ha_ndbcluster::table_flags(void) const
13716 {
13717 THD *thd= current_thd;
13718 ulonglong f=
13719 HA_REC_NOT_IN_SEQ |
13720 HA_NULL_IN_KEY |
13721 HA_AUTO_PART_KEY |
13722 HA_NO_PREFIX_CHAR_KEYS |
13723 HA_CAN_GEOMETRY |
13724 HA_CAN_BIT_FIELD |
13725 HA_PRIMARY_KEY_REQUIRED_FOR_POSITION |
13726 HA_PARTIAL_COLUMN_READ |
13727 HA_HAS_OWN_BINLOGGING |
13728 HA_BINLOG_ROW_CAPABLE |
13729 HA_HAS_RECORDS |
13730 HA_READ_BEFORE_WRITE_REMOVAL |
13731 0;
13732
13733 /*
13734 To allow for logging of ndb tables during stmt based logging;
13735 flag cabablity, but also turn off flag for OWN_BINLOGGING
13736 */
13737 if (thd->variables.binlog_format == BINLOG_FORMAT_STMT)
13738 f= (f | HA_BINLOG_STMT_CAPABLE) & ~HA_HAS_OWN_BINLOGGING;
13739
13740 /*
13741 Allow MySQL Server to decide that STATEMENT logging should be used
13742 for the distributed privilege tables. NOTE! This is a workaround
13743 for generic problem with forcing STATEMENT logging see BUG16482501.
13744 */
13745 if (Ndb_dist_priv_util::is_distributed_priv_table(m_dbname,m_tabname))
13746 f= (f | HA_BINLOG_STMT_CAPABLE) & ~HA_HAS_OWN_BINLOGGING;
13747
13748 /**
13749 * To maximize join pushability we want const-table
13750 * optimization blocked if 'ndb_join_pushdown= on'
13751 */
13752 if (THDVAR(thd, join_pushdown))
13753 f= f | HA_BLOCK_CONST_TABLE;
13754
13755 return f;
13756 }
13757
table_type() const13758 const char * ha_ndbcluster::table_type() const
13759 {
13760 return("NDBCLUSTER");
13761 }
/* Maximum row length (bytes) an NDB table can store. */
uint ha_ndbcluster::max_supported_record_length() const
{
  return NDB_MAX_TUPLE_SIZE;
}
/* Maximum number of indexes per NDB table (server-wide MAX_KEY limit). */
uint ha_ndbcluster::max_supported_keys() const
{
  return MAX_KEY;
}
/* Maximum number of columns allowed in one NDB index. */
uint ha_ndbcluster::max_supported_key_parts() const
{
  return NDB_MAX_NO_OF_ATTRIBUTES_IN_KEY;
}
/* Maximum total key length (bytes) for an NDB index. */
uint ha_ndbcluster::max_supported_key_length() const
{
  return NDB_MAX_KEY_SIZE;
}
/* Maximum length (bytes) of a single key part; create_info is unused. */
uint ha_ndbcluster::max_supported_key_part_length(HA_CREATE_INFO
                          *create_info MY_ATTRIBUTE((unused))) const
{
  return NDB_MAX_KEY_SIZE;
}
/*
  Report whether stored data is little-endian. NDB stores data in the
  host byte order, so answer FALSE on big-endian builds and TRUE
  otherwise.
*/
bool ha_ndbcluster::low_byte_first() const
{
#ifdef WORDS_BIGENDIAN
  return FALSE;
#else
  return TRUE;
#endif
}
index_type(uint key_number)13791 const char* ha_ndbcluster::index_type(uint key_number)
13792 {
13793 switch (get_index_type(key_number)) {
13794 case ORDERED_INDEX:
13795 case UNIQUE_ORDERED_INDEX:
13796 case PRIMARY_KEY_ORDERED_INDEX:
13797 return "BTREE";
13798 case UNIQUE_INDEX:
13799 case PRIMARY_KEY_INDEX:
13800 default:
13801 return "HASH";
13802 }
13803 }
13804
/*
  Tell the query cache to always ask the engine whether a cached result
  may be used (commit-count based invalidation, see
  ndbcluster_cache_retrieval_allowed()).
*/
uint8 ha_ndbcluster::table_cache_type()
{
  DBUG_ENTER("ha_ndbcluster::table_cache_type=HA_CACHE_TBL_ASKTRANSACT");
  DBUG_RETURN(HA_CACHE_TBL_ASKTRANSACT);
}
13810
13811 /**
13812 Retrieve the commit count for the table object.
13813
13814 @param thd Thread context.
13815 @param norm_name Normalized path to the table.
13816 @param[out] commit_count Commit count for the table.
13817
13818 @return 0 on success.
13819 @return 1 if an error occured.
13820 */
13821
ndb_get_commitcount(THD * thd,char * norm_name,Uint64 * commit_count)13822 uint ndb_get_commitcount(THD *thd, char *norm_name,
13823 Uint64 *commit_count)
13824 {
13825 char dbname[NAME_LEN + 1];
13826 NDB_SHARE *share;
13827 DBUG_ENTER("ndb_get_commitcount");
13828
13829 DBUG_PRINT("enter", ("name: %s", norm_name));
13830 native_mutex_lock(&ndbcluster_mutex);
13831 if (!(share=(NDB_SHARE*) my_hash_search(&ndbcluster_open_tables,
13832 (const uchar*) norm_name,
13833 strlen(norm_name))))
13834 {
13835 native_mutex_unlock(&ndbcluster_mutex);
13836 DBUG_PRINT("info", ("Table %s not found in ndbcluster_open_tables",
13837 norm_name));
13838 DBUG_RETURN(1);
13839 }
13840 /* ndb_share reference temporary, free below */
13841 share->use_count++;
13842 DBUG_PRINT("NDB_SHARE", ("%s temporary use_count: %u",
13843 share->key_string(), share->use_count));
13844 native_mutex_unlock(&ndbcluster_mutex);
13845
13846 native_mutex_lock(&share->mutex);
13847 if (opt_ndb_cache_check_time > 0)
13848 {
13849 if (share->commit_count != 0)
13850 {
13851 DBUG_PRINT("info", ("Getting commit_count: %llu from share",
13852 share->commit_count));
13853 *commit_count= share->commit_count;
13854 native_mutex_unlock(&share->mutex);
13855 /* ndb_share reference temporary free */
13856 DBUG_PRINT("NDB_SHARE", ("%s temporary free use_count: %u",
13857 share->key_string(), share->use_count));
13858 free_share(&share);
13859 DBUG_RETURN(0);
13860 }
13861 }
13862 DBUG_PRINT("info", ("Get commit_count from NDB"));
13863 Ndb *ndb;
13864 if (!(ndb= check_ndb_in_thd(thd)))
13865 DBUG_RETURN(1);
13866
13867 ha_ndbcluster::set_dbname(norm_name, dbname);
13868 if (ndb->setDatabaseName(dbname))
13869 {
13870 ERR_RETURN(ndb->getNdbError());
13871 }
13872 uint lock= share->commit_count_lock;
13873 native_mutex_unlock(&share->mutex);
13874
13875 struct Ndb_statistics stat;
13876 {
13877 char tblname[NAME_LEN + 1];
13878 ha_ndbcluster::set_tabname(norm_name, tblname);
13879 Ndb_table_guard ndbtab_g(ndb->getDictionary(), tblname);
13880 if (ndbtab_g.get_table() == 0
13881 || ndb_get_table_statistics(thd, NULL,
13882 FALSE,
13883 ndb,
13884 ndbtab_g.get_table()->getDefaultRecord(),
13885 &stat))
13886 {
13887 /* ndb_share reference temporary free */
13888 DBUG_PRINT("NDB_SHARE", ("%s temporary free use_count: %u",
13889 share->key_string(), share->use_count));
13890 free_share(&share);
13891 DBUG_RETURN(1);
13892 }
13893 }
13894
13895 native_mutex_lock(&share->mutex);
13896 if (share->commit_count_lock == lock)
13897 {
13898 DBUG_PRINT("info", ("Setting commit_count: %llu", stat.commit_count));
13899 share->commit_count= stat.commit_count;
13900 *commit_count= stat.commit_count;
13901 }
13902 else
13903 {
13904 DBUG_PRINT("info", ("Discarding commit_count, comit_count_lock changed"));
13905 *commit_count= 0;
13906 }
13907 native_mutex_unlock(&share->mutex);
13908 /* ndb_share reference temporary free */
13909 DBUG_PRINT("NDB_SHARE", ("%s temporary free use_count: %u",
13910 share->key_string(), share->use_count));
13911 free_share(&share);
13912 DBUG_RETURN(0);
13913 }
13914
13915
13916 /**
13917 Check if a cached query can be used.
13918
13919 This is done by comparing the supplied engine_data to commit_count of
13920 the table.
13921
13922 The commit_count is either retrieved from the share for the table, where
13923 it has been cached by the util thread. If the util thread is not started,
13924 NDB has to be contacetd to retrieve the commit_count, this will introduce
13925 a small delay while waiting for NDB to answer.
13926
13927
13928 @param thd thread handle
13929 @param full_name normalized path to the table in the canonical
13930 format.
13931 @param full_name_len length of the normalized path to the table.
13932 @param engine_data parameter retrieved when query was first inserted into
13933 the cache. If the value of engine_data is changed,
13934 all queries for this table should be invalidated.
13935
13936 @retval
13937 TRUE Yes, use the query from cache
13938 @retval
13939 FALSE No, don't use the cached query, and if engine_data
13940 has changed, all queries for this table should be invalidated
13941
13942 */
13943
13944 static my_bool
ndbcluster_cache_retrieval_allowed(THD * thd,char * full_name,uint full_name_len,ulonglong * engine_data)13945 ndbcluster_cache_retrieval_allowed(THD *thd,
13946 char *full_name, uint full_name_len,
13947 ulonglong *engine_data)
13948 {
13949 Uint64 commit_count;
13950 char dbname[NAME_LEN + 1];
13951 char tabname[NAME_LEN + 1];
13952
13953 ha_ndbcluster::set_dbname(full_name, dbname);
13954 ha_ndbcluster::set_tabname(full_name, tabname);
13955
13956 DBUG_ENTER("ndbcluster_cache_retrieval_allowed");
13957 DBUG_PRINT("enter", ("dbname: %s, tabname: %s",
13958 dbname, tabname));
13959
13960 if (thd_options(thd) & (OPTION_NOT_AUTOCOMMIT | OPTION_BEGIN))
13961 {
13962 /* Don't allow qc to be used if table has been previously
13963 modified in transaction */
13964 if (!check_ndb_in_thd(thd))
13965 DBUG_RETURN(FALSE);
13966 Thd_ndb *thd_ndb= get_thd_ndb(thd);
13967 if (!thd_ndb->changed_tables.is_empty())
13968 {
13969 NDB_SHARE* share;
13970 List_iterator_fast<NDB_SHARE> it(thd_ndb->changed_tables);
13971 while ((share= it++))
13972 {
13973 if (strcmp(share->table_name, tabname) == 0 &&
13974 strcmp(share->db, dbname) == 0)
13975 {
13976 DBUG_PRINT("exit", ("No, transaction has changed table"));
13977 DBUG_RETURN(FALSE);
13978 }
13979 }
13980 }
13981 }
13982
13983 if (ndb_get_commitcount(thd, full_name, &commit_count))
13984 {
13985 *engine_data= 0; /* invalidate */
13986 DBUG_PRINT("exit", ("No, could not retrieve commit_count"));
13987 DBUG_RETURN(FALSE);
13988 }
13989 DBUG_PRINT("info", ("engine_data: %llu, commit_count: %llu",
13990 *engine_data, commit_count));
13991 if (commit_count == 0)
13992 {
13993 *engine_data= 0; /* invalidate */
13994 DBUG_PRINT("exit", ("No, local commit has been performed"));
13995 DBUG_RETURN(FALSE);
13996 }
13997 else if (*engine_data != commit_count)
13998 {
13999 *engine_data= commit_count; /* invalidate */
14000 DBUG_PRINT("exit", ("No, commit_count has changed"));
14001 DBUG_RETURN(FALSE);
14002 }
14003
14004 DBUG_PRINT("exit", ("OK to use cache, engine_data: %llu",
14005 *engine_data));
14006 DBUG_RETURN(TRUE);
14007 }
14008
14009
14010 /**
14011 Register a table for use in the query cache.
14012
14013 Fetch the commit_count for the table and return it in engine_data,
14014 this will later be used to check if the table has changed, before
14015 the cached query is reused.
14016
14017 @param thd thread handle
14018 @param full_name normalized path to the table in the
14019 canonical format.
14020 @param full_name_len length of the normalized path to the table.
14021 @param engine_callback function to be called before using cache on
14022 this table
14023 @param[out] engine_data commit_count for this table
14024
14025 @retval
14026 TRUE Yes, it's ok to cahce this query
14027 @retval
14028 FALSE No, don't cach the query
14029 */
14030
14031 my_bool
register_query_cache_table(THD * thd,char * full_name,size_t full_name_len,qc_engine_callback * engine_callback,ulonglong * engine_data)14032 ha_ndbcluster::register_query_cache_table(THD *thd,
14033 char *full_name,
14034 size_t full_name_len,
14035 qc_engine_callback *engine_callback,
14036 ulonglong *engine_data)
14037 {
14038 Uint64 commit_count;
14039
14040 DBUG_ENTER("ha_ndbcluster::register_query_cache_table");
14041 DBUG_PRINT("enter",("dbname: %s, tabname: %s",
14042 m_dbname, m_tabname));
14043
14044 if (thd_options(thd) & (OPTION_NOT_AUTOCOMMIT | OPTION_BEGIN))
14045 {
14046 /* Don't allow qc to be used if table has been previously
14047 modified in transaction */
14048 Thd_ndb *thd_ndb= get_thd_ndb(thd);
14049 if (!thd_ndb->changed_tables.is_empty())
14050 {
14051 assert(m_share);
14052 NDB_SHARE* share;
14053 List_iterator_fast<NDB_SHARE> it(thd_ndb->changed_tables);
14054 while ((share= it++))
14055 {
14056 if (m_share == share)
14057 {
14058 DBUG_PRINT("exit", ("No, transaction has changed table"));
14059 DBUG_RETURN(FALSE);
14060 }
14061 }
14062 }
14063 }
14064
14065 if (ndb_get_commitcount(thd, full_name, &commit_count))
14066 {
14067 *engine_data= 0;
14068 DBUG_PRINT("exit", ("Error, could not get commitcount"));
14069 DBUG_RETURN(FALSE);
14070 }
14071 *engine_data= commit_count;
14072 *engine_callback= ndbcluster_cache_retrieval_allowed;
14073 DBUG_PRINT("exit", ("commit_count: %llu", commit_count));
14074 DBUG_RETURN(commit_count > 0);
14075 }
14076
14077
/*
  Hash callback for ndbcluster_open_tables/ndbcluster_dropped_tables:
  return the key (and its length) used to look up an NDB_SHARE.
*/
static uchar *ndbcluster_get_key(NDB_SHARE *share, size_t *length,
                                 my_bool)
{
  *length= share->key_length();
  return (uchar*) share->key_string();
}
14084
14085
#ifndef NDEBUG

/* Dump every NDB_SHARE in ndbcluster_open_tables to the DBUG trace file
   (debug builds only). */
static void print_ndbcluster_open_tables()
{
  DBUG_LOCK_FILE;
  fprintf(DBUG_FILE, ">ndbcluster_open_tables\n");
  for (uint i= 0; i < ndbcluster_open_tables.records; i++)
  {
    NDB_SHARE* share= (NDB_SHARE*)my_hash_element(&ndbcluster_open_tables, i);
    share->print("", DBUG_FILE);
  }
  fprintf(DBUG_FILE, "<ndbcluster_open_tables\n");
  DBUG_UNLOCK_FILE;
}

#endif
14102
14103
/* Convenience wrapper: dump open shares to the trace in debug builds. */
#define dbug_print_open_tables() \
  DBUG_EXECUTE("info",                   \
               print_ndbcluster_open_tables(););
14107
14108
/*
  For some reason a share is still around, try to salvage the situation
  by closing all cached tables. If the share still exists, there is an
  error somewhere but only report this to the error log. Keep this
  "trailing share" but rename it since there are still references to it
  to avoid segmentation faults. There is a risk that the memory for
  this trailing share leaks.

  Must be called with previous native_mutex_lock(&ndbcluster_mutex).
  Returns with ndbcluster_mutex held again (it is temporarily released
  while closing cached tables).
*/
int handle_trailing_share(THD *thd, NDB_SHARE *share)
{
  /* Running counter used to give each leaked share a unique name */
  static ulong trailing_share_id= 0;
  DBUG_ENTER("handle_trailing_share");

  /* ndb_share reference temporary, free below */
  ++share->use_count;
  if (opt_ndb_extra_logging > 9)
    sql_print_information ("handle_trailing_share: %s use_count: %u",
                           share->key_string(), share->use_count);
  DBUG_PRINT("NDB_SHARE", ("%s temporary use_count: %u",
                           share->key_string(), share->use_count));
  /* Drop the global mutex while closing cached tables; the close may
     itself release references on this share. */
  native_mutex_unlock(&ndbcluster_mutex);

  ndb_tdc_close_cached_table(thd, share->db, share->table_name);

  native_mutex_lock(&ndbcluster_mutex);
  /* ndb_share reference temporary free */
  DBUG_PRINT("NDB_SHARE", ("%s temporary free use_count: %u",
                           share->key_string(), share->use_count));
  if (!--share->use_count)
  {
    /* Closing cached tables dropped the last reference: fully freed */
    if (opt_ndb_extra_logging > 9)
      sql_print_information ("handle_trailing_share: %s use_count: %u",
                             share->key_string(), share->use_count);
    if (opt_ndb_extra_logging)
      sql_print_information("NDB_SHARE: trailing share %s, "
                            "released by close_cached_tables",
                            share->key_string());
    ndbcluster_real_free_share(&share);
    DBUG_RETURN(0);
  }
  if (opt_ndb_extra_logging > 9)
    sql_print_information ("handle_trailing_share: %s use_count: %u",
                           share->key_string(), share->use_count);

  /*
    share still exists, if share has not been dropped by server
    release that share
  */
  if (share->state != NSS_DROPPED)
  {
    ndbcluster_mark_share_dropped(share);
    /* ndb_share reference create free */
    DBUG_PRINT("NDB_SHARE", ("%s create free use_count: %u",
                             share->key_string(), share->use_count));
    --share->use_count;
    if (opt_ndb_extra_logging > 9)
      sql_print_information ("handle_trailing_share: %s use_count: %u",
                             share->key_string(), share->use_count);

    if (share->use_count == 0)
    {
      if (opt_ndb_extra_logging)
        sql_print_information("NDB_SHARE: trailing share %s, "
                              "released after NSS_DROPPED check",
                              share->key_string());
      ndbcluster_real_free_share(&share);
      DBUG_RETURN(0);
    }
  }

  DBUG_PRINT("info", ("NDB_SHARE: %s already exists use_count=%d, op=0x%lx.",
                      share->key_string(), share->use_count, (long) share->op));
  /*
    Ignore table shares only opened by util thread
  */
  if (!((share->use_count == 1) && share->util_thread))
  {
    sql_print_warning("NDB_SHARE: %s already exists use_count=%d."
                      " Moving away for safety, but possible memleak.",
                      share->key_string(), share->use_count);
  }
  dbug_print_open_tables();

  /*
    Ndb share has not been released as it should
  */
#ifdef NOT_YET
  assert(FALSE);
#endif

  /*
    This is probably an error. We can however save the situation
    at the cost of a possible mem leak, by "renaming" the share
    - First remove from hash
  */
  my_hash_delete(&ndbcluster_open_tables, (uchar*) share);

  {
    /*
      Give the leaked share a new name using a running number
    */
    char leak_name_buf[16]; // strlen("#leak4294967295")
    my_snprintf(leak_name_buf, sizeof(leak_name_buf),
                "#leak%lu", trailing_share_id++);
    share->key = NDB_SHARE::create_key(leak_name_buf);
    // Note that share->db, share->table_name as well
    // as share->shadow_table->s->db etc. points into the memory
    // which share->key pointed to before the memory for leak key
    // was allocated, so it's not a good time to free the old key
    // here.
  }
  /* Keep it for possible the future trailing free */
  my_hash_insert(&ndbcluster_open_tables, (uchar*) share);

  DBUG_RETURN(0);
}
14227
14228
/*
  Rename an NDB_SHARE to 'new_key': re-key it in the open-tables hash,
  repoint its db/table_name into the new key, and (when present) rename
  the binlog shadow table to match. If a share with new_key already
  exists it is handled as a trailing share first.

  Returns 0 on success, -1 if the share could not be re-inserted under
  the new key (the old key is then restored on a best-effort basis).
*/
int
ndbcluster_rename_share(THD *thd, NDB_SHARE *share, NDB_SHARE_KEY* new_key)
{
  DBUG_ENTER("ndbcluster_rename_share");
  native_mutex_lock(&ndbcluster_mutex);
  DBUG_PRINT("enter", ("share->key: '%s'", share->key_string()));
  DBUG_PRINT("enter", ("new_key: '%s'", NDB_SHARE::key_get_key(new_key)));

  // Handle the case where NDB_SHARE with new_key already exists
  {
    NDB_SHARE *tmp =
        (NDB_SHARE*)my_hash_search(&ndbcluster_open_tables,
                                   NDB_SHARE::key_get_key(new_key),
                                   NDB_SHARE::key_get_length(new_key));
    if (tmp)
    {
      handle_trailing_share(thd, tmp);
    }
  }

  /* remove the share from hash */
  my_hash_delete(&ndbcluster_open_tables, (uchar*) share);

  /* save old key if insert should fail */
  NDB_SHARE_KEY *old_key= share->key;

  share->key= new_key;

  if (my_hash_insert(&ndbcluster_open_tables, (uchar*) share))
  {
    DBUG_PRINT("error", ("Failed to insert %s", share->key_string()));
    // Catch this unlikely error in debug
    assert(false);
    /* Roll back to the old key so the share remains findable */
    share->key= old_key;
    if (my_hash_insert(&ndbcluster_open_tables, (uchar*) share))
    {
      sql_print_error("ndbcluster_rename_share: failed to recover %s",
                      share->key_string());
      DBUG_PRINT("error", ("Failed to reinsert share with old name %s",
                           share->key_string()));
    }
    native_mutex_unlock(&ndbcluster_mutex);
    DBUG_RETURN(-1);
  }

  DBUG_PRINT("info", ("setting db and table_name to point at new key"));
  share->db= NDB_SHARE::key_get_db_name(share->key);
  share->table_name= NDB_SHARE::key_get_table_name(share->key);

  Ndb_event_data *event_data= share->get_event_data_ptr();
  if (event_data && event_data->shadow_table)
  {
    if (!IS_TMP_PREFIX(share->table_name))
    {
      DBUG_PRINT("info", ("Renaming shadow table"));
      // Allocate new strings for db and table_name for shadow_table
      // in event_data's MEM_ROOT(where the shadow_table itself is allocated)
      // NOTE! This causes a slight memory leak since the already existing
      // strings are not release until the mem_root is eventually
      // released.
      lex_string_copy(&event_data->mem_root,
                      &event_data->shadow_table->s->db,
                      share->db);
      lex_string_copy(&event_data->mem_root,
                      &event_data->shadow_table->s->table_name,
                      share->table_name);
    }
    else
    {
      DBUG_PRINT("info", ("Name is temporary, skip rename of shadow table"));
      /**
       * we don't rename the table->s here
       *   that is used by injector
       *   as we don't know if all events has been processed
       * This will be dropped anyway
       */
    }
  }
  /* else rename will be handled when the ALTER event comes */

  // Print share after rename
  dbug_print_share("renamed share:", share);

  native_mutex_unlock(&ndbcluster_mutex);
  DBUG_RETURN(0);
}
14315
14316 /*
14317 Increase refcount on existing share.
14318 Always returns share and cannot fail.
14319 */
ndbcluster_get_share(NDB_SHARE * share)14320 NDB_SHARE *ndbcluster_get_share(NDB_SHARE *share)
14321 {
14322 native_mutex_lock(&ndbcluster_mutex);
14323 share->use_count++;
14324
14325 dbug_print_open_tables();
14326 dbug_print_share("ndbcluster_get_share:", share);
14327 if (opt_ndb_extra_logging > 9)
14328 sql_print_information ("ndbcluster_get_share: %s use_count: %u",
14329 share->key_string(), share->use_count);
14330 native_mutex_unlock(&ndbcluster_mutex);
14331 return share;
14332 }
14333
14334
14335
/*
  Allocate and initialize a new NDB_SHARE for 'key'.
  Returns NULL on allocation failure or if the binlog part of the share
  could not be initialized. The caller owns the returned share.
*/
NDB_SHARE*
NDB_SHARE::create(const char* key, TABLE* table)
{
  NDB_SHARE* share;
  /* MY_ZEROFILL: all members start out zeroed */
  if (!(share= (NDB_SHARE*) my_malloc(PSI_INSTRUMENT_ME,
                                      sizeof(*share),
                                      MYF(MY_WME | MY_ZEROFILL))))
    return NULL;

  share->flags= 0;
  share->state= NSS_INITIAL;

  /* Allocates enough space for key, db, and table_name */
  share->key= NDB_SHARE::create_key(key);

  /* db and table_name point into the memory owned by share->key */
  share->db= NDB_SHARE::key_get_db_name(share->key);
  share->table_name= NDB_SHARE::key_get_table_name(share->key);

  thr_lock_init(&share->lock);
  native_mutex_init(&share->mutex, MY_MUTEX_INIT_FAST);
  share->commit_count= 0;
  share->commit_count_lock= 0;

#ifdef HAVE_NDB_BINLOG
  share->m_cfn_share= NULL;
#endif

  share->op= 0;
  share->new_op= 0;
  share->event_data= 0;

  if (ndbcluster_binlog_init_share(current_thd, share, table))
  {
    DBUG_PRINT("error", ("get_share: %s could not init share", key));
    assert(share->event_data == NULL);
    NDB_SHARE::destroy(share);
    return NULL;
  }

  return share;
}
14377
14378
/*
  Look up (and optionally create) the NDB_SHARE for 'key' and take a
  reference on it. Caller must hold ndbcluster_mutex.
  Returns NULL when the share does not exist (and create_if_not_exists
  is false) or when allocation/insertion fails.
*/
static inline
NDB_SHARE *ndbcluster_get_share(const char *key, TABLE *table,
                                bool create_if_not_exists)
{
  NDB_SHARE *share;
  DBUG_ENTER("ndbcluster_get_share");
  DBUG_PRINT("enter", ("key: '%s'", key));

  if (!(share= (NDB_SHARE*) my_hash_search(&ndbcluster_open_tables,
                                           (const uchar*) key,
                                           strlen(key))))
  {
    if (!create_if_not_exists)
    {
      DBUG_PRINT("error", ("get_share: %s does not exist", key));
      DBUG_RETURN(0);
    }

    if (!(share= NDB_SHARE::create(key, table)))
    {
      DBUG_PRINT("error", ("get_share: failed to alloc share"));
      my_error(ER_OUTOFMEMORY, MYF(0), static_cast<int>(sizeof(*share)));
      DBUG_RETURN(0);
    }

    // Insert the new share in list of open shares
    if (my_hash_insert(&ndbcluster_open_tables, (uchar*) share))
    {
      NDB_SHARE::destroy(share);
      DBUG_RETURN(0);
    }
  }
  /* Account the caller's reference */
  share->use_count++;
  if (opt_ndb_extra_logging > 9)
    sql_print_information ("ndbcluster_get_share: %s use_count: %u",
                           share->key_string(), share->use_count);

  dbug_print_open_tables();
  dbug_print_share("ndbcluster_get_share:", share);
  DBUG_RETURN(share);
}
14420
14421
14422 /**
14423 Get NDB_SHARE for key
14424
14425 Returns share for key, and increases the refcount on the share.
14426
14427 @param create_if_not_exists, creates share if it does not already exist
14428 @param have_lock, ndbcluster_mutex already locked
14429 */
14430
ndbcluster_get_share(const char * key,TABLE * table,bool create_if_not_exists,bool have_lock)14431 NDB_SHARE *ndbcluster_get_share(const char *key, TABLE *table,
14432 bool create_if_not_exists,
14433 bool have_lock)
14434 {
14435 NDB_SHARE *share;
14436 DBUG_ENTER("ndbcluster_get_share");
14437 DBUG_PRINT("enter", ("key: '%s', create_if_not_exists: %d, have_lock: %d",
14438 key, create_if_not_exists, have_lock));
14439
14440 if (!have_lock)
14441 native_mutex_lock(&ndbcluster_mutex);
14442
14443 share= ndbcluster_get_share(key, table, create_if_not_exists);
14444
14445 if (!have_lock)
14446 native_mutex_unlock(&ndbcluster_mutex);
14447
14448 DBUG_RETURN(share);
14449 }
14450
/*
  Unconditionally free a share: remove it from whichever hash it lives
  in (dropped or open) and destroy it. Caller must hold
  ndbcluster_mutex; *share is set to 0 on return.
*/
void ndbcluster_real_free_share(NDB_SHARE **share)
{
  DBUG_ENTER("ndbcluster_real_free_share");
  dbug_print_share("ndbcluster_real_free_share:", *share);

  if (opt_ndb_extra_logging > 9)
    sql_print_information ("ndbcluster_real_free_share: %s use_count: %u",
                           (*share)->key_string(), (*share)->use_count);

  /* Release any index statistics attached to the share */
  ndb_index_stat_free(*share);

  bool found= false;
  if ((*share)->state == NSS_DROPPED)
  {
    found= my_hash_delete(&ndbcluster_dropped_tables, (uchar*) *share) == 0;

    // If this is a 'trailing share', it might still be 'open'
    my_hash_delete(&ndbcluster_open_tables, (uchar*) *share);
  }
  else
  {
    found= my_hash_delete(&ndbcluster_open_tables, (uchar*) *share) == 0;
  }
  /* The share must have been in one of the hashes */
  assert(found);

  NDB_SHARE::destroy(*share);
  *share= 0;

  dbug_print_open_tables();
  DBUG_VOID_RETURN;
}
14482
14483
ndbcluster_free_share(NDB_SHARE ** share,bool have_lock)14484 void ndbcluster_free_share(NDB_SHARE **share, bool have_lock)
14485 {
14486 if (!have_lock)
14487 native_mutex_lock(&ndbcluster_mutex);
14488 if (!--(*share)->use_count)
14489 {
14490 if (opt_ndb_extra_logging > 9)
14491 sql_print_information ("ndbcluster_free_share: %s use_count: %u",
14492 (*share)->key_string(), (*share)->use_count);
14493 ndbcluster_real_free_share(share);
14494 }
14495 else
14496 {
14497 if (opt_ndb_extra_logging > 9)
14498 sql_print_information ("ndbcluster_free_share: %s use_count: %u",
14499 (*share)->key_string(), (*share)->use_count);
14500 dbug_print_open_tables();
14501 dbug_print_share("ndbcluster_free_share:", *share);
14502 }
14503 if (!have_lock)
14504 native_mutex_unlock(&ndbcluster_mutex);
14505 }
14506
14507 void
ndbcluster_mark_share_dropped(NDB_SHARE * share)14508 ndbcluster_mark_share_dropped(NDB_SHARE* share)
14509 {
14510 share->state= NSS_DROPPED;
14511 if (my_hash_delete(&ndbcluster_open_tables, (uchar*) share) == 0)
14512 {
14513 my_hash_insert(&ndbcluster_dropped_tables, (uchar*) share);
14514 }
14515 else
14516 {
14517 assert(false);
14518 }
14519 if (opt_ndb_extra_logging > 9)
14520 {
14521 sql_print_information ("ndbcluster_mark_share_dropped: %s use_count: %u",
14522 share->key_string(), share->use_count);
14523 }
14524 }
14525
/*
  Per-row statistics record layout.
  NOTE(review): the fields mirror the pseudo-columns read by
  ndb_get_table_statistics() (ROW_COUNT, COMMIT_COUNT, ROW_SIZE,
  FRAGMENT_FIXED_MEMORY, FRAGMENT_VARSIZED_MEMORY) — presumably used
  when reading statistics in row form; confirm against callers.
*/
struct ndb_table_statistics_row {
  Uint64 rows;       // row count
  Uint64 commits;    // commit count
  Uint32 size;       // row size in bytes
  Uint64 fixed_mem;  // fixed-size memory usage
  Uint64 var_mem;    // varsized memory usage
};
14533
/**
  Refresh this handler's 'stats' member from table statistics.

  If 'do_read_stat' is false and a shared NDB_SHARE cache with a valid
  row_count exists, the cached statistics are used. Otherwise fresh
  statistics are fetched from the data nodes via
  ndb_get_table_statistics() and written back to the shared cache.

  @param thd          current thread
  @param do_read_stat force a read from the data nodes (bypass cache)
  @param part_id      partition to read stats for, or ~0 for whole table

  @return 0 on success, otherwise a handler error code
*/
int ha_ndbcluster::update_stats(THD *thd,
                                bool do_read_stat,
                                uint part_id)
{
  struct Ndb_statistics stat;
  Thd_ndb *thd_ndb= get_thd_ndb(thd);
  DBUG_ENTER("ha_ndbcluster::update_stats");
  do
  {
    if (m_share && !do_read_stat)
    {
      /* Take a consistent snapshot of the shared statistics */
      native_mutex_lock(&m_share->mutex);
      stat= m_share->stat;
      native_mutex_unlock(&m_share->mutex);

      assert(stat.row_count != ~(ha_rows)0); // should never be invalid

      /* Accept shared cached statistics if row_count is valid. */
      if (stat.row_count != ~(ha_rows)0)
        break;
    }

    /* Request statistics from datanodes */
    Ndb *ndb= thd_ndb->ndb;
    if (ndb->setDatabaseName(m_dbname))
    {
      set_my_errno(HA_ERR_OUT_OF_MEM);
      DBUG_RETURN(HA_ERR_OUT_OF_MEM);
    }
    if (int err= ndb_get_table_statistics(thd, this, TRUE, ndb,
                                          m_ndb_record, &stat,
                                          part_id))
    {
      DBUG_RETURN(err);
    }

    /* Update shared statistics with fresh data */
    if (m_share)
    {
      native_mutex_lock(&m_share->mutex);
      m_share->stat= stat;
      native_mutex_unlock(&m_share->mutex);
    }
    break;
  }
  while(0);

  /* Adjust the row estimate with rows changed by our own transaction */
  int no_uncommitted_rows_count= 0;
  if (m_table_info && !thd_ndb->m_error)
  {
    m_table_info->records= stat.row_count;
    m_table_info->last_count= thd_ndb->count;
    no_uncommitted_rows_count= m_table_info->no_uncommitted_rows_count;
  }
  /* Publish results into handler::stats, read by the optimizer */
  stats.mean_rec_length= stat.row_size;
  stats.data_file_length= stat.fragment_memory;
  stats.records= stat.row_count + no_uncommitted_rows_count;
  stats.max_data_file_length= stat.fragment_extent_space;
  stats.delete_length= stat.fragment_extent_free_space;

  DBUG_PRINT("exit", ("stats.records: %d  "
                      "stat->row_count: %d  "
                      "no_uncommitted_rows_count: %d"
                      "stat->fragment_extent_space: %u  "
                      "stat->fragment_extent_free_space: %u",
                      (int)stats.records,
                      (int)stat.row_count,
                      (int)no_uncommitted_rows_count,
                      (uint)stat.fragment_extent_space,
                      (uint)stat.fragment_extent_free_space));
  DBUG_RETURN(0);
}
14606
14607 /**
14608 Update 'row_count' in shared table statistcs if any rows where
14609 inserted/deleted by the local transaction related to specified
14610 'local_stat'.
14611 Should be called when transaction has succesfully commited its changes.
14612 */
14613 static
modify_shared_stats(NDB_SHARE * share,Ndb_local_table_statistics * local_stat)14614 void modify_shared_stats(NDB_SHARE *share,
14615 Ndb_local_table_statistics *local_stat)
14616 {
14617 if (local_stat->no_uncommitted_rows_count)
14618 {
14619 native_mutex_lock(&share->mutex);
14620 assert(share->stat.row_count != ~(ha_rows)0);// should never be invalid
14621 if (share->stat.row_count != ~(ha_rows)0)
14622 {
14623 DBUG_PRINT("info", ("Update row_count for %s, row_count: %lu, with:%d",
14624 share->table_name, (ulong) share->stat.row_count,
14625 local_stat->no_uncommitted_rows_count));
14626 share->stat.row_count=
14627 ((Int64)share->stat.row_count+local_stat->no_uncommitted_rows_count > 0)
14628 ? share->stat.row_count+local_stat->no_uncommitted_rows_count
14629 : 0;
14630 }
14631 native_mutex_unlock(&share->mutex);
14632 local_stat->no_uncommitted_rows_count= 0;
14633 }
14634 }
14635
14636 /* If part_id contains a legal partition id, ndbstat returns the
14637 partition-statistics pertaining to that partition only.
14638 Otherwise, it returns the table-statistics,
14639 which is an aggregate over all partitions of that table.
14640 */
14641 static
14642 int
ndb_get_table_statistics(THD * thd,ha_ndbcluster * file,bool report_error,Ndb * ndb,const NdbRecord * record,struct Ndb_statistics * ndbstat,uint part_id)14643 ndb_get_table_statistics(THD *thd, ha_ndbcluster* file, bool report_error, Ndb* ndb,
14644 const NdbRecord *record,
14645 struct Ndb_statistics * ndbstat,
14646 uint part_id)
14647 {
14648 Thd_ndb *thd_ndb= get_thd_ndb(current_thd);
14649 NdbTransaction* pTrans;
14650 NdbError error;
14651 int retries= 100;
14652 int reterr= 0;
14653 int retry_sleep= 30; /* 30 milliseconds */
14654 const char *dummyRowPtr;
14655 NdbOperation::GetValueSpec extraGets[8];
14656 Uint64 rows, commits, fixed_mem, var_mem, ext_space, free_ext_space;
14657 Uint32 size, fragid;
14658
14659 DBUG_ENTER("ndb_get_table_statistics");
14660
14661 assert(record != 0);
14662
14663 /* We use the passed in NdbRecord just to get access to the
14664 table, we mask out any/all columns it may have and add
14665 our reads as extraGets. This is necessary as they are
14666 all pseudo-columns
14667 */
14668 extraGets[0].column= NdbDictionary::Column::ROW_COUNT;
14669 extraGets[0].appStorage= &rows;
14670 extraGets[1].column= NdbDictionary::Column::COMMIT_COUNT;
14671 extraGets[1].appStorage= &commits;
14672 extraGets[2].column= NdbDictionary::Column::ROW_SIZE;
14673 extraGets[2].appStorage= &size;
14674 extraGets[3].column= NdbDictionary::Column::FRAGMENT_FIXED_MEMORY;
14675 extraGets[3].appStorage= &fixed_mem;
14676 extraGets[4].column= NdbDictionary::Column::FRAGMENT_VARSIZED_MEMORY;
14677 extraGets[4].appStorage= &var_mem;
14678 extraGets[5].column= NdbDictionary::Column::FRAGMENT_EXTENT_SPACE;
14679 extraGets[5].appStorage= &ext_space;
14680 extraGets[6].column= NdbDictionary::Column::FRAGMENT_FREE_EXTENT_SPACE;
14681 extraGets[6].appStorage= &free_ext_space;
14682 extraGets[7].column= NdbDictionary::Column::FRAGMENT;
14683 extraGets[7].appStorage= &fragid;
14684
14685 const Uint32 codeWords= 1;
14686 Uint32 codeSpace[ codeWords ];
14687 NdbInterpretedCode code(NULL, // Table is irrelevant
14688 &codeSpace[0],
14689 codeWords);
14690 if ((code.interpret_exit_last_row() != 0) ||
14691 (code.finalise() != 0))
14692 {
14693 reterr= code.getNdbError().code;
14694 DBUG_PRINT("exit", ("failed, reterr: %u, NdbError %u(%s)", reterr,
14695 error.code, error.message));
14696 DBUG_RETURN(reterr);
14697 }
14698
14699 do
14700 {
14701 Uint32 count= 0;
14702 Uint64 sum_rows= 0;
14703 Uint64 sum_commits= 0;
14704 Uint64 sum_row_size= 0;
14705 Uint64 sum_mem= 0;
14706 Uint64 sum_ext_space= 0;
14707 Uint64 sum_free_ext_space= 0;
14708 NdbScanOperation*pOp;
14709 int check;
14710
14711 if ((pTrans= ndb->startTransaction()) == NULL)
14712 {
14713 error= ndb->getNdbError();
14714 goto retry;
14715 }
14716
14717 NdbScanOperation::ScanOptions options;
14718 options.optionsPresent= NdbScanOperation::ScanOptions::SO_BATCH |
14719 NdbScanOperation::ScanOptions::SO_GETVALUE |
14720 NdbScanOperation::ScanOptions::SO_INTERPRETED;
14721 /* Set batch_size=1, as we need only one row per fragment. */
14722 options.batch= 1;
14723 options.extraGetValues= &extraGets[0];
14724 options.numExtraGetValues= sizeof(extraGets)/sizeof(extraGets[0]);
14725 options.interpretedCode= &code;
14726
14727 if ((pOp= pTrans->scanTable(record, NdbOperation::LM_CommittedRead,
14728 empty_mask,
14729 &options,
14730 sizeof(NdbScanOperation::ScanOptions))) == NULL)
14731 {
14732 error= pTrans->getNdbError();
14733 goto retry;
14734 }
14735 thd_ndb->m_scan_count++;
14736 thd_ndb->m_pruned_scan_count += (pOp->getPruned()? 1 : 0);
14737
14738 thd_ndb->m_execute_count++;
14739 DBUG_PRINT("info", ("execute_count: %u", thd_ndb->m_execute_count));
14740 if (pTrans->execute(NdbTransaction::NoCommit,
14741 NdbOperation::AbortOnError,
14742 TRUE) == -1)
14743 {
14744 error= pTrans->getNdbError();
14745 goto retry;
14746 }
14747
14748 while ((check= pOp->nextResult(&dummyRowPtr, TRUE, TRUE)) == 0)
14749 {
14750 DBUG_PRINT("info", ("nextResult rows: %llu, commits: %llu"
14751 "fixed_mem_size %llu var_mem_size %llu "
14752 "fragmentid %u extent_space %llu free_extent_space %llu",
14753 rows, commits, fixed_mem, var_mem, fragid,
14754 ext_space, free_ext_space));
14755
14756 if ((part_id != ~(uint)0) && fragid != part_id)
14757 {
14758 continue;
14759 }
14760
14761 sum_rows+= rows;
14762 sum_commits+= commits;
14763 if (sum_row_size < size)
14764 sum_row_size= size;
14765 sum_mem+= fixed_mem + var_mem;
14766 count++;
14767 sum_ext_space += ext_space;
14768 sum_free_ext_space += free_ext_space;
14769
14770 if ((part_id != ~(uint)0) && fragid == part_id)
14771 {
14772 break;
14773 }
14774 }
14775
14776 if (check == -1)
14777 {
14778 error= pOp->getNdbError();
14779 goto retry;
14780 }
14781
14782 pOp->close(TRUE);
14783
14784 ndb->closeTransaction(pTrans);
14785
14786 ndbstat->row_count= sum_rows;
14787 ndbstat->commit_count= sum_commits;
14788 ndbstat->row_size= (ulong)sum_row_size;
14789 ndbstat->fragment_memory= sum_mem;
14790 ndbstat->fragment_extent_space= sum_ext_space;
14791 ndbstat->fragment_extent_free_space= sum_free_ext_space;
14792
14793 DBUG_PRINT("exit", ("records: %llu commits: %llu row_size: %llu "
14794 "mem: %llu allocated: %llu free: %llu count: %u",
14795 sum_rows, sum_commits, sum_row_size,
14796 sum_mem, sum_ext_space, sum_free_ext_space, count));
14797
14798 DBUG_RETURN(0);
14799 retry:
14800 if(report_error)
14801 {
14802 if (file && pTrans)
14803 {
14804 reterr= file->ndb_err(pTrans);
14805 }
14806 else
14807 {
14808 const NdbError& tmp= error;
14809 ERR_PRINT(tmp);
14810 reterr= ndb_to_mysql_error(&tmp);
14811 }
14812 }
14813 else
14814 reterr= error.code;
14815
14816 if (pTrans)
14817 {
14818 ndb->closeTransaction(pTrans);
14819 pTrans= NULL;
14820 }
14821 if (error.status == NdbError::TemporaryError &&
14822 retries-- && !thd->killed)
14823 {
14824 do_retry_sleep(retry_sleep);
14825 continue;
14826 }
14827 break;
14828 } while(1);
14829 DBUG_PRINT("exit", ("failed, reterr: %u, NdbError %u(%s)", reterr,
14830 error.code, error.message));
14831 DBUG_RETURN(reterr);
14832 }
14833
14834 /**
14835 Create a .ndb file to serve as a placeholder indicating
14836 that the table with this name is a ndb table.
14837 */
14838
write_ndb_file(const char * name) const14839 int ha_ndbcluster::write_ndb_file(const char *name) const
14840 {
14841 File file;
14842 bool error=1;
14843 char path[FN_REFLEN];
14844
14845 DBUG_ENTER("write_ndb_file");
14846 DBUG_PRINT("enter", ("name: %s", name));
14847
14848 #ifndef EMBEDDED_LIBRARY
14849 (void)strxnmov(path, FN_REFLEN-1,
14850 mysql_data_home,"/",name,ha_ndb_ext,NullS);
14851 #else
14852 (void)strxnmov(path, FN_REFLEN-1, name,ha_ndb_ext, NullS);
14853 #endif
14854
14855 if ((file=my_create(path, CREATE_MODE,O_RDWR | O_TRUNC,MYF(MY_WME))) >= 0)
14856 {
14857 // It's an empty file
14858 error=0;
14859 my_close(file,MYF(0));
14860 }
14861 DBUG_RETURN(error);
14862 }
14863
/**
  Record that read-before-write removal (rbwr) will be used for this
  statement, and assert the preconditions that make it valid:
  an explicit primary key and a unique index.
*/
void ha_ndbcluster::check_read_before_write_removal()
{
  DBUG_ENTER("check_read_before_write_removal");

  /* Must have determined that rbwr is possible */
  assert(m_read_before_write_removal_possible);
  m_read_before_write_removal_used= true;

  /* Can't use on table with hidden primary key */
  assert(table_share->primary_key != MAX_KEY);

  /* Index must be unique */
  DBUG_PRINT("info", ("using index %d", active_index));
  const KEY *key= table->key_info + active_index;
  // NDB_IGNORE_VALUE silences 'unused' warnings when asserts compile out
  assert((key->flags & HA_NOSAME)); NDB_IGNORE_VALUE(key);

  DBUG_VOID_RETURN;
}
14882
14883
14884 /****************************************************************************
14885 * MRR interface implementation
14886 ***************************************************************************/
14887
14888 /**
14889 We will not attempt to deal with more than this many ranges in a single
14890 MRR execute().
14891 */
14892 #define MRR_MAX_RANGES 128
14893
14894 /*
14895 Types of ranges during multi_range_read.
14896
14897 Code assumes that X < enum_ordered_range is a valid check for range converted
14898 to key operation.
14899 */
14900 enum multi_range_types
14901 {
14902 enum_unique_range, /// Range converted to key operation
14903 enum_empty_unique_range, /// No data found (in key operation)
14904 enum_ordered_range, /// Normal ordered index scan range
14905 enum_skip_range /// Empty range (eg. partition pruning)
14906 };
14907
14908 /**
14909 Usage of the MRR buffer is as follows:
14910
14911 First, N char * values, each being the custom value obtained from
14912 RANGE_SEQ_IF::next() that needs to be returned from multi_range_read_next().
14913 N is usually == total number of ranges, but never more than MRR_MAX_RANGES
14914 (the MRR is split across several execute()s if necessary). N may be lower
14915 than actual number of ranges in a single execute() in case of split for
14916 other reasons.
14917
14918 This is followed by N variable-sized entries, each
14919
14920 - 1 byte of multi_range_types for this range.
14921
14922 - (Only) for ranges converted to key operations (enum_unique_range and
14923 enum_empty_unique_range), this is followed by table_share->reclength
14924 bytes of row data.
14925 */
14926
14927 static inline
multi_range_buffer_size(const HANDLER_BUFFER * buffer)14928 ulong multi_range_buffer_size(const HANDLER_BUFFER* buffer)
14929 {
14930 const size_t buf_size = buffer->buffer_end - buffer->buffer;
14931 assert(buf_size < ULONG_MAX);
14932 return (ulong)buf_size;
14933 }
14934
14935 /* Return the needed size of the fixed array at start of HANDLER_BUFFER. */
14936 static ulong
multi_range_fixed_size(int num_ranges)14937 multi_range_fixed_size(int num_ranges)
14938 {
14939 if (num_ranges > MRR_MAX_RANGES)
14940 num_ranges= MRR_MAX_RANGES;
14941 return num_ranges * sizeof(char *);
14942 }
14943
14944 /* Return max number of ranges so that fixed part will still fit in buffer. */
14945 static int
multi_range_max_ranges(int num_ranges,ulong bufsize)14946 multi_range_max_ranges(int num_ranges, ulong bufsize)
14947 {
14948 if (num_ranges > MRR_MAX_RANGES)
14949 num_ranges= MRR_MAX_RANGES;
14950 if (num_ranges * sizeof(char *) > bufsize)
14951 num_ranges= bufsize / sizeof(char *);
14952 return num_ranges;
14953 }
14954
14955 /* Return the size in HANDLER_BUFFER of a variable-sized entry. */
14956 static ulong
multi_range_entry_size(my_bool use_keyop,ulong reclength)14957 multi_range_entry_size(my_bool use_keyop, ulong reclength)
14958 {
14959 /* Space for type byte. */
14960 ulong len= 1;
14961 if (use_keyop)
14962 len+= reclength;
14963 return len;
14964 }
14965
14966 /*
14967 Return the maximum size of a variable-sized entry in HANDLER_BUFFER.
14968
14969 Actual size may depend on key values (whether the actual value can be
14970 converted to a hash key operation or needs to be done as an ordered index
14971 scan).
14972 */
14973 static ulong
multi_range_max_entry(NDB_INDEX_TYPE keytype,ulong reclength)14974 multi_range_max_entry(NDB_INDEX_TYPE keytype, ulong reclength)
14975 {
14976 return multi_range_entry_size(keytype != ORDERED_INDEX, reclength);
14977 }
14978
/* Access the type byte (a multi_range_types value) at the start of an entry. */
static uchar &
multi_range_entry_type(uchar *p)
{
  return *p;
}
14984
14985 /* Find the start of the next entry in HANDLER_BUFFER. */
14986 static uchar *
multi_range_next_entry(uchar * p,ulong reclength)14987 multi_range_next_entry(uchar *p, ulong reclength)
14988 {
14989 my_bool use_keyop= multi_range_entry_type(p) < enum_ordered_range;
14990 return p + multi_range_entry_size(use_keyop, reclength);
14991 }
14992
14993 /* Get pointer to row data (for range converted to key operation). */
14994 static uchar *
multi_range_row(uchar * p)14995 multi_range_row(uchar *p)
14996 {
14997 assert(multi_range_entry_type(p) == enum_unique_range);
14998 return p + 1;
14999 }
15000
15001 /* Get and put upper layer custom char *, use memcpy() for unaligned access. */
15002 static char *
multi_range_get_custom(HANDLER_BUFFER * buffer,int range_no)15003 multi_range_get_custom(HANDLER_BUFFER *buffer, int range_no)
15004 {
15005 assert(range_no < MRR_MAX_RANGES);
15006 char* res;
15007 memcpy(&res, buffer->buffer + range_no*sizeof(char*), sizeof(char*));
15008 return res;
15009 }
15010
15011 static void
multi_range_put_custom(HANDLER_BUFFER * buffer,int range_no,char * custom)15012 multi_range_put_custom(HANDLER_BUFFER *buffer, int range_no, char *custom)
15013 {
15014 assert(range_no < MRR_MAX_RANGES);
15015 // memcpy() required for unaligned access.
15016 memcpy(buffer->buffer + range_no*sizeof(char*), &custom, sizeof(char*));
15017 }
15018
15019 /*
15020 This is used to check if an ordered index scan is needed for a range in
15021 a multi range read.
15022 If a scan is not needed, we use a faster primary/unique key operation
15023 instead.
15024 */
15025 static my_bool
read_multi_needs_scan(NDB_INDEX_TYPE cur_index_type,const KEY * key_info,const KEY_MULTI_RANGE * r,bool is_pushed)15026 read_multi_needs_scan(NDB_INDEX_TYPE cur_index_type, const KEY *key_info,
15027 const KEY_MULTI_RANGE *r, bool is_pushed)
15028 {
15029 if (cur_index_type == ORDERED_INDEX || is_pushed)
15030 return TRUE;
15031 if (cur_index_type == PRIMARY_KEY_INDEX)
15032 return FALSE;
15033 if (cur_index_type == UNIQUE_INDEX) { // a 'UNIQUE ... USING HASH' index
15034 // UNIQUE_INDEX is used iff optimizer set HA_MRR_NO_NULL_ENDPOINTS.
15035 // Assert that there are no NULL values in key as promissed.
15036 assert(!check_null_in_key(key_info, r->start_key.key, r->start_key.length));
15037 return FALSE;
15038 }
15039 assert(cur_index_type == PRIMARY_KEY_ORDERED_INDEX ||
15040 cur_index_type == UNIQUE_ORDERED_INDEX);
15041 if (r->start_key.length != key_info->key_length ||
15042 r->start_key.flag != HA_READ_KEY_EXACT)
15043 return TRUE; // Not exact match, need scan
15044 if (cur_index_type == UNIQUE_ORDERED_INDEX &&
15045 check_null_in_key(key_info, r->start_key.key,r->start_key.length))
15046 return TRUE; // Can't use for NULL values
15047 return FALSE;
15048 }
15049
15050 /*
15051 Get cost and other information about MRR scan over a known list of ranges
15052
15053 SYNOPSIS
15054 See handler::multi_range_read_info_const.
15055
15056 DESCRIPTION
15057 The implementation is copied from handler::multi_range_read_info_const.
15058 The only difference is that NDB-MRR cannot handle blob columns or keys
15059 with NULLs for unique indexes. We disable MRR for those cases.
15060
15061 NOTES
15062 See NOTES for handler::multi_range_read_info_const().
15063 */
15064
15065 ha_rows
multi_range_read_info_const(uint keyno,RANGE_SEQ_IF * seq,void * seq_init_param,uint n_ranges,uint * bufsz,uint * flags,Cost_estimate * cost)15066 ha_ndbcluster::multi_range_read_info_const(uint keyno, RANGE_SEQ_IF *seq,
15067 void *seq_init_param,
15068 uint n_ranges, uint *bufsz,
15069 uint *flags, Cost_estimate *cost)
15070 {
15071 ha_rows rows;
15072 uint def_flags= *flags;
15073 uint def_bufsz= *bufsz;
15074
15075 DBUG_ENTER("ha_ndbcluster::multi_range_read_info_const");
15076
15077 /* Get cost/flags/mem_usage of default MRR implementation */
15078 rows= handler::multi_range_read_info_const(keyno, seq, seq_init_param,
15079 n_ranges, &def_bufsz,
15080 &def_flags, cost);
15081 if (unlikely(rows == HA_POS_ERROR))
15082 {
15083 DBUG_RETURN(rows);
15084 }
15085
15086 /*
15087 If HA_MRR_USE_DEFAULT_IMPL has been passed to us, that is
15088 an order to use the default MRR implementation.
15089 Otherwise, make a choice based on requested *flags, handler
15090 capabilities, cost and mrr* flags of @@optimizer_switch.
15091 */
15092 if ((*flags & HA_MRR_USE_DEFAULT_IMPL) ||
15093 choose_mrr_impl(keyno, n_ranges, rows, bufsz, flags, cost))
15094 {
15095 DBUG_PRINT("info", ("Default MRR implementation choosen"));
15096 *flags= def_flags;
15097 *bufsz= def_bufsz;
15098 assert(*flags & HA_MRR_USE_DEFAULT_IMPL);
15099 }
15100 else
15101 {
15102 /* *flags and *bufsz were set by choose_mrr_impl */
15103 DBUG_PRINT("info", ("NDB-MRR implementation choosen"));
15104 assert(!(*flags & HA_MRR_USE_DEFAULT_IMPL));
15105 }
15106 DBUG_RETURN(rows);
15107 }
15108
15109
15110 /*
15111 Get cost and other information about MRR scan over some sequence of ranges
15112
15113 SYNOPSIS
15114 See handler::multi_range_read_info.
15115 */
15116
15117 ha_rows
multi_range_read_info(uint keyno,uint n_ranges,uint n_rows,uint * bufsz,uint * flags,Cost_estimate * cost)15118 ha_ndbcluster::multi_range_read_info(uint keyno, uint n_ranges, uint n_rows,
15119 uint *bufsz, uint *flags,
15120 Cost_estimate *cost)
15121 {
15122 ha_rows res;
15123 uint def_flags= *flags;
15124 uint def_bufsz= *bufsz;
15125
15126 DBUG_ENTER("ha_ndbcluster::multi_range_read_info");
15127
15128 /* Get cost/flags/mem_usage of default MRR implementation */
15129 res= handler::multi_range_read_info(keyno, n_ranges, n_rows,
15130 &def_bufsz, &def_flags,
15131 cost);
15132 if (unlikely(res == HA_POS_ERROR))
15133 {
15134 /* Default implementation can't perform MRR scan => we can't either */
15135 DBUG_RETURN(res);
15136 }
15137 assert(!res);
15138
15139 if ((*flags & HA_MRR_USE_DEFAULT_IMPL) ||
15140 choose_mrr_impl(keyno, n_ranges, n_rows, bufsz, flags, cost))
15141 {
15142 /* Default implementation is choosen */
15143 DBUG_PRINT("info", ("Default MRR implementation choosen"));
15144 *flags= def_flags;
15145 *bufsz= def_bufsz;
15146 assert(*flags & HA_MRR_USE_DEFAULT_IMPL);
15147 }
15148 else
15149 {
15150 /* *flags and *bufsz were set by choose_mrr_impl */
15151 DBUG_PRINT("info", ("NDB-MRR implementation choosen"));
15152 assert(!(*flags & HA_MRR_USE_DEFAULT_IMPL));
15153 }
15154 DBUG_RETURN(res);
15155 }
15156
15157 /**
15158 Internals: Choose between Default MRR implementation and
15159 native ha_ndbcluster MRR
15160
15161 Make the choice between using Default MRR implementation and ha_ndbcluster-MRR.
15162 This function contains common functionality factored out of multi_range_read_info()
15163 and multi_range_read_info_const(). The function assumes that the default MRR
15164 implementation's applicability requirements are satisfied.
15165
15166 @param keyno Index number
15167 @param n_ranges Number of ranges/keys (i.e. intervals) in the range sequence.
15168 @param n_rows E(full rows to be retrieved)
15169 @param bufsz OUT If DS-MRR is choosen, buffer use of DS-MRR implementation
15170 else the value is not modified
15171 @param flags IN MRR flags provided by the MRR user
15172 OUT If DS-MRR is choosen, flags of DS-MRR implementation
15173 else the value is not modified
15174 @param cost IN Cost of default MRR implementation
15175 OUT If DS-MRR is choosen, cost of DS-MRR scan
15176 else the value is not modified
15177
15178 @retval TRUE Default MRR implementation should be used
15179 @retval FALSE NDB-MRR implementation should be used
15180 */
15181
choose_mrr_impl(uint keyno,uint n_ranges,ha_rows n_rows,uint * bufsz,uint * flags,Cost_estimate * cost)15182 bool ha_ndbcluster::choose_mrr_impl(uint keyno, uint n_ranges, ha_rows n_rows,
15183 uint *bufsz, uint *flags, Cost_estimate *cost)
15184 {
15185 THD *thd= current_thd;
15186 NDB_INDEX_TYPE key_type= get_index_type(keyno);
15187
15188 get_read_set(true, keyno);
15189
15190 /* Disable MRR on blob read and on NULL lookup in unique index. */
15191 if (!thd->optimizer_switch_flag(OPTIMIZER_SWITCH_MRR) ||
15192 uses_blob_value(table->read_set) ||
15193 ( key_type == UNIQUE_INDEX &&
15194 has_null_in_unique_index(keyno) &&
15195 !(*flags & HA_MRR_NO_NULL_ENDPOINTS)))
15196 {
15197 /* Use the default implementation, don't modify args: See comments */
15198 return true;
15199 }
15200
15201 /**
15202 * Calculate *bufsz, fallback to default MRR if we can't allocate
15203 * suffient buffer space for NDB-MRR
15204 */
15205 {
15206 uint save_bufsize= *bufsz;
15207 ulong reclength= table_share->reclength;
15208 uint entry_size= multi_range_max_entry(key_type, reclength);
15209 uint min_total_size= entry_size + multi_range_fixed_size(1);
15210 DBUG_PRINT("info", ("MRR bufsize suggested=%u want=%u limit=%d",
15211 save_bufsize, (uint)(n_rows + 1) * entry_size,
15212 (*flags & HA_MRR_LIMITS) != 0));
15213 if (save_bufsize < min_total_size)
15214 {
15215 if (*flags & HA_MRR_LIMITS)
15216 {
15217 /* Too small buffer limit for native NDB-MRR. */
15218 return true;
15219 }
15220 *bufsz= min_total_size;
15221 }
15222 else
15223 {
15224 uint max_ranges= (n_ranges > 0) ? n_ranges : MRR_MAX_RANGES;
15225 *bufsz= min(save_bufsize,
15226 (uint)(n_rows * entry_size + multi_range_fixed_size(max_ranges)));
15227 }
15228 DBUG_PRINT("info", ("MRR bufsize set to %u", *bufsz));
15229 }
15230
15231 /**
15232 * Cost based MRR optimization is known to be incorrect.
15233 * Disabled -> always use NDB-MRR whenever possible
15234 */
15235 *flags&= ~HA_MRR_USE_DEFAULT_IMPL;
15236 *flags|= HA_MRR_SUPPORT_SORTED;
15237
15238 return false;
15239 }
15240
15241
/**
  Initialize a multi-range read.

  Falls back to the default (handler) MRR implementation when the caller
  requested it, when the supplied buffer cannot hold even a single range,
  when a pushed lookup-join is the root of this access, or when
  delete/update batching is disabled. Otherwise sets up member state for
  the native NDB-MRR; the actual fetch is deferred to
  multi_range_read_next().

  @return 0 on success, otherwise an error code
*/
int ha_ndbcluster::multi_range_read_init(RANGE_SEQ_IF *seq_funcs,
                                         void *seq_init_param,
                                         uint n_ranges, uint mode,
                                         HANDLER_BUFFER *buffer)
{
  int error;
  DBUG_ENTER("ha_ndbcluster::multi_range_read_init");

  /*
    If supplied buffer is smaller than needed for just one range, we cannot do
    multi_range_read.
  */
  const ulong bufsize= multi_range_buffer_size(buffer);

  if (mode & HA_MRR_USE_DEFAULT_IMPL
      || bufsize < multi_range_fixed_size(1) +
                   multi_range_max_entry(get_index_type(active_index),
                                         table_share->reclength)
      || (m_pushed_join_operation==PUSHED_ROOT &&
         !m_disable_pushed_join &&
         !m_pushed_join_member->get_query_def().isScanQuery())
      || m_delete_cannot_batch || m_update_cannot_batch)
  {
    m_disable_multi_read= TRUE;
    DBUG_RETURN(handler::multi_range_read_init(seq_funcs, seq_init_param,
                                               n_ranges, mode, buffer));
  }

  /**
   * There may still be an open m_multi_cursor from the previous mrr access on this handler.
   * Close it now to free up resources for this NdbScanOperation.
   */ 
  if (unlikely((error= close_scan())))
    DBUG_RETURN(error);

  m_disable_multi_read= FALSE;

  mrr_is_output_sorted= MY_TEST(mode & HA_MRR_SORTED);
  /*
    Copy arguments into member variables
  */
  multi_range_buffer= buffer;
  mrr_funcs= *seq_funcs;
  mrr_iter= mrr_funcs.init(seq_init_param, n_ranges, mode);
  ranges_in_seq= n_ranges;
  m_range_res= mrr_funcs.next(mrr_iter, &mrr_cur_range);
  mrr_need_range_assoc = !MY_TEST(mode & HA_MRR_NO_ASSOCIATION);
  if (mrr_need_range_assoc)
  {
    ha_statistic_increment(&SSV::ha_multi_range_read_init_count);
  }

  /*
    We do not start fetching here with execute(), rather we defer this to the
    first call to multi_range_read_next() by setting first_running_range and
    first_unstarted_range like this.

    The reason is that the MRR interface is designed so that in some cases
    multi_range_read_next() may never get called (eg. in case of WHERE
    condition on previous table that is never satisfied). So we may not need
    to fetch anything.

    Also, at the time of writing, returning an error from
    multi_range_read_init() does not correctly set the error status, so we get
    an assert on missing result status in net_end_statement().
  */
  first_running_range= 0;
  first_unstarted_range= 0;

  DBUG_RETURN(0);
}
15313
15314
multi_range_start_retrievals(uint starting_range)15315 int ha_ndbcluster::multi_range_start_retrievals(uint starting_range)
15316 {
15317 KEY* key_info= table->key_info + active_index;
15318 ulong reclength= table_share->reclength;
15319 const NdbOperation* op;
15320 NDB_INDEX_TYPE cur_index_type= get_index_type(active_index);
15321 const NdbOperation *oplist[MRR_MAX_RANGES];
15322 uint num_keyops= 0;
15323 NdbTransaction *trans= m_thd_ndb->trans;
15324 int error;
15325 const bool is_pushed=
15326 check_if_pushable(NdbQueryOperationDef::OrderedIndexScan,
15327 active_index);
15328
15329 DBUG_ENTER("multi_range_start_retrievals");
15330
15331 /*
15332 * read multi range will read ranges as follows (if not ordered)
15333 *
15334 * input read order
15335 * ====== ==========
15336 * pk-op 1 pk-op 1
15337 * pk-op 2 pk-op 2
15338 * range 3 range (3,5) NOTE result rows will be intermixed
15339 * pk-op 4 pk-op 4
15340 * range 5
15341 * pk-op 6 pk-op 6
15342 */
15343
15344 /*
15345 We loop over all ranges, converting into primary/unique key operations if
15346 possible, and adding ranges to an ordered index scan for the rest.
15347
15348 If the supplied HANDLER_BUFFER is too small, we may also need to do only
15349 part of the multi read at once.
15350 */
15351
15352 assert(cur_index_type != UNDEFINED_INDEX);
15353 assert(m_multi_cursor==NULL);
15354 assert(m_active_query==NULL);
15355
15356 const NdbOperation::LockMode lm = get_ndb_lock_mode(m_lock.type);
15357 const uchar *end_of_buffer= multi_range_buffer->buffer_end;
15358
15359 /*
15360 Normally we should have sufficient buffer for the whole fixed_sized part.
15361 But we need to make sure we do not crash if upper layer gave us a _really_
15362 small buffer.
15363
15364 We already checked (in multi_range_read_init()) that we got enough buffer
15365 for at least one range.
15366 */
15367 uint min_entry_size=
15368 multi_range_entry_size(!read_multi_needs_scan(cur_index_type, key_info,
15369 &mrr_cur_range, is_pushed),
15370 reclength);
15371 const ulong bufsize= multi_range_buffer_size(multi_range_buffer);
15372 int max_range= multi_range_max_ranges(ranges_in_seq,
15373 bufsize - min_entry_size);
15374 assert(max_range > 0);
15375 uchar *row_buf= multi_range_buffer->buffer + multi_range_fixed_size(max_range);
15376 m_multi_range_result_ptr= row_buf;
15377
15378 int range_no= 0;
15379 int mrr_range_no= starting_range;
15380 bool any_real_read= FALSE;
15381
15382 if (m_read_before_write_removal_possible)
15383 check_read_before_write_removal();
15384
15385 for (;
15386 !m_range_res;
15387 range_no++, m_range_res= mrr_funcs.next(mrr_iter, &mrr_cur_range))
15388 {
15389 if (range_no >= max_range)
15390 break;
15391 my_bool need_scan=
15392 read_multi_needs_scan(cur_index_type, key_info, &mrr_cur_range, is_pushed);
15393 if (row_buf + multi_range_entry_size(!need_scan, reclength) > end_of_buffer)
15394 break;
15395 if (need_scan)
15396 {
15397 if (range_no > NdbIndexScanOperation::MaxRangeNo)
15398 break;
15399 /*
15400 Check how much KEYINFO data we already used for index bounds, and
15401 split the MRR here if it exceeds a certain limit. This way we avoid
15402 overloading the TC block in the ndb kernel.
15403
15404 The limit used is based on the value MAX_KEY_SIZE_IN_WORDS.
15405 */
15406 if (m_multi_cursor && m_multi_cursor->getCurrentKeySize() >= 1000)
15407 break;
15408 }
15409
15410 mrr_range_no++;
15411 multi_range_put_custom(multi_range_buffer, range_no, mrr_cur_range.ptr);
15412
15413 part_id_range part_spec;
15414 if (m_use_partition_pruning)
15415 {
15416 get_partition_set(table, table->record[0], active_index,
15417 &mrr_cur_range.start_key,
15418 &part_spec);
15419 DBUG_PRINT("info", ("part_spec.start_part: %u part_spec.end_part: %u",
15420 part_spec.start_part, part_spec.end_part));
15421 /*
15422 If partition pruning has found no partition in set
15423 we can skip this scan
15424 */
15425 if (part_spec.start_part > part_spec.end_part)
15426 {
15427 /*
15428 We can skip this range since the key won't fit into any
15429 partition
15430 */
15431 multi_range_entry_type(row_buf)= enum_skip_range;
15432 row_buf= multi_range_next_entry(row_buf, reclength);
15433 continue;
15434 }
15435 if (!trans &&
15436 (part_spec.start_part == part_spec.end_part))
15437 if (unlikely(!(trans= start_transaction_part_id(part_spec.start_part,
15438 error))))
15439 DBUG_RETURN(error);
15440 }
15441
15442 if (need_scan)
15443 {
15444 if (!trans)
15445 {
15446 // ToDo see if we can use start_transaction_key here instead
15447 if (!m_use_partition_pruning)
15448 {
15449 get_partition_set(table, table->record[0], active_index,
15450 &mrr_cur_range.start_key,
15451 &part_spec);
15452 if (part_spec.start_part == part_spec.end_part)
15453 {
15454 if (unlikely(!(trans= start_transaction_part_id(part_spec.start_part,
15455 error))))
15456 DBUG_RETURN(error);
15457 }
15458 else if (unlikely(!(trans= start_transaction(error))))
15459 DBUG_RETURN(error);
15460 }
15461 else if (unlikely(!(trans= start_transaction(error))))
15462 DBUG_RETURN(error);
15463 }
15464
15465 any_real_read= TRUE;
15466 DBUG_PRINT("info", ("any_real_read= TRUE"));
15467
15468 /* Create the scan operation for the first scan range. */
15469 if (check_if_pushable(NdbQueryOperationDef::OrderedIndexScan,
15470 active_index))
15471 {
15472 assert(!m_read_before_write_removal_used);
15473 if (!m_active_query)
15474 {
15475 const int error= create_pushed_join();
15476 if (unlikely(error))
15477 DBUG_RETURN(error);
15478
15479 NdbQuery* const query= m_active_query;
15480 if (mrr_is_output_sorted &&
15481 query->getQueryOperation((uint)PUSHED_ROOT)->setOrdering(NdbQueryOptions::ScanOrdering_ascending))
15482 ERR_RETURN(query->getNdbError());
15483 }
15484 } // check_if_pushable()
15485 else
15486 if (!m_multi_cursor)
15487 {
15488 /* Do a multi-range index scan for ranges not done by primary/unique key. */
15489 NdbScanOperation::ScanOptions options;
15490 NdbInterpretedCode code(m_table);
15491
15492 options.optionsPresent=
15493 NdbScanOperation::ScanOptions::SO_SCANFLAGS |
15494 NdbScanOperation::ScanOptions::SO_PARALLEL;
15495
15496 options.scan_flags=
15497 NdbScanOperation::SF_ReadRangeNo |
15498 NdbScanOperation::SF_MultiRange;
15499
15500 if (lm == NdbOperation::LM_Read)
15501 options.scan_flags|= NdbScanOperation::SF_KeyInfo;
15502 if (mrr_is_output_sorted)
15503 options.scan_flags|= NdbScanOperation::SF_OrderByFull;
15504
15505 options.parallel= DEFAULT_PARALLELISM;
15506
15507 NdbOperation::GetValueSpec gets[2];
15508 if (table_share->primary_key == MAX_KEY)
15509 get_hidden_fields_scan(&options, gets);
15510
15511 if (m_cond && m_cond->generate_scan_filter(&code, &options))
15512 ERR_RETURN(code.getNdbError());
15513
15514 /* Define scan */
15515 NdbIndexScanOperation *scanOp= trans->scanIndex
15516 (m_index[active_index].ndb_record_key,
15517 m_ndb_record,
15518 lm,
15519 (uchar *)(table->read_set->bitmap),
15520 NULL, /* All bounds specified below */
15521 &options,
15522 sizeof(NdbScanOperation::ScanOptions));
15523
15524 if (!scanOp)
15525 ERR_RETURN(trans->getNdbError());
15526
15527 m_multi_cursor= scanOp;
15528
15529 /* Can't have blobs in multi range read */
15530 assert(!uses_blob_value(table->read_set));
15531
15532 /* We set m_next_row=0 to m that no row was fetched from the scan yet. */
15533 m_next_row= 0;
15534 }
15535
15536 Ndb::PartitionSpec ndbPartitionSpec;
15537 const Ndb::PartitionSpec* ndbPartSpecPtr= NULL;
15538
15539 /* If this table uses user-defined partitioning, use MySQLD provided
15540 * partition info as pruning info
15541 * Otherwise, scan range pruning is performed automatically by
15542 * NDBAPI based on distribution key values.
15543 */
15544 if (m_use_partition_pruning &&
15545 m_user_defined_partitioning &&
15546 (part_spec.start_part == part_spec.end_part))
15547 {
15548 DBUG_PRINT("info", ("Range on user-def-partitioned table can be pruned to part %u",
15549 part_spec.start_part));
15550 ndbPartitionSpec.type= Ndb::PartitionSpec::PS_USER_DEFINED;
15551 ndbPartitionSpec.UserDefined.partitionId= part_spec.start_part;
15552 ndbPartSpecPtr= &ndbPartitionSpec;
15553 }
15554
15555 /* Include this range in the ordered index scan. */
15556 NdbIndexScanOperation::IndexBound bound;
15557 compute_index_bounds(bound, key_info,
15558 &mrr_cur_range.start_key, &mrr_cur_range.end_key, 0);
15559 bound.range_no= range_no;
15560
15561 const NdbRecord *key_rec= m_index[active_index].ndb_record_key;
15562 if (m_active_query)
15563 {
15564 DBUG_PRINT("info", ("setBound:%d, for pushed join", bound.range_no));
15565 if (m_active_query->setBound(key_rec, &bound))
15566 {
15567 ERR_RETURN(trans->getNdbError());
15568 }
15569 }
15570 else
15571 {
15572 if (m_multi_cursor->setBound(m_index[active_index].ndb_record_key,
15573 bound,
15574 ndbPartSpecPtr, // Only for user-def tables
15575 sizeof(Ndb::PartitionSpec)))
15576 {
15577 ERR_RETURN(trans->getNdbError());
15578 }
15579 }
15580
15581 multi_range_entry_type(row_buf)= enum_ordered_range;
15582 row_buf= multi_range_next_entry(row_buf, reclength);
15583 }
15584 else
15585 {
15586 multi_range_entry_type(row_buf)= enum_unique_range;
15587
15588 if (!trans)
15589 {
15590 assert(active_index != MAX_KEY);
15591 if (unlikely(!(trans= start_transaction_key(active_index,
15592 mrr_cur_range.start_key.key,
15593 error))))
15594 DBUG_RETURN(error);
15595 }
15596
15597 if (m_read_before_write_removal_used)
15598 {
15599 DBUG_PRINT("info", ("m_read_before_write_removal_used == TRUE"));
15600
15601 /* Key will later be returned as result record.
15602 * Save it in 'row_buf' from where it will later retrieved.
15603 */
15604 key_restore(multi_range_row(row_buf),
15605 (uchar*)mrr_cur_range.start_key.key,
15606 key_info, key_info->key_length);
15607
15608 op= NULL; // read_before_write_removal
15609 }
15610 else
15611 {
15612 any_real_read= TRUE;
15613 DBUG_PRINT("info", ("any_real_read= TRUE"));
15614
15615 /* Convert to primary/unique key operation. */
15616 Uint32 partitionId;
15617 Uint32* ppartitionId = NULL;
15618
15619 if (m_user_defined_partitioning &&
15620 (cur_index_type == PRIMARY_KEY_ORDERED_INDEX ||
15621 cur_index_type == PRIMARY_KEY_INDEX))
15622 {
15623 partitionId=part_spec.start_part;
15624 ppartitionId=&partitionId;
15625 }
15626
15627 /**
15628 * 'Pushable codepath' is incomplete and expected not
15629 * to be produced as make_join_pushed() handle
15630 * AT_MULTI_UNIQUE_KEY as non-pushable.
15631 */
15632 if (m_pushed_join_operation==PUSHED_ROOT &&
15633 !m_disable_pushed_join &&
15634 !m_pushed_join_member->get_query_def().isScanQuery())
15635 {
15636 op= NULL; // Avoid compiler warning
15637 assert(false); // FIXME: Incomplete code, should not be executed
15638 assert(lm == NdbOperation::LM_CommittedRead);
15639 const int error= pk_unique_index_read_key_pushed(active_index,
15640 mrr_cur_range.start_key.key,
15641 ppartitionId);
15642 if (unlikely(error))
15643 DBUG_RETURN(error);
15644 }
15645 else
15646 {
15647 if (m_pushed_join_operation == PUSHED_ROOT)
15648 {
15649 DBUG_PRINT("info", ("Cannot push join due to incomplete implementation."));
15650 m_thd_ndb->m_pushed_queries_dropped++;
15651 }
15652 if (!(op= pk_unique_index_read_key(active_index,
15653 mrr_cur_range.start_key.key,
15654 multi_range_row(row_buf), lm,
15655 ppartitionId)))
15656 ERR_RETURN(trans->getNdbError());
15657 }
15658 }
15659 oplist[num_keyops++]= op;
15660 row_buf= multi_range_next_entry(row_buf, reclength);
15661 }
15662 }
15663
15664 if (m_active_query != NULL &&
15665 m_pushed_join_member->get_query_def().isScanQuery())
15666 {
15667 m_thd_ndb->m_scan_count++;
15668 if (mrr_is_output_sorted)
15669 {
15670 m_thd_ndb->m_sorted_scan_count++;
15671 }
15672
15673 bool prunable= false;
15674 if (unlikely(m_active_query->isPrunable(prunable) != 0))
15675 ERR_RETURN(m_active_query->getNdbError());
15676 if (prunable)
15677 m_thd_ndb->m_pruned_scan_count++;
15678
15679 DBUG_PRINT("info", ("Is MRR scan-query pruned to 1 partition? :%u", prunable));
15680 assert(!m_multi_cursor);
15681 }
15682 if (m_multi_cursor)
15683 {
15684 DBUG_PRINT("info", ("Is MRR scan pruned to 1 partition? :%u",
15685 m_multi_cursor->getPruned()));
15686 m_thd_ndb->m_scan_count++;
15687 m_thd_ndb->m_pruned_scan_count += (m_multi_cursor->getPruned()? 1 : 0);
15688 if (mrr_is_output_sorted)
15689 {
15690 m_thd_ndb->m_sorted_scan_count++;
15691 }
15692 }
15693
15694 if (any_real_read && execute_no_commit_ie(m_thd_ndb, trans))
15695 ERR_RETURN(trans->getNdbError());
15696
15697 if (!m_range_res)
15698 {
15699 DBUG_PRINT("info",
15700 ("Split MRR read, %d-%d of %d bufsize=%lu used=%lu range_no=%d",
15701 starting_range, mrr_range_no - 1, ranges_in_seq,
15702 (ulong)(end_of_buffer - multi_range_buffer->buffer),
15703 (ulong)(row_buf - multi_range_buffer->buffer), range_no));
15704 /*
15705 Mark that we're using entire buffer (even if might not) as we are not
15706 reading read all ranges yet.
15707
15708 This as we don't want mysqld to reuse the buffer when we read the
15709 remaining ranges.
15710 */
15711 multi_range_buffer->end_of_used_area= multi_range_buffer->buffer_end;
15712 }
15713 else
15714 multi_range_buffer->end_of_used_area= row_buf;
15715
15716 first_running_range= first_range_in_batch= starting_range;
15717 first_unstarted_range= mrr_range_no;
15718 m_current_range_no= 0;
15719
15720 /*
15721 Now we need to inspect all ranges that were converted to key operations.
15722
15723 We need to check for any error (in particular NoDataFound), and remember
15724 the status, since the operation pointer may no longer be valid when we
15725 actually get to it in multi_range_next_entry() (we may have done further
15726 execute()'s in a different handler object during joins eg.)
15727 */
15728 row_buf= m_multi_range_result_ptr;
15729 uint op_idx= 0;
15730 for (uint r= first_range_in_batch; r < first_unstarted_range; r++)
15731 {
15732 uchar &type_loc= multi_range_entry_type(row_buf);
15733 row_buf= multi_range_next_entry(row_buf, reclength);
15734 if (type_loc >= enum_ordered_range)
15735 continue;
15736
15737 assert(op_idx < MRR_MAX_RANGES);
15738 if ((op= oplist[op_idx++]) == NULL)
15739 continue; // read_before_write_removal
15740
15741 const NdbError &error= op->getNdbError();
15742 if (error.code != 0)
15743 {
15744 if (error.classification == NdbError::NoDataFound)
15745 type_loc= enum_empty_unique_range;
15746 else
15747 {
15748 /*
15749 This shouldn't really happen.
15750
15751 There aren't really any other errors that could happen on the read
15752 without also aborting the transaction and causing execute() to
15753 return failure.
15754
15755 (But we can still safely return an error code in non-debug builds).
15756 */
15757 assert(FALSE);
15758 ERR_RETURN(error); /* purecov: deadcode */
15759 }
15760 }
15761 }
15762
15763 DBUG_RETURN(0);
15764 }
15765
/**
  Return the next row from an ongoing multi range read (MRR).

  Rows from ranges converted to primary/unique key operations were already
  fetched in batch and are returned directly from the buffered result area
  (m_multi_range_result_ptr), while rows from ranges handled by the ordered
  index scan are fetched on demand via read_multi_range_fetch_next().
  When all started ranges are consumed, the remaining ranges (if any) are
  started with multi_range_start_retrievals().

  @param[out] range_info  Returns the 'custom' pointer associated with the
                          range the returned row belongs to.

  @return 0 on success (row placed in table->record[0]),
          HA_ERR_END_OF_FILE when all ranges are exhausted,
          or another error code.
*/
int ha_ndbcluster::multi_range_read_next(char **range_info)
{
  int res;
  DBUG_ENTER("ha_ndbcluster::multi_range_read_next");

  if (m_disable_multi_read)
  {
    /* MRR disabled for this handler; use the default row-at-a-time path. */
    DBUG_RETURN(handler::multi_range_read_next(range_info));
  }

  for(;;)
  {
    /* for each range (we should have remembered the number) */
    while (first_running_range < first_unstarted_range)
    {
      uchar *row_buf= m_multi_range_result_ptr;
      int expected_range_no= first_running_range - first_range_in_batch;

      switch (multi_range_entry_type(row_buf))
      {
        case enum_skip_range:
        case enum_empty_unique_range:
          /* Nothing in this range; continue with next. */
          break;

        case enum_unique_range:
          /*
            Move to next range; we can have at most one record from a unique
            range.
          */
          first_running_range++;
          m_multi_range_result_ptr=
            multi_range_next_entry(m_multi_range_result_ptr,
                                   table_share->reclength);

          /*
            Clear m_active_cursor; it is used as a flag in update_row() /
            delete_row() to know whether the current tuple is from a scan
            or pk operation.
          */
          m_active_cursor= NULL;

          /* Return the record. */
          *range_info= multi_range_get_custom(multi_range_buffer,
                                              expected_range_no);
          memcpy(table->record[0], multi_range_row(row_buf),
                 table_share->reclength);
          DBUG_RETURN(0);

        case enum_ordered_range:
          /* An index scan range. */
          {
            /* NOTE(review): this 'res' shadows the outer one -- confirm
               the outer 'res' is only meant for the retrieval restart below. */
            int res;
            if ((res= read_multi_range_fetch_next()) != 0)
            {
              *range_info= multi_range_get_custom(multi_range_buffer,
                                                  expected_range_no);
              first_running_range++;
              m_multi_range_result_ptr=
                multi_range_next_entry(m_multi_range_result_ptr,
                                       table_share->reclength);
              DBUG_RETURN(res);
            }
          }
          if (!m_next_row)
          {
            /*
              The whole scan is done, and the cursor has been closed.
              So nothing more for this range. Move to next.
            */
            break;
          }
          else
          {
            int current_range_no= m_current_range_no;
            /*
              For a sorted index scan, we will receive rows in increasing
              range_no order, so we can return ranges in order, pausing when
              range_no indicate that the currently processed range
              (first_running_range) is done.

              But for unsorted scan, we may receive a high range_no from one
              fragment followed by a low range_no from another fragment. So we
              need to process all index scan ranges together.
            */
            if (!mrr_is_output_sorted || expected_range_no == current_range_no)
            {
              *range_info= multi_range_get_custom(multi_range_buffer,
                                                  current_range_no);
              /* Copy out data from the new row. */
              unpack_record(table->record[0], m_next_row);
              table->status= 0;
              /*
                Mark that we have used this row, so we need to fetch a new
                one on the next call.
              */
              m_next_row= 0;
              /*
                Set m_active_cursor; it is used as a flag in update_row() /
                delete_row() to know whether the current tuple is from a scan or
                pk operation.
              */
              m_active_cursor= m_multi_cursor;

              DBUG_RETURN(0);
            }
            else if (current_range_no > expected_range_no)
            {
              /* Nothing more in scan for this range. Move to next. */
              break;
            }
            else
            {
              /*
                Should not happen. Ranges should be returned from NDB API in
                the order we requested them.
              */
              assert(0);
              break; // Attempt to carry on
            }
          }

        default:
          assert(0);
      }
      /* At this point the current range is done, proceed to next. */
      first_running_range++;
      m_multi_range_result_ptr=
        multi_range_next_entry(m_multi_range_result_ptr, table_share->reclength);
    }

    if (m_range_res) // mrr_funcs.next() has consumed all ranges.
      DBUG_RETURN(HA_ERR_END_OF_FILE);

    /*
      Read remaining ranges
    */
    if ((res= multi_range_start_retrievals(first_running_range)))
      DBUG_RETURN(res);

  } // for(;;)
}
15908
15909
15910 /*
15911 Fetch next row from the ordered index cursor in multi range scan.
15912
15913 We keep the next row in m_next_row, and the range_no of the
15914 next row in m_current_range_no. This is used in sorted index scan
15915 to correctly interleave rows from primary/unique key operations with
15916 rows from the scan.
15917 */
15918 int
read_multi_range_fetch_next()15919 ha_ndbcluster::read_multi_range_fetch_next()
15920 {
15921 DBUG_ENTER("read_multi_range_fetch_next");
15922
15923 if (m_active_query)
15924 {
15925 DBUG_PRINT("info", ("read_multi_range_fetch_next from pushed join, m_next_row:%p", m_next_row));
15926 if (!m_next_row)
15927 {
15928 int res= fetch_next_pushed();
15929 if (res == NdbQuery::NextResult_gotRow)
15930 {
15931 m_current_range_no= 0;
15932 // m_current_range_no= cursor->get_range_no(); // FIXME SPJ, need rangeNo from index scan
15933 }
15934 else if (res == NdbQuery::NextResult_scanComplete)
15935 {
15936 /* We have fetched the last row from the scan. */
15937 m_active_query->close(FALSE);
15938 m_active_query= NULL;
15939 m_next_row= 0;
15940 DBUG_RETURN(0);
15941 }
15942 else
15943 {
15944 /* An error. */
15945 DBUG_RETURN(res);
15946 }
15947 }
15948 }
15949 else if (m_multi_cursor)
15950 {
15951 if (!m_next_row)
15952 {
15953 NdbIndexScanOperation *cursor= m_multi_cursor;
15954 int res= fetch_next(cursor);
15955 if (res == 0)
15956 {
15957 m_current_range_no= cursor->get_range_no();
15958 }
15959 else if (res == 1)
15960 {
15961 /* We have fetched the last row from the scan. */
15962 cursor->close(FALSE, TRUE);
15963 m_active_cursor= 0;
15964 m_multi_cursor= 0;
15965 m_next_row= 0;
15966 DBUG_RETURN(0);
15967 }
15968 else
15969 {
15970 /* An error. */
15971 DBUG_RETURN(res);
15972 }
15973 }
15974 }
15975 DBUG_RETURN(0);
15976 }
15977
15978
15979 /**
15980 * Try to find pushable subsets of a join plan.
15981 * @param hton unused (maybe useful for other engines).
15982 * @param thd Thread.
15983 * @param plan The join plan to examine.
15984 * @return Possible error code.
15985 */
15986
15987 static
ndbcluster_make_pushed_join(handlerton * hton,THD * thd,const AQP::Join_plan * plan)15988 int ndbcluster_make_pushed_join(handlerton *hton,
15989 THD* thd,
15990 const AQP::Join_plan* plan)
15991 {
15992 DBUG_ENTER("ndbcluster_make_pushed_join");
15993 (void)ha_ndb_ext; // prevents compiler warning.
15994
15995 if (THDVAR(thd, join_pushdown) &&
15996 // Check for online upgrade/downgrade.
15997 ndb_join_pushdown(g_ndb_cluster_connection->get_min_db_version()))
15998 {
15999 bool pushed_something = false;
16000 ndb_pushed_builder_ctx pushed_builder(*plan);
16001
16002 for (uint i= 0; i < plan->get_access_count()-1; i++)
16003 {
16004 const AQP::Table_access* const join_root= plan->get_table_access(i);
16005 const ndb_pushed_join* pushed_join= NULL;
16006
16007 // Try to build a ndb_pushed_join starting from 'join_root'
16008 int error= pushed_builder.make_pushed_join(join_root, pushed_join);
16009 if (unlikely(error))
16010 {
16011 if (error < 0) // getNdbError() gives us the error code
16012 {
16013 ERR_SET(pushed_builder.getNdbError(),error);
16014 }
16015 join_root->get_table()->file->print_error(error, MYF(0));
16016 DBUG_RETURN(error);
16017 }
16018
16019 // Assign any produced pushed_join definitions to
16020 // the ha_ndbcluster instance representing its root.
16021 if (pushed_join != NULL)
16022 {
16023 ha_ndbcluster* const handler=
16024 static_cast<ha_ndbcluster*>(join_root->get_table()->file);
16025
16026 error= handler->assign_pushed_join(pushed_join);
16027 if (unlikely(error))
16028 {
16029 delete pushed_join;
16030 handler->print_error(error, MYF(0));
16031 DBUG_RETURN(error);
16032 }
16033 // Something was pushed and the QEP need to be modified
16034 pushed_something = true;
16035 }
16036 }
16037
16038 if (pushed_something)
16039 {
16040 // Modify the QEP_TAB's to use the 'linked' read functions
16041 // for those parts of the join which have been pushed down.
16042 for (uint i= 0; i < plan->get_access_count(); i++)
16043 {
16044 plan->get_table_access(i)->set_pushed_table_access_method();
16045 }
16046 }
16047 }
16048 DBUG_RETURN(0);
16049 }
16050
16051
16052 /**
16053 * In case a pushed join having the table for this handler as its root
16054 * has been produced. ::assign_pushed_join() is responsible for setting
16055 * up this ha_ndbcluster instance such that the prepared NdbQuery
16056 * might be instantiated at execution time.
16057 */
16058 int
assign_pushed_join(const ndb_pushed_join * pushed_join)16059 ha_ndbcluster::assign_pushed_join(const ndb_pushed_join* pushed_join)
16060 {
16061 DBUG_ENTER("assign_pushed_join");
16062 m_thd_ndb->m_pushed_queries_defined++;
16063
16064 for (uint i = 0; i < pushed_join->get_operation_count(); i++)
16065 {
16066 const TABLE* const tab= pushed_join->get_table(i);
16067 assert(tab->file->ht == ht);
16068 ha_ndbcluster* child= static_cast<ha_ndbcluster*>(tab->file);
16069 child->m_pushed_join_member= pushed_join;
16070 child->m_pushed_join_operation= i;
16071 }
16072
16073 DBUG_PRINT("info", ("Assigned pushed join with %d child operations",
16074 pushed_join->get_operation_count()-1));
16075
16076 DBUG_RETURN(0);
16077 }
16078
16079
16080 /**
16081 * First level of filtering tables which *maybe* may be part of
16082 * a pushed query: Returning 'false' will eliminate this table
16083 * from being a part of a pushed join.
16084 * A 'reason' for rejecting this table is required if 'false'
16085 * is returned.
16086 */
16087 bool
maybe_pushable_join(const char * & reason) const16088 ha_ndbcluster::maybe_pushable_join(const char*& reason) const
16089 {
16090 reason= NULL;
16091 if (uses_blob_value(table->read_set))
16092 {
16093 reason= "select list can't contain BLOB columns";
16094 return false;
16095 }
16096 if (m_user_defined_partitioning)
16097 {
16098 reason= "has user defined partioning";
16099 return false;
16100 }
16101
16102 // Pushed operations may not set locks.
16103 const NdbOperation::LockMode lockMode= get_ndb_lock_mode(m_lock.type);
16104 switch (lockMode)
16105 {
16106 case NdbOperation::LM_CommittedRead:
16107 return true;
16108
16109 case NdbOperation::LM_Read:
16110 case NdbOperation::LM_Exclusive:
16111 reason= "lock modes other than 'read committed' not implemented";
16112 return false;
16113
16114 default: // Other lock modes not used by handler.
16115 assert(false);
16116 return false;
16117 }
16118
16119 return true;
16120 }
16121
16122 /**
16123 * Check if this table access operation (and a number of succeding operation)
16124 * can be pushed to the cluster and executed there. This requires that there
16125 * is an NdbQueryDefiniton and that it still matches the corresponds to the
16126 * type of operation that we intend to execute. (The MySQL server will
16127 * sometimes change its mind and replace a scan with a lookup or vice versa
16128 * as it works its way into the nested loop join.)
16129 *
16130 * @param type This is the operation type that the server want to execute.
16131 * @param idx Index used whenever relevant for operation type
16132 * @param needSorted True if the root operation is an ordered index scan
16133 * with sorted results.
16134 * @return True if the operation may be pushed.
16135 */
16136 bool
check_if_pushable(int type,uint idx) const16137 ha_ndbcluster::check_if_pushable(int type, //NdbQueryOperationDef::Type,
16138 uint idx) const
16139 {
16140 if (m_disable_pushed_join)
16141 {
16142 DBUG_PRINT("info", ("Push disabled (HA_EXTRA_KEYREAD)"));
16143 return false;
16144 }
16145 return m_pushed_join_operation == PUSHED_ROOT
16146 && m_pushed_join_member != NULL
16147 && m_pushed_join_member->match_definition(
16148 type,
16149 (idx<MAX_KEY) ? &m_index[idx] : NULL);
16150 }
16151
16152
16153 int
create_pushed_join(const NdbQueryParamValue * keyFieldParams,uint paramCnt)16154 ha_ndbcluster::create_pushed_join(const NdbQueryParamValue* keyFieldParams, uint paramCnt)
16155 {
16156 DBUG_ENTER("create_pushed_join");
16157 assert(m_pushed_join_member && m_pushed_join_operation == PUSHED_ROOT);
16158
16159 NdbQuery* const query=
16160 m_pushed_join_member->make_query_instance(m_thd_ndb->trans, keyFieldParams, paramCnt);
16161
16162 if (unlikely(query==NULL))
16163 ERR_RETURN(m_thd_ndb->trans->getNdbError());
16164
16165 // Bind to instantiated NdbQueryOperations.
16166 for (uint i= 0; i < m_pushed_join_member->get_operation_count(); i++)
16167 {
16168 const TABLE* const tab= m_pushed_join_member->get_table(i);
16169 ha_ndbcluster* handler= static_cast<ha_ndbcluster*>(tab->file);
16170
16171 assert(handler->m_pushed_join_operation==(int)i);
16172 NdbQueryOperation* const op= query->getQueryOperation(i);
16173 handler->m_pushed_operation= op;
16174
16175 // Bind to result buffers
16176 const NdbRecord* const resultRec= handler->m_ndb_record;
16177 int res= op->setResultRowRef(
16178 resultRec,
16179 handler->_m_next_row,
16180 (uchar *)(tab->read_set->bitmap));
16181 if (unlikely(res))
16182 ERR_RETURN(query->getNdbError());
16183
16184 // We clear 'm_next_row' to say that no row was fetched from the query yet.
16185 handler->_m_next_row= 0;
16186 }
16187
16188 assert(m_active_query==NULL);
16189 m_active_query= query;
16190 m_thd_ndb->m_pushed_queries_executed++;
16191
16192 DBUG_RETURN(0);
16193 }
16194
16195
16196 /**
16197 * Check if this table access operation is part of a pushed join operation
16198 * which is actively executing.
16199 */
16200 bool
check_is_pushed() const16201 ha_ndbcluster::check_is_pushed() const
16202 {
16203 if (m_pushed_join_member == NULL)
16204 return false;
16205
16206 handler *root= m_pushed_join_member->get_table(PUSHED_ROOT)->file;
16207 return (static_cast<ha_ndbcluster*>(root)->m_active_query);
16208 }
16209
16210 uint
number_of_pushed_joins() const16211 ha_ndbcluster::number_of_pushed_joins() const
16212 {
16213 if (m_pushed_join_member == NULL)
16214 return 0;
16215 else
16216 return m_pushed_join_member->get_operation_count();
16217 }
16218
16219 const TABLE*
root_of_pushed_join() const16220 ha_ndbcluster::root_of_pushed_join() const
16221 {
16222 if (m_pushed_join_member == NULL)
16223 return NULL;
16224 else
16225 return m_pushed_join_member->get_table(PUSHED_ROOT);
16226 }
16227
16228 const TABLE*
parent_of_pushed_join() const16229 ha_ndbcluster::parent_of_pushed_join() const
16230 {
16231 if (m_pushed_join_operation > PUSHED_ROOT)
16232 {
16233 assert(m_pushed_join_member!=NULL);
16234 uint parent_ix= m_pushed_join_member
16235 ->get_query_def().getQueryOperation(m_pushed_join_operation)
16236 ->getParentOperation(0)
16237 ->getOpNo();
16238 return m_pushed_join_member->get_table(parent_ix);
16239 }
16240 return NULL;
16241 }
16242
/**
  Utility thread component (its main loop is in Ndb_util_thread::do_run()).

  Constructor: initializes the mutex/condition pair used to signal wakeup
  and stop requests to the running thread.
*/
Ndb_util_thread::Ndb_util_thread()
  : Ndb_component("Util")
{
  native_mutex_init(&LOCK, MY_MUTEX_INIT_FAST);
  native_cond_init(&COND);
}
16252
/* Destructor: releases the mutex/condition pair set up by the constructor. */
Ndb_util_thread::~Ndb_util_thread()
{
  native_mutex_destroy(&LOCK);
  native_cond_destroy(&COND);
}
16258
/*
  Wake the utility thread from a potential timed wait on COND so it can
  promptly notice a stop request (or re-evaluate its state).
*/
void Ndb_util_thread::do_wakeup()
{
  // Wakeup from potential wait
  log_info("Wakeup");

  native_mutex_lock(&LOCK);
  native_cond_signal(&COND);
  native_mutex_unlock(&LOCK);
}
16268
16269
/* Request stop of the global utility thread (called during shutdown). */
void ndb_util_thread_stop(void)
{
  ndb_util_thread.stop();
}
16274
16275 #include "ndb_log.h"
16276
16277 void
do_run()16278 Ndb_util_thread::do_run()
16279 {
16280 THD *thd; /* needs to be first for thread_stack */
16281 struct timespec abstime;
16282 Thd_ndb *thd_ndb= NULL;
16283 uint share_list_size= 0;
16284 NDB_SHARE **share_list= NULL;
16285
16286 DBUG_ENTER("ndb_util_thread");
16287 DBUG_PRINT("enter", ("cache_check_time: %lu", opt_ndb_cache_check_time));
16288
16289 log_info("Starting...");
16290
16291 native_mutex_lock(&LOCK);
16292
16293 thd= new THD; /* note that contructor of THD uses DBUG_ */
16294 if (thd == NULL)
16295 {
16296 set_my_errno(HA_ERR_OUT_OF_MEM);
16297 DBUG_VOID_RETURN;
16298 }
16299 THD_CHECK_SENTRY(thd);
16300
16301 thd->thread_stack= (char*)&thd; /* remember where our stack is */
16302 if (thd->store_globals())
16303 goto ndb_util_thread_fail;
16304 thd_set_command(thd, COM_DAEMON);
16305 #ifndef NDB_THD_HAS_NO_VERSION
16306 thd->version=refresh_version;
16307 #endif
16308 thd->get_protocol_classic()->set_client_capabilities(0);
16309 thd->security_context()->skip_grants();
16310 thd->get_protocol_classic()->init_net((st_vio *) 0);
16311
16312 CHARSET_INFO *charset_connection;
16313 charset_connection= get_charset_by_csname("utf8",
16314 MY_CS_PRIMARY, MYF(MY_WME));
16315 thd->variables.character_set_client= charset_connection;
16316 thd->variables.character_set_results= charset_connection;
16317 thd->variables.collation_connection= charset_connection;
16318 thd->update_charset();
16319
16320 native_mutex_unlock(&LOCK);
16321
16322 log_info("Wait for server start completed");
16323 /*
16324 wait for mysql server to start
16325 */
16326 mysql_mutex_lock(&LOCK_server_started);
16327 while (!mysqld_server_started)
16328 {
16329 set_timespec(&abstime, 1);
16330 mysql_cond_timedwait(&COND_server_started, &LOCK_server_started,
16331 &abstime);
16332 if (is_stop_requested())
16333 {
16334 mysql_mutex_unlock(&LOCK_server_started);
16335 native_mutex_lock(&LOCK);
16336 goto ndb_util_thread_end;
16337 }
16338 }
16339 mysql_mutex_unlock(&LOCK_server_started);
16340
16341 // Defer call of THD::init_for_query until after mysqld_server_started
16342 // to ensure that the parts of MySQL Server it uses has been created
16343 thd->init_for_queries();
16344
16345 log_info("Wait for cluster to start");
16346 /*
16347 Wait for cluster to start
16348 */
16349 native_mutex_lock(&LOCK);
16350 while (!g_ndb_status.cluster_node_id && (ndbcluster_hton->slot != ~(uint)0))
16351 {
16352 /* ndb not connected yet */
16353 native_cond_wait(&COND, &LOCK);
16354 if (is_stop_requested())
16355 goto ndb_util_thread_end;
16356 }
16357 native_mutex_unlock(&LOCK);
16358
16359 /* Get thd_ndb for this thread */
16360 if (!(thd_ndb= Thd_ndb::seize(thd)))
16361 {
16362 sql_print_error("Could not allocate Thd_ndb object");
16363 native_mutex_lock(&LOCK);
16364 goto ndb_util_thread_end;
16365 }
16366 thd_set_thd_ndb(thd, thd_ndb);
16367 thd_ndb->options|= TNO_NO_LOG_SCHEMA_OP;
16368
16369 if (opt_ndb_extra_logging && ndb_binlog_running)
16370 sql_print_information("NDB Binlog: Ndb tables initially read only.");
16371
16372 log_info("Started");
16373
16374 set_timespec(&abstime, 0);
16375 for (;;)
16376 {
16377 native_mutex_lock(&LOCK);
16378 if (!is_stop_requested())
16379 native_cond_timedwait(&COND,
16380 &LOCK,
16381 &abstime);
16382 if (is_stop_requested()) /* Stopping thread */
16383 goto ndb_util_thread_end;
16384 native_mutex_unlock(&LOCK);
16385 #ifdef NDB_EXTRA_DEBUG_UTIL_THREAD
16386 DBUG_PRINT("ndb_util_thread", ("Started, cache_check_time: %lu",
16387 opt_ndb_cache_check_time));
16388 #endif
16389
16390 /*
16391 Check if the Ndb object in thd_ndb is still valid(it will be
16392 invalid if connection to cluster has been lost) and recycle
16393 it if necessary.
16394 */
16395 if (!check_ndb_in_thd(thd, false))
16396 {
16397 set_timespec(&abstime, 1);
16398 continue;
16399 }
16400
16401 /*
16402 Regularly give the ndb_binlog component chance to set it self up
16403 i.e at first start it needs to create the ndb_* system tables
16404 and setup event operations on those. In case of lost connection
16405 to cluster, the ndb_* system tables are hopefully still there
16406 but the event operations need to be recreated.
16407 */
16408 if (!ndb_binlog_setup(thd))
16409 {
16410 /* Failed to setup binlog, try again in 1 second */
16411 set_timespec(&abstime, 1);
16412 continue;
16413 }
16414
16415 if (opt_ndb_cache_check_time == 0)
16416 {
16417 /* Wake up in 1 second to check if value has changed */
16418 set_timespec(&abstime, 1);
16419 continue;
16420 }
16421
16422 /* Lock mutex and fill list with pointers to all open tables */
16423 NDB_SHARE *share;
16424 native_mutex_lock(&ndbcluster_mutex);
16425 uint i, open_count, record_count= ndbcluster_open_tables.records;
16426 if (share_list_size < record_count)
16427 {
16428 NDB_SHARE ** new_share_list= new NDB_SHARE * [record_count];
16429 if (!new_share_list)
16430 {
16431 sql_print_warning("ndb util thread: malloc failure, "
16432 "query cache not maintained properly");
16433 native_mutex_unlock(&ndbcluster_mutex);
16434 goto next; // At least do not crash
16435 }
16436 delete [] share_list;
16437 share_list_size= record_count;
16438 share_list= new_share_list;
16439 }
16440 for (i= 0, open_count= 0; i < record_count; i++)
16441 {
16442 share= (NDB_SHARE *)my_hash_element(&ndbcluster_open_tables, i);
16443 if ((share->use_count - (int) (share->op != 0) - (int) (share->op != 0))
16444 <= 0)
16445 continue; // injector thread is the only user, skip statistics
16446 /* ndb_share reference temporary, free below */
16447 share->use_count++; /* Make sure the table can't be closed */
16448 share->util_thread= true;
16449 DBUG_PRINT("NDB_SHARE", ("%s temporary use_count: %u",
16450 share->key_string(), share->use_count));
16451 DBUG_PRINT("ndb_util_thread",
16452 ("Found open table[%d]: %s, use_count: %d",
16453 i, share->table_name, share->use_count));
16454
16455 /* Store pointer to table */
16456 share_list[open_count++]= share;
16457 }
16458 native_mutex_unlock(&ndbcluster_mutex);
16459
16460 /* Iterate through the open files list */
16461 for (i= 0; i < open_count; i++)
16462 {
16463 share= share_list[i];
16464 if ((share->use_count - (int) (share->op != 0) - (int) (share->op != 0))
16465 <= 1)
16466 {
16467 /*
16468 Util thread and injector thread is the only user, skip statistics
16469 */
16470 /* ndb_share reference temporary free */
16471 DBUG_PRINT("NDB_SHARE", ("%s temporary free use_count: %u",
16472 share->key_string(), share->use_count));
16473
16474 native_mutex_lock(&ndbcluster_mutex);
16475 share->util_thread= false;
16476 free_share(&share, true);
16477 native_mutex_unlock(&ndbcluster_mutex);
16478 continue;
16479 }
16480 DBUG_PRINT("ndb_util_thread",
16481 ("Fetching commit count for: %s", share->key_string()));
16482
16483 struct Ndb_statistics stat;
16484 uint lock;
16485 native_mutex_lock(&share->mutex);
16486 lock= share->commit_count_lock;
16487 native_mutex_unlock(&share->mutex);
16488 {
16489 /* Contact NDB to get commit count for table */
16490 Ndb* ndb= thd_ndb->ndb;
16491 if (ndb->setDatabaseName(share->db))
16492 {
16493 goto loop_next;
16494 }
16495 Ndb_table_guard ndbtab_g(ndb->getDictionary(), share->table_name);
16496 if (ndbtab_g.get_table() &&
16497 ndb_get_table_statistics(thd, NULL, FALSE, ndb,
16498 ndbtab_g.get_table()->getDefaultRecord(),
16499 &stat) == 0)
16500 {
16501 DBUG_PRINT("info", ("Table: %s, commit_count: %llu, rows: %llu",
16502 share->key_string(),
16503 stat.commit_count, stat.row_count));
16504 }
16505 else
16506 {
16507 DBUG_PRINT("ndb_util_thread",
16508 ("Error: Could not get commit count for table %s",
16509 share->key_string()));
16510 stat.commit_count= 0;
16511 }
16512 }
16513 loop_next:
16514 native_mutex_lock(&share->mutex);
16515 if (share->commit_count_lock == lock)
16516 share->commit_count= stat.commit_count;
16517 native_mutex_unlock(&share->mutex);
16518
16519 /* ndb_share reference temporary free */
16520 DBUG_PRINT("NDB_SHARE", ("%s temporary free use_count: %u",
16521 share->key_string(), share->use_count));
16522 native_mutex_lock(&ndbcluster_mutex);
16523 share->util_thread= false;
16524 free_share(&share, true);
16525 native_mutex_unlock(&ndbcluster_mutex);
16526 }
16527 next:
16528 /* Calculate new time to wake up */
16529 set_timespec_nsec(&abstime, opt_ndb_cache_check_time * 1000000ULL);
16530 }
16531
16532 log_info("Stopping...");
16533
16534 native_mutex_lock(&LOCK);
16535
16536 ndb_util_thread_end:
16537 thd->get_protocol_classic()->end_net();
16538 ndb_util_thread_fail:
16539 if (share_list)
16540 delete [] share_list;
16541 if (thd_ndb)
16542 {
16543 Thd_ndb::release(thd_ndb);
16544 thd_set_thd_ndb(thd, NULL);
16545 }
16546 delete thd;
16547
16548 native_mutex_unlock(&LOCK);
16549 DBUG_PRINT("exit", ("ndb_util_thread"));
16550
16551 log_info("Stopped");
16552
16553 DBUG_VOID_RETURN;
16554 }
16555
16556 /*
16557 Condition pushdown
16558 */
16559 /**
16560 Push a condition to ndbcluster storage engine for evaluation
16561 during table and index scans. The conditions will be stored on a stack
16562 for possibly storing several conditions. The stack can be popped
16563 by calling cond_pop, handler::extra(HA_EXTRA_RESET) (handler::reset())
16564 will clear the stack.
16565 The current implementation supports arbitrary AND/OR nested conditions
16566 with comparisons between columns and constants (including constant
16567 expressions and function calls) and the following comparison operators:
16568 =, !=, >, >=, <, <=, "is null", and "is not null".
16569
16570 @retval
16571 NULL The condition was supported and will be evaluated for each
16572 row found during the scan
16573 @retval
16574 cond The condition was not supported and all rows will be returned from
16575 the scan for evaluation (and thus not saved on stack)
16576 */
const
Item*
ha_ndbcluster::cond_push(const Item *cond)
{
  DBUG_ENTER("ha_ndbcluster::cond_push");

#if 1
  if (cond->used_tables() & ~table->pos_in_table_list->map())
  {
    /**
     * 'cond' refers fields from other tables, or other instances
     * of this table, -> reject it.
     * (Optimizer need to have a better understanding of what is
     * pushable by each handler.)
     */
    DBUG_EXECUTE("where",print_where((Item *)cond, "Rejected cond_push", QT_ORDINARY););
    // Returning 'cond' tells the server to evaluate it itself
    DBUG_RETURN(cond);
  }
#else
  /*
    Make sure that 'cond' does not refer field(s) from other tables
    or other instances of this table.
    (This was a legacy bug in optimizer)
  */
  assert(!(cond->used_tables() & ~table->pos_in_table_list->map()));
#endif
  // Lazily create the condition handler on the first pushed condition
  if (!m_cond)
    m_cond= new ha_ndbcluster_cond;
  if (!m_cond)
  {
    // Allocation failed: report OOM and leave the condition unpushed
    set_my_errno(HA_ERR_OUT_OF_MEM);
    DBUG_RETURN(cond);
  }
  DBUG_EXECUTE("where",print_where((Item *)cond, m_tabname, QT_ORDINARY););
  // Delegate to the condition stack: returns NULL when fully pushed,
  // or the (remainder of the) condition the server must still evaluate
  DBUG_RETURN(m_cond->cond_push(cond, table, (NDBTAB *)m_table));
}
16613
16614 /**
16615 Pop the top condition from the condition stack of the handler instance.
16616 */
16617 void
cond_pop()16618 ha_ndbcluster::cond_pop()
16619 {
16620 if (m_cond)
16621 m_cond->cond_pop();
16622 }
16623
16624
16625 /*
16626 Implements the SHOW ENGINE NDB STATUS command.
16627 */
16628 bool
ndbcluster_show_status(handlerton * hton,THD * thd,stat_print_fn * stat_print,enum ha_stat_type stat_type)16629 ndbcluster_show_status(handlerton *hton, THD* thd, stat_print_fn *stat_print,
16630 enum ha_stat_type stat_type)
16631 {
16632 char name[16];
16633 char buf[IO_SIZE];
16634 uint buflen;
16635 DBUG_ENTER("ndbcluster_show_status");
16636
16637 if (stat_type != HA_ENGINE_STATUS)
16638 {
16639 DBUG_RETURN(FALSE);
16640 }
16641
16642 Ndb* ndb= check_ndb_in_thd(thd);
16643 Thd_ndb *thd_ndb= get_thd_ndb(thd);
16644 struct st_ndb_status ns;
16645 if (ndb)
16646 update_status_variables(thd_ndb, &ns, thd_ndb->connection);
16647 else
16648 update_status_variables(NULL, &ns, g_ndb_cluster_connection);
16649
16650 buflen= (uint)
16651 my_snprintf(buf, sizeof(buf),
16652 "cluster_node_id=%ld, "
16653 "connected_host=%s, "
16654 "connected_port=%ld, "
16655 "number_of_data_nodes=%ld, "
16656 "number_of_ready_data_nodes=%ld, "
16657 "connect_count=%ld",
16658 ns.cluster_node_id,
16659 ns.connected_host,
16660 ns.connected_port,
16661 ns.number_of_data_nodes,
16662 ns.number_of_ready_data_nodes,
16663 ns.connect_count);
16664 if (stat_print(thd, ndbcluster_hton_name, ndbcluster_hton_name_length,
16665 STRING_WITH_LEN("connection"), buf, buflen))
16666 DBUG_RETURN(TRUE);
16667
16668 for (int i= 0; i < MAX_NDB_NODES; i++)
16669 {
16670 if (ns.transaction_hint_count[i] > 0 ||
16671 ns.transaction_no_hint_count[i] > 0)
16672 {
16673 uint namelen= (uint)my_snprintf(name, sizeof(name), "node[%d]", i);
16674 buflen= (uint)my_snprintf(buf, sizeof(buf),
16675 "transaction_hint=%ld, transaction_no_hint=%ld",
16676 ns.transaction_hint_count[i],
16677 ns.transaction_no_hint_count[i]);
16678 if (stat_print(thd, ndbcluster_hton_name, ndbcluster_hton_name_length,
16679 name, namelen, buf, buflen))
16680 DBUG_RETURN(TRUE);
16681 }
16682 }
16683
16684 if (ndb)
16685 {
16686 Ndb::Free_list_usage tmp;
16687 tmp.m_name= 0;
16688 while (ndb->get_free_list_usage(&tmp))
16689 {
16690 buflen= (uint)
16691 my_snprintf(buf, sizeof(buf),
16692 "created=%u, free=%u, sizeof=%u",
16693 tmp.m_created, tmp.m_free, tmp.m_sizeof);
16694 if (stat_print(thd, ndbcluster_hton_name, ndbcluster_hton_name_length,
16695 tmp.m_name, (uint)strlen(tmp.m_name), buf, buflen))
16696 DBUG_RETURN(TRUE);
16697 }
16698 }
16699
16700 buflen = (uint)ndbcluster_show_status_binlog(buf, sizeof(buf));
16701 if (buflen)
16702 {
16703 if (stat_print(thd, ndbcluster_hton_name, ndbcluster_hton_name_length,
16704 STRING_WITH_LEN("binlog"), buf, buflen))
16705 DBUG_RETURN(TRUE);
16706 }
16707
16708 DBUG_RETURN(FALSE);
16709 }
16710
16711
get_default_num_partitions(HA_CREATE_INFO * create_info)16712 int ha_ndbcluster::get_default_num_partitions(HA_CREATE_INFO *create_info)
16713 {
16714 if (unlikely(g_ndb_cluster_connection->get_no_ready() <= 0))
16715 {
16716 err:
16717 my_error(HA_ERR_NO_CONNECTION, MYF(0));
16718 return -1;
16719 }
16720
16721 THD* thd = current_thd;
16722 if (thd == 0)
16723 goto err;
16724 Thd_ndb * thd_ndb = get_thd_ndb(thd);
16725 if (thd_ndb == 0)
16726 goto err;
16727
16728 ha_rows max_rows, min_rows;
16729 if (create_info)
16730 {
16731 max_rows= create_info->max_rows;
16732 min_rows= create_info->min_rows;
16733 }
16734 else
16735 {
16736 max_rows= table_share->max_rows;
16737 min_rows= table_share->min_rows;
16738 }
16739 uint no_fragments= get_no_fragments(max_rows >= min_rows ?
16740 max_rows : min_rows);
16741 uint reported_frags;
16742 adjusted_frag_count(thd_ndb->ndb,
16743 no_fragments,
16744 reported_frags);
16745 return reported_frags;
16746 }
16747
/*
  Compute the NDB distribution hash for the row currently in record[0],
  over the given NULL-terminated array of key fields.
*/
uint32 ha_ndbcluster::calculate_key_hash_value(Field **field_array)
{
  Uint32 hash_value;
  struct Ndb::Key_part_ptr key_data[MAX_REF_PARTS];
  struct Ndb::Key_part_ptr *key_data_ptr= &key_data[0];
  Uint32 i= 0;
  int ret_val;
  // Scratch buffer for the transformed key representation used by computeHash()
  Uint64 tmp[(MAX_KEY_SIZE_IN_WORDS*MAX_XFRM_MULTIPLY) >> 1];
  void *buf= (void*)&tmp[0];
  DBUG_ENTER("ha_ndbcluster::calculate_key_hash_value");

  // Collect { ptr, len } for each key field; the do-while assumes the
  // array holds at least one field
  do
  {
    Field *field= *field_array;
    uint len= field->data_length();
    assert(!field->is_real_null());  // key parts must not be NULL here
    // VARCHAR key data is preceded by its length byte(s); include them
    if (field->real_type() == MYSQL_TYPE_VARCHAR)
      len+= ((Field_varstring*)field)->length_bytes;
    key_data[i].ptr= field->ptr;
    key_data[i++].len= len;
  } while (*(++field_array));
  key_data[i].ptr= 0;  // terminate the key part list for computeHash()
  if ((ret_val= Ndb::computeHash(&hash_value, m_table,
                                 key_data_ptr, buf, sizeof(tmp))))
  {
    // A failure here means key/buffer setup is broken: treat as fatal
    DBUG_PRINT("info", ("ret_val = %d", ret_val));
    assert(FALSE);
    abort();
  }
  DBUG_RETURN(hash_value);
}
16779
16780
16781 /*
16782 Set-up auto-partitioning for NDB Cluster
16783
16784 SYNOPSIS
16785 set_auto_partitions()
16786 part_info Partition info struct to set-up
16787
16788 RETURN VALUE
16789 NONE
16790
16791 DESCRIPTION
16792 Set-up auto partitioning scheme for tables that didn't define any
16793 partitioning. We'll use PARTITION BY KEY() in this case which
16794 translates into partition by primary key if a primary key exists
16795 and partition by hidden key otherwise.
16796 */
16797
// Distribution schemes selectable through the --ndb-distribution option;
// values index distribution_names[] below.
enum ndb_distribution_enum {
  NDB_DISTRIBUTION_KEYHASH= 0,
  NDB_DISTRIBUTION_LINHASH= 1
};
// Names shown to the user for the enum values above (NullS-terminated)
static const char* distribution_names[]= { "KEYHASH", "LINHASH", NullS };
// Backing storage for the ndb-distribution system variable
static ulong opt_ndb_distribution;
static TYPELIB distribution_typelib= {
  array_elements(distribution_names) - 1,
  "",
  distribution_names,
  NULL
};
// Registers the "ndb-distribution" enum system variable
static MYSQL_SYSVAR_ENUM(
  distribution,                     /* name */
  opt_ndb_distribution,             /* var */
  PLUGIN_VAR_RQCMDARG,
  "Default distribution for new tables in ndb",
  NULL,                             /* check func. */
  NULL,                             /* update func. */
  NDB_DISTRIBUTION_KEYHASH,         /* default */
  &distribution_typelib             /* typelib */
);
16820
16821
set_auto_partitions(partition_info * part_info)16822 void ha_ndbcluster::set_auto_partitions(partition_info *part_info)
16823 {
16824 DBUG_ENTER("ha_ndbcluster::set_auto_partitions");
16825 part_info->list_of_part_fields= TRUE;
16826 part_info->part_type= HASH_PARTITION;
16827 switch (opt_ndb_distribution)
16828 {
16829 case NDB_DISTRIBUTION_KEYHASH:
16830 part_info->linear_hash_ind= FALSE;
16831 break;
16832 case NDB_DISTRIBUTION_LINHASH:
16833 part_info->linear_hash_ind= TRUE;
16834 break;
16835 default:
16836 assert(false);
16837 break;
16838 }
16839 DBUG_VOID_RETURN;
16840 }
16841
16842
16843 static int
create_table_set_range_data(const partition_info * part_info,NdbDictionary::Table & ndbtab)16844 create_table_set_range_data(const partition_info *part_info,
16845 NdbDictionary::Table& ndbtab)
16846 {
16847 const uint num_parts = part_info->num_parts;
16848 DBUG_ENTER("create_table_set_range_data");
16849
16850 int32 *range_data= (int32*)my_malloc(PSI_INSTRUMENT_ME, num_parts*sizeof(int32), MYF(0));
16851 if (!range_data)
16852 {
16853 mem_alloc_error(num_parts*sizeof(int32));
16854 DBUG_RETURN(1);
16855 }
16856 for (uint i= 0; i < num_parts; i++)
16857 {
16858 longlong range_val= part_info->range_int_array[i];
16859 const bool unsigned_flag= part_info->part_expr->unsigned_flag;
16860 if (unsigned_flag)
16861 range_val-= 0x8000000000000000ULL;
16862 if (range_val < INT_MIN32 || range_val >= INT_MAX32)
16863 {
16864 if ((i != num_parts - 1) ||
16865 (range_val != LLONG_MAX))
16866 {
16867 my_error(ER_LIMITED_PART_RANGE, MYF(0), "NDB");
16868 my_free((char*)range_data, MYF(0));
16869 DBUG_RETURN(1);
16870 }
16871 range_val= INT_MAX32;
16872 }
16873 range_data[i]= (int32)range_val;
16874 }
16875 ndbtab.setRangeListData(range_data, num_parts);
16876 my_free((char*)range_data, MYF(0));
16877 DBUG_RETURN(0);
16878 }
16879
16880
16881 static int
create_table_set_list_data(const partition_info * part_info,NdbDictionary::Table & ndbtab)16882 create_table_set_list_data(const partition_info *part_info,
16883 NdbDictionary::Table& ndbtab)
16884 {
16885 const uint num_list_values = part_info->num_list_values;
16886 int32 *list_data= (int32*)my_malloc(PSI_INSTRUMENT_ME,
16887 num_list_values*2*sizeof(int32), MYF(0));
16888 DBUG_ENTER("create_table_set_list_data");
16889
16890 if (!list_data)
16891 {
16892 mem_alloc_error(num_list_values*2*sizeof(int32));
16893 DBUG_RETURN(1);
16894 }
16895 for (uint i= 0; i < num_list_values; i++)
16896 {
16897 LIST_PART_ENTRY *list_entry= &part_info->list_array[i];
16898 longlong list_val= list_entry->list_value;
16899 const bool unsigned_flag= part_info->part_expr->unsigned_flag;
16900 if (unsigned_flag)
16901 list_val-= 0x8000000000000000ULL;
16902 if (list_val < INT_MIN32 || list_val > INT_MAX32)
16903 {
16904 my_error(ER_LIMITED_PART_RANGE, MYF(0), "NDB");
16905 my_free((char*)list_data, MYF(0));
16906 DBUG_RETURN(1);
16907 }
16908 list_data[2*i]= (int32)list_val;
16909 list_data[2*i+1]= list_entry->partition_id;
16910 }
16911 ndbtab.setRangeListData(list_data, 2*num_list_values);
16912 my_free((char*)list_data, MYF(0));
16913 DBUG_RETURN(0);
16914 }
16915
16916 /*
16917 User defined partitioning set-up. We need to check how many fragments the
16918 user wants defined and which node groups to put those into.
16919
16920 All the functionality of the partition function, partition limits and so
16921 forth are entirely handled by the MySQL Server. There is one exception to
16922 this rule for PARTITION BY KEY where NDB handles the hash function and
16923 this type can thus be handled transparently also by NDB API program.
16924 For RANGE, HASH and LIST and subpartitioning the NDB API programs must
16925 implement the function to map to a partition.
16926 */
16927
static int
create_table_set_up_partition_info(HA_CREATE_INFO* create_info,
                                   partition_info *part_info,
                                   NdbDictionary::Table& ndbtab)
{
  DBUG_ENTER("create_table_set_up_partition_info");

  if (part_info->part_type == HASH_PARTITION &&
      part_info->list_of_part_fields == TRUE)
  {
    // PARTITION BY KEY: NDB handles the hashing natively
    Field **fields= part_info->part_field_array;

    DBUG_PRINT("info", ("Using HashMapPartition fragmentation type"));
    ndbtab.setFragmentType(NDBTAB::HashMapPartition);

    // Mark each partitioning field as part of the distribution key
    for (uint i= 0; i < part_info->part_field_list.elements; i++)
    {
      NDBCOL *col= ndbtab.getColumn(fields[i]->field_index);
      DBUG_PRINT("info",("setting dist key on %s", col->getName()));
      col->setPartitionKey(TRUE);
    }
  }
  else
  {
    // User-defined partitioning (RANGE/LIST/HASH): only allowed when the
    // --new option is set
    if (!current_thd->variables.new_mode)
    {
      push_warning_printf(current_thd, Sql_condition::SL_WARNING,
                          ER_ILLEGAL_HA_CREATE_OPTION,
                          ER(ER_ILLEGAL_HA_CREATE_OPTION),
                          ndbcluster_hton_name,
                          "LIST, RANGE and HASH partition disabled by default,"
                          " use --new option to enable");
      DBUG_RETURN(HA_ERR_UNSUPPORTED);
    }
    /*
      Create a shadow field for those tables that have user defined
      partitioning. This field stores the value of the partition
      function such that NDB can handle reorganisations of the data
      even when the MySQL Server isn't available to assist with
      calculation of the partition function value.
    */
    NDBCOL col;
    DBUG_PRINT("info", ("Generating partition func value field"));
    col.setName("$PART_FUNC_VALUE");
    col.setType(NdbDictionary::Column::Int);
    col.setLength(1);
    col.setNullable(FALSE);
    col.setPrimaryKey(FALSE);
    col.setAutoIncrement(FALSE);
    ndbtab.addColumn(col);
    // RANGE/LIST boundaries are flattened into int32 arrays by the
    // helpers below; plain HASH needs no extra data
    if (part_info->part_type == RANGE_PARTITION)
    {
      const int error = create_table_set_range_data(part_info, ndbtab);
      if (error)
      {
        DBUG_RETURN(error);
      }
    }
    else if (part_info->part_type == LIST_PARTITION)
    {
      const int error = create_table_set_list_data(part_info, ndbtab);
      if (error)
      {
        DBUG_RETURN(error);
      }
    }

    DBUG_PRINT("info", ("Using UserDefined fragmentation type"));
    ndbtab.setFragmentType(NDBTAB::UserDefined);
  }

  const bool use_default_num_parts = part_info->use_default_num_partitions;
  ndbtab.setDefaultNoPartitionsFlag(use_default_num_parts);
  ndbtab.setLinearFlag(part_info->linear_hash_ind);
  {
    // Pass MAX_ROWS/MIN_ROWS to NDB (used when picking fragment count);
    // zero means "not specified", so don't override NDB's defaults then
    ha_rows max_rows= create_info->max_rows;
    ha_rows min_rows= create_info->min_rows;
    if (max_rows < min_rows)
      max_rows= min_rows;
    if (max_rows != (ha_rows)0) /* default setting, don't set fragmentation */
    {
      ndbtab.setMaxRows(max_rows);
      ndbtab.setMinRows(min_rows);
    }
  }

  {
    // Count number of fragments to use for the table and
    // build array describing which nodegroup should store each
    // partition(each partition is mapped to one fragment in the table).
    uint32 frag_data[MAX_PARTITIONS];
    ulong fd_index= 0;

    partition_element *part_elem;
    List_iterator<partition_element> part_it(part_info->partitions);
    while((part_elem = part_it++))
    {
      if (!part_info->is_sub_partitioned())
      {
        const Uint32 ng= part_elem->nodegroup_id;
        assert(fd_index < NDB_ARRAY_SIZE(frag_data));
        frag_data[fd_index++]= ng;
      }
      else
      {
        // Subpartitioned table: one fragment per subpartition
        partition_element *subpart_elem;
        List_iterator<partition_element> sub_it(part_elem->subpartitions);
        while((subpart_elem = sub_it++))
        {
          const Uint32 ng= subpart_elem->nodegroup_id;
          assert(fd_index < NDB_ARRAY_SIZE(frag_data));
          frag_data[fd_index++]= ng;
        }
      }
    }

    // Double check number of partitions vs. fragments
    assert(part_info->get_tot_partitions() == fd_index);

    ndbtab.setFragmentCount(fd_index);
    ndbtab.setFragmentData(frag_data, fd_index);
  }
  DBUG_RETURN(0);
}
17052
/**
  Per-ALTER context handed between the inplace alter phases.
  Holds the dictionary, the existing table definition, and a private
  working copy ('new_table') that the alter code mutates.

  NOTE(review): 'new_table' is owned and deleted in the destructor, but
  copy construction/assignment are not disabled — copying an instance
  would presumably double-delete; confirm no caller copies this context.
*/
class NDB_ALTER_DATA : public inplace_alter_handler_ctx
{
public:
  NDB_ALTER_DATA(NdbDictionary::Dictionary *dict,
                 const NdbDictionary::Table *table) :
    dictionary(dict),
    old_table(table),
    new_table(new NdbDictionary::Table(*table)), // owned working copy
    table_id(table->getObjectId()),
    old_table_version(table->getObjectVersion())
  {}
  ~NDB_ALTER_DATA()
  { delete new_table; }
  NdbDictionary::Dictionary *dictionary;  // not owned
  const NdbDictionary::Table *old_table;  // not owned: current definition
  NdbDictionary::Table *new_table;        // owned: definition being built
  Uint32 table_id;                        // id captured at construction
  Uint32 old_table_version;               // version captured at construction
};
17072
17073 enum_alter_inplace_result
check_if_supported_inplace_alter(TABLE * altered_table,Alter_inplace_info * ha_alter_info)17074 ha_ndbcluster::check_if_supported_inplace_alter(TABLE *altered_table,
17075 Alter_inplace_info *ha_alter_info)
17076 {
17077 THD *thd= current_thd;
17078 HA_CREATE_INFO *create_info= ha_alter_info->create_info;
17079 Alter_inplace_info::HA_ALTER_FLAGS alter_flags=
17080 ha_alter_info->handler_flags;
17081 const Alter_inplace_info::HA_ALTER_FLAGS supported=
17082 Alter_inplace_info::ADD_INDEX |
17083 Alter_inplace_info::DROP_INDEX |
17084 Alter_inplace_info::ADD_UNIQUE_INDEX |
17085 Alter_inplace_info::DROP_UNIQUE_INDEX |
17086 Alter_inplace_info::ADD_COLUMN |
17087 Alter_inplace_info::ALTER_COLUMN_DEFAULT |
17088 Alter_inplace_info::ALTER_COLUMN_STORAGE_TYPE |
17089 Alter_inplace_info::ALTER_COLUMN_COLUMN_FORMAT |
17090 Alter_inplace_info::ADD_PARTITION |
17091 Alter_inplace_info::ALTER_TABLE_REORG |
17092 Alter_inplace_info::CHANGE_CREATE_OPTION |
17093 Alter_inplace_info::ADD_FOREIGN_KEY |
17094 Alter_inplace_info::DROP_FOREIGN_KEY |
17095 Alter_inplace_info::ALTER_INDEX_COMMENT;
17096
17097 const Alter_inplace_info::HA_ALTER_FLAGS not_supported= ~supported;
17098
17099 Alter_inplace_info::HA_ALTER_FLAGS add_column=
17100 Alter_inplace_info::ADD_COLUMN;
17101
17102 const Alter_inplace_info::HA_ALTER_FLAGS adding=
17103 Alter_inplace_info::ADD_INDEX |
17104 Alter_inplace_info::ADD_UNIQUE_INDEX;
17105
17106 const Alter_inplace_info::HA_ALTER_FLAGS dropping=
17107 Alter_inplace_info::DROP_INDEX |
17108 Alter_inplace_info::DROP_UNIQUE_INDEX;
17109
17110 enum_alter_inplace_result result= HA_ALTER_INPLACE_SHARED_LOCK;
17111
17112 DBUG_ENTER("ha_ndbcluster::check_if_supported_inplace_alter");
17113 partition_info *part_info= altered_table->part_info;
17114 const NDBTAB *old_tab= m_table;
17115
17116 if (THDVAR(thd, use_copying_alter_table))
17117 {
17118 DBUG_PRINT("info", ("On-line alter table disabled"));
17119 DBUG_RETURN(HA_ALTER_INPLACE_NOT_SUPPORTED);
17120 }
17121
17122 DBUG_PRINT("info", ("Passed alter flags 0x%llx", alter_flags));
17123 DBUG_PRINT("info", ("Supported 0x%llx", supported));
17124 DBUG_PRINT("info", ("Not supported 0x%llx", not_supported));
17125 DBUG_PRINT("info", ("alter_flags & not_supported 0x%llx",
17126 alter_flags & not_supported));
17127
17128 bool auto_increment_value_changed= false;
17129 bool max_rows_changed= false;
17130 if (alter_flags & Alter_inplace_info::CHANGE_CREATE_OPTION)
17131 {
17132 if (create_info->auto_increment_value !=
17133 table->file->stats.auto_increment_value)
17134 auto_increment_value_changed= true;
17135 if (create_info->used_fields & HA_CREATE_USED_MAX_ROWS)
17136 max_rows_changed= true;
17137 }
17138
17139 if (alter_flags & Alter_inplace_info::ALTER_TABLE_REORG)
17140 {
17141 /*
17142 sql_partition.cc tries to compute what is going on
17143 and sets flags...that we clear
17144 */
17145 if (part_info->use_default_num_partitions)
17146 {
17147 alter_flags= alter_flags & ~Alter_inplace_info::COALESCE_PARTITION;
17148 alter_flags= alter_flags & ~Alter_inplace_info::ADD_PARTITION;
17149 }
17150 }
17151
17152 if (alter_flags & Alter_inplace_info::ALTER_COLUMN_DEFAULT &&
17153 !(alter_flags & Alter_inplace_info::ADD_COLUMN))
17154 {
17155 DBUG_PRINT("info", ("Altering default value is not supported"));
17156 DBUG_RETURN(HA_ALTER_INPLACE_NOT_SUPPORTED);
17157 }
17158
17159 if (alter_flags & not_supported)
17160 {
17161 DBUG_PRINT("info", ("Detected unsupported change: 0x%llx",
17162 alter_flags & not_supported));
17163 DBUG_RETURN(HA_ALTER_INPLACE_NOT_SUPPORTED);
17164 }
17165
17166 if (alter_flags & Alter_inplace_info::ADD_COLUMN ||
17167 alter_flags & Alter_inplace_info::ADD_PARTITION ||
17168 alter_flags & Alter_inplace_info::ALTER_TABLE_REORG ||
17169 max_rows_changed)
17170 {
17171 Ndb *ndb= get_ndb(thd);
17172 NDBDICT *dict= ndb->getDictionary();
17173 ndb->setDatabaseName(m_dbname);
17174 NdbDictionary::Table new_tab= *old_tab;
17175
17176 result= HA_ALTER_INPLACE_EXCLUSIVE_LOCK;
17177 if (alter_flags & Alter_inplace_info::ADD_COLUMN)
17178 {
17179 NDBCOL col;
17180
17181 /*
17182 Check that we are only adding columns
17183 */
17184 /*
17185 HA_COLUMN_DEFAULT_VALUE & HA_COLUMN_STORAGE & HA_COLUMN_FORMAT
17186 are set if they are specified in an later cmd
17187 even if they're no change. This is probably a bug
17188 conclusion: add them to add_column-mask, so that we silently "accept" them
17189 In case of someone trying to change a column, the HA_CHANGE_COLUMN would be set
17190 which we don't support, so we will still return HA_ALTER_NOT_SUPPORTED in those cases
17191 */
17192 add_column|= Alter_inplace_info::ALTER_COLUMN_DEFAULT;
17193 add_column|= Alter_inplace_info::ALTER_COLUMN_STORAGE_TYPE;
17194 add_column|= Alter_inplace_info::ALTER_COLUMN_COLUMN_FORMAT;
17195 if (alter_flags & ~add_column)
17196 {
17197 DBUG_PRINT("info", ("Only add column exclusively can be performed on-line"));
17198 DBUG_RETURN(HA_ALTER_INPLACE_NOT_SUPPORTED);
17199 }
17200 /*
17201 Check for extra fields for hidden primary key
17202 or user defined partitioning
17203 */
17204 if (table_share->primary_key == MAX_KEY ||
17205 part_info->part_type != HASH_PARTITION ||
17206 !part_info->list_of_part_fields)
17207 DBUG_RETURN(HA_ALTER_INPLACE_NOT_SUPPORTED);
17208
17209 /* Find the new fields */
17210 for (uint i= table->s->fields; i < altered_table->s->fields; i++)
17211 {
17212 Field *field= altered_table->field[i];
17213 DBUG_PRINT("info", ("Found new field %s", field->field_name));
17214 DBUG_PRINT("info", ("storage_type %i, column_format %i",
17215 (uint) field->field_storage_type(),
17216 (uint) field->column_format()));
17217 if (!(field->flags & NO_DEFAULT_VALUE_FLAG))
17218 {
17219 my_ptrdiff_t src_offset= field->table->s->default_values
17220 - field->table->record[0];
17221 if ((! field->is_real_null(src_offset)) ||
17222 ((field->flags & NOT_NULL_FLAG)))
17223 {
17224 DBUG_PRINT("info",("Adding column with non-null default value is not supported on-line"));
17225 DBUG_RETURN(HA_ALTER_INPLACE_NOT_SUPPORTED);
17226 }
17227 }
17228 /* Create new field to check if it can be added */
17229 set_my_errno(create_ndb_column(thd, col, field, create_info,
17230 COLUMN_FORMAT_TYPE_DYNAMIC));
17231 if (my_errno())
17232 {
17233 DBUG_PRINT("info", ("create_ndb_column returned %u", my_errno()));
17234 DBUG_RETURN(HA_ALTER_ERROR);
17235 }
17236 if (new_tab.addColumn(col))
17237 {
17238 set_my_errno(errno);
17239 DBUG_PRINT("info", ("NdbDictionary::Table::addColumn returned %u", my_errno()));
17240 DBUG_RETURN(HA_ALTER_ERROR);
17241 }
17242 }
17243 }
17244
17245 if (alter_flags & Alter_inplace_info::ALTER_TABLE_REORG)
17246 {
17247 /*
17248 Refuse if Max_rows has been used before...
17249 Workaround is to use ALTER ONLINE TABLE <t> MAX_ROWS=<bigger>;
17250 */
17251 if (old_tab->getMaxRows() != 0)
17252 {
17253 push_warning(current_thd,
17254 Sql_condition::SL_WARNING, ER_UNKNOWN_ERROR,
17255 "Cannot online REORGANIZE a table with Max_Rows set. "
17256 "Use ALTER TABLE ... MAX_ROWS=<new_val> or offline REORGANIZE "
17257 "to redistribute this table.");
17258 DBUG_RETURN(HA_ALTER_INPLACE_NOT_SUPPORTED);
17259 }
17260 new_tab.setFragmentCount(0);
17261 new_tab.setFragmentData(0, 0);
17262 }
17263 else if (alter_flags & Alter_inplace_info::ADD_PARTITION)
17264 {
17265 DBUG_PRINT("info", ("Adding partition (%u)", part_info->num_parts));
17266 new_tab.setFragmentCount(part_info->num_parts);
17267 }
17268 if (max_rows_changed)
17269 {
17270 ulonglong rows= create_info->max_rows;
17271 uint no_fragments= get_no_fragments(rows);
17272 uint reported_frags= no_fragments;
17273 if (adjusted_frag_count(ndb, no_fragments, reported_frags))
17274 {
17275 push_warning(current_thd,
17276 Sql_condition::SL_WARNING, ER_UNKNOWN_ERROR,
17277 "Ndb might have problems storing the max amount "
17278 "of rows specified");
17279 }
17280 if (reported_frags < old_tab->getFragmentCount())
17281 {
17282 DBUG_PRINT("info", ("Online reduction in number of fragments not supported"));
17283 DBUG_RETURN(HA_ALTER_INPLACE_NOT_SUPPORTED);
17284 }
17285 new_tab.setFragmentCount(reported_frags);
17286 new_tab.setDefaultNoPartitionsFlag(false);
17287 new_tab.setFragmentData(0, 0);
17288 }
17289
17290 NDB_Modifiers table_modifiers(ndb_table_modifiers);
17291 table_modifiers.parse(thd, "NDB_TABLE=", create_info->comment.str,
17292 create_info->comment.length);
17293 const NDB_Modifier* mod_nologging = table_modifiers.get("NOLOGGING");
17294
17295 if (mod_nologging->m_found)
17296 {
17297 new_tab.setLogging(!mod_nologging->m_val_bool);
17298 }
17299
17300 if (dict->supportedAlterTable(*old_tab, new_tab))
17301 {
17302 DBUG_PRINT("info", ("Adding column(s) supported on-line"));
17303 }
17304 else
17305 {
17306 DBUG_PRINT("info",("Adding column not supported on-line"));
17307 DBUG_RETURN(HA_ALTER_INPLACE_NOT_SUPPORTED);
17308 }
17309 }
17310
17311 /*
17312 Check that we are not adding multiple indexes
17313 */
17314 if (alter_flags & adding)
17315 {
17316 if (((altered_table->s->keys - table->s->keys) != 1) ||
17317 (alter_flags & dropping))
17318 {
17319 DBUG_PRINT("info",("Only one index can be added on-line"));
17320 DBUG_RETURN(HA_ALTER_INPLACE_NOT_SUPPORTED);
17321 }
17322 }
17323
17324 /*
17325 Check that we are not dropping multiple indexes
17326 */
17327 if (alter_flags & dropping)
17328 {
17329 if (((table->s->keys - altered_table->s->keys) != 1) ||
17330 (alter_flags & adding))
17331 {
17332 DBUG_PRINT("info",("Only one index can be dropped on-line"));
17333 DBUG_RETURN(HA_ALTER_INPLACE_NOT_SUPPORTED);
17334 }
17335 }
17336
17337 for (uint i= 0; i < table->s->fields; i++)
17338 {
17339 Field *field= table->field[i];
17340 const NDBCOL *col= m_table->getColumn(i);
17341
17342 NDBCOL new_col;
17343 create_ndb_column(0, new_col, field, create_info);
17344
17345 bool index_on_column = false;
17346 /**
17347 * Check all indexes to determine if column has index instead of checking
17348 * field->flags (PRI_KEY_FLAG | UNIQUE_KEY_FLAG | MULTIPLE_KEY_FLAG
17349 * since field->flags appears to only be set on first column in
17350 * multi-part index
17351 */
17352 for (uint j= 0; j<table->s->keys; j++)
17353 {
17354 KEY* key_info= table->key_info + j;
17355 KEY_PART_INFO* key_part= key_info->key_part;
17356 KEY_PART_INFO* end= key_part+key_info->user_defined_key_parts;
17357 for (; key_part != end; key_part++)
17358 {
17359 if (key_part->field->field_index == i)
17360 {
17361 index_on_column= true;
17362 j= table->s->keys; // break outer loop
17363 break;
17364 }
17365 }
17366 }
17367
17368 if (index_on_column == false && (alter_flags & adding))
17369 {
17370 for (uint j= table->s->keys; j<altered_table->s->keys; j++)
17371 {
17372 KEY* key_info= altered_table->key_info + j;
17373 KEY_PART_INFO* key_part= key_info->key_part;
17374 KEY_PART_INFO* end= key_part+key_info->user_defined_key_parts;
17375 for (; key_part != end; key_part++)
17376 {
17377 if (key_part->field->field_index == i)
17378 {
17379 index_on_column= true;
17380 j= altered_table->s->keys; // break outer loop
17381 break;
17382 }
17383 }
17384 }
17385 }
17386
17387 /**
17388 * This is a "copy" of code in ::create()
17389 * that "auto-converts" columns with keys into memory
17390 * (unless storage disk is explicitly added)
17391 * This is needed to check if getStorageType() == getStorageType()
17392 * further down
17393 */
17394 if (index_on_column)
17395 {
17396 if (field->field_storage_type() == HA_SM_DISK)
17397 {
17398 DBUG_RETURN(HA_ALTER_INPLACE_NOT_SUPPORTED);
17399 }
17400 new_col.setStorageType(NdbDictionary::Column::StorageTypeMemory);
17401 }
17402 else if (field->field_storage_type() == HA_SM_DEFAULT)
17403 {
17404 /**
17405 * If user didn't specify any column format, keep old
17406 * to make as many alter's as possible online
17407 */
17408 new_col.setStorageType(col->getStorageType());
17409 }
17410
17411 if (col->getStorageType() != new_col.getStorageType())
17412 {
17413 DBUG_PRINT("info", ("Column storage media is changed"));
17414 DBUG_RETURN(HA_ALTER_INPLACE_NOT_SUPPORTED);
17415 }
17416
17417 if (field->flags & FIELD_IS_RENAMED)
17418 {
17419 DBUG_PRINT("info", ("Field has been renamed, copy table"));
17420 DBUG_RETURN(HA_ALTER_INPLACE_NOT_SUPPORTED);
17421 }
17422
17423 if ((field->flags & FIELD_IN_ADD_INDEX) &&
17424 (col->getStorageType() == NdbDictionary::Column::StorageTypeDisk))
17425 {
17426 DBUG_PRINT("info", ("add/drop index not supported for disk stored column"));
17427 DBUG_RETURN(HA_ALTER_INPLACE_NOT_SUPPORTED);
17428 }
17429 }
17430
17431 /* Check that only auto_increment value was changed */
17432 if (auto_increment_value_changed)
17433 {
17434 if (create_info->used_fields ^ ~HA_CREATE_USED_AUTO)
17435 {
17436 DBUG_PRINT("info", ("Not only auto_increment value changed"));
17437 DBUG_RETURN(HA_ALTER_INPLACE_NOT_SUPPORTED);
17438 }
17439 }
17440 else
17441 {
17442 /* Check that row format didn't change */
17443 if (create_info->used_fields & HA_CREATE_USED_AUTO &&
17444 get_row_type() != create_info->row_type)
17445 {
17446 DBUG_PRINT("info", ("Row format changed"));
17447 DBUG_RETURN(HA_ALTER_INPLACE_NOT_SUPPORTED);
17448 }
17449 }
17450 DBUG_PRINT("info", ("Ndb supports ALTER on-line"));
17451 DBUG_RETURN(result);
17452 }
17453
/**
  Prepare phase of an on-line (inplace) ALTER TABLE for NDB.

  Opens an NDB schema transaction and stages the requested changes
  (add/drop index, add column, reorganize/add partitions, MAX_ROWS
  change, add foreign key) on a new table definition held in the
  NDB_ALTER_DATA context. The schema transaction is left OPEN on
  success; it is committed in inplace_alter_table() or aborted in
  abort_inplace_alter_table().

  @param altered_table  Server's view of the table after the ALTER.
  @param ha_alter_info  Flags and index/column metadata describing the
                        change; handler_ctx receives the NDB_ALTER_DATA.

  @return false on success, true on error (error already reported).
*/
bool
ha_ndbcluster::prepare_inplace_alter_table(TABLE *altered_table,
                                           Alter_inplace_info *ha_alter_info)
{
  int error= 0;
  uint i;
  THD *thd= current_thd;
  Thd_ndb *thd_ndb= get_thd_ndb(thd);
  Ndb *ndb= get_ndb(thd);
  NDBDICT *dict= ndb->getDictionary();
  ndb->setDatabaseName(m_dbname);

  HA_CREATE_INFO *create_info= ha_alter_info->create_info;

  const Alter_inplace_info::HA_ALTER_FLAGS alter_flags=
    ha_alter_info->handler_flags;

  /* Flag masks used to detect index addition/removal requests */
  const Alter_inplace_info::HA_ALTER_FLAGS adding=
    Alter_inplace_info::ADD_INDEX |
    Alter_inplace_info::ADD_UNIQUE_INDEX;

  const Alter_inplace_info::HA_ALTER_FLAGS dropping=
    Alter_inplace_info::DROP_INDEX |
    Alter_inplace_info::DROP_UNIQUE_INDEX;

  DBUG_ENTER("ha_ndbcluster::prepare_inplace_alter_table");

  ha_alter_info->handler_ctx= 0;
  /* DDL requires the global schema lock to be held by this thread */
  if (!thd_ndb->has_required_global_schema_lock("ha_ndbcluster::prepare_inplace_alter_table"))
    DBUG_RETURN(true);

  /*
    Allocate the alter context; it owns copies of the old and the
    (to-be-modified) new NDB table definitions and is kept alive in
    handler_ctx until commit/abort.
  */
  NDB_ALTER_DATA *alter_data;
  if (!(alter_data= new NDB_ALTER_DATA(dict, m_table)))
    DBUG_RETURN(true);

  const NDBTAB* const old_tab = alter_data->old_table;
  NdbDictionary::Table * const new_tab = alter_data->new_table;
  ha_alter_info->handler_ctx= alter_data;

  DBUG_PRINT("info", ("altered_table: '%s, alter_flags: 0x%llx",
                      altered_table->s->table_name.str,
                      alter_flags));

  bool auto_increment_value_changed= false;
  bool max_rows_changed= false;
  if (alter_flags & Alter_inplace_info::CHANGE_CREATE_OPTION)
  {
    if (create_info->auto_increment_value !=
        table->file->stats.auto_increment_value)
      auto_increment_value_changed= true;
    if (create_info->used_fields & HA_CREATE_USED_MAX_ROWS)
      max_rows_changed= true;
  }

  prepare_for_alter();

  /* All staged changes below happen inside one NDB schema transaction */
  if (dict->beginSchemaTrans() == -1)
  {
    DBUG_PRINT("info", ("Failed to start schema transaction"));
    ERR_PRINT(dict->getNdbError());
    error= ndb_to_mysql_error(&dict->getNdbError());
    table->file->print_error(error, MYF(0));
    goto err;
  }

  if (alter_flags & adding)
  {
    KEY *key_info;
    KEY *key;
    uint *idx_p;
    uint *idx_end_p;
    KEY_PART_INFO *key_part;
    KEY_PART_INFO *part_end;
    DBUG_PRINT("info", ("Adding indexes"));
    /* Build a temporary KEY array describing only the indexes to add */
    key_info= (KEY*) thd->alloc(sizeof(KEY) * ha_alter_info->index_add_count);
    key= key_info;
    for (idx_p= ha_alter_info->index_add_buffer,
         idx_end_p= idx_p + ha_alter_info->index_add_count;
         idx_p < idx_end_p;
         idx_p++, key++)
    {
      /* Copy the KEY struct. */
      *key= ha_alter_info->key_info_buffer[*idx_p];
      /* Fix the key parts. */
      part_end= key->key_part + key->user_defined_key_parts;
      for (key_part= key->key_part; key_part < part_end; key_part++)
        key_part->field= table->field[key_part->fieldnr];
    }
    if ((error= add_index_impl(thd, altered_table, key_info,
                               ha_alter_info->index_add_count)))
    {
      /*
        Exchange the key_info for the error message. If we exchange
        key number by key name in the message later, we need correct info.
      */
      KEY *save_key_info= table->key_info;
      table->key_info= key_info;
      table->file->print_error(error, MYF(0));
      table->key_info= save_key_info;
      goto abort;
    }
  }

  if (alter_flags & dropping)
  {
    uint *key_numbers;
    uint *keyno_p;
    KEY **idx_p;
    KEY **idx_end_p;
    DBUG_PRINT("info", ("Renumbering indexes"));
    /* The prepare_drop_index() method takes an array of key numbers. */
    key_numbers= (uint*) thd->alloc(sizeof(uint) * ha_alter_info->index_drop_count);
    keyno_p= key_numbers;
    /* Get the number of each key. */
    for (idx_p= ha_alter_info->index_drop_buffer,
         idx_end_p= idx_p + ha_alter_info->index_drop_count;
         idx_p < idx_end_p;
         idx_p++, keyno_p++)
    {
      // Find the key number matching the key to be dropped
      KEY *keyp= *idx_p;
      uint i;          // NOTE: shadows the outer 'i' intentionally
      for(i=0; i < table->s->keys; i++)
      {
        if (keyp == table->key_info + i)
          break;
      }
      DBUG_PRINT("info", ("Dropping index %u", i));
      *keyno_p= i;
    }
    /*
      Tell the handler to prepare for drop indexes.
      This re-numbers the indexes to get rid of gaps.
    */
    if ((error= prepare_drop_index(table, key_numbers,
                                   ha_alter_info->index_drop_count)))
    {
      table->file->print_error(error, MYF(0));
      goto abort;
    }
  }

  if (alter_flags & Alter_inplace_info::ADD_COLUMN)
  {
    NDBCOL col;

    /* Find the new fields (all appended after the existing ones) */
    for (i= table->s->fields; i < altered_table->s->fields; i++)
    {
      Field *field= altered_table->field[i];
      DBUG_PRINT("info", ("Found new field %s", field->field_name));
      set_my_errno(create_ndb_column(thd, col, field, create_info,
                                     COLUMN_FORMAT_TYPE_DYNAMIC));
      if (my_errno())
      {
        error= my_errno();
        goto abort;
      }
      /*
        If the user has not specified the field format
        make it dynamic to enable on-line add attribute
      */
      if (field->column_format() == COLUMN_FORMAT_TYPE_DEFAULT &&
          create_info->row_type == ROW_TYPE_DEFAULT &&
          col.getDynamic())
      {
        push_warning_printf(thd, Sql_condition::SL_WARNING,
                            ER_ILLEGAL_HA_CREATE_OPTION,
                            "Converted FIXED field '%s' to DYNAMIC "
                            "to enable on-line ADD COLUMN",
                            field->field_name);
      }
      new_tab->addColumn(col);
    }
  }

  if (alter_flags & Alter_inplace_info::ALTER_TABLE_REORG ||
      alter_flags & Alter_inplace_info::ADD_PARTITION ||
      max_rows_changed)
  {
    if (alter_flags & Alter_inplace_info::ALTER_TABLE_REORG)
    {
      /* Let NDB choose fragment count/placement from scratch */
      new_tab->setFragmentCount(0);
      new_tab->setFragmentData(0, 0);
    }
    else if (alter_flags & Alter_inplace_info::ADD_PARTITION)
    {
      partition_info *part_info= altered_table->part_info;
      new_tab->setFragmentCount(part_info->num_parts);
    }
    else if (max_rows_changed)
    {
      /* Derive a new fragment count from the new MAX_ROWS value */
      ulonglong rows= create_info->max_rows;
      uint no_fragments= get_no_fragments(rows);
      uint reported_frags= no_fragments;
      if (adjusted_frag_count(ndb, no_fragments, reported_frags))
      {
        assert(false); /* Checked above */
      }
      if (reported_frags < old_tab->getFragmentCount())
      {
        /*
          NOTE(review): returns 'false' (success) here with the schema
          transaction still open — presumably relies on a later
          commit/abort; confirm against callers.
        */
        assert(false);
        DBUG_RETURN(false);
      }
      /* Note we don't set the ndb table's max_rows param, as that
       * is considered a 'real' change
       */
      //new_tab->setMaxRows(create_info->max_rows);
      new_tab->setFragmentCount(reported_frags);
      new_tab->setDefaultNoPartitionsFlag(false);
      new_tab->setFragmentData(0, 0);
    }

    int res= dict->prepareHashMap(*old_tab, *new_tab);
    if (res == -1)
    {
      const NdbError err= dict->getNdbError();
      set_my_errno(ndb_to_mysql_error(&err));
      goto abort;
    }
  }

  if (alter_flags & Alter_inplace_info::ADD_FOREIGN_KEY)
  {
    int res= create_fks(thd, ndb);
    if (res != 0)
    {
      /*
        Unlike CREATE, ALTER for some reason does not translate
        the HA_ code. So fix it to be Innodb compatible.
      */
      if (res == HA_ERR_CANNOT_ADD_FOREIGN)
      {
        DBUG_PRINT("info", ("change error %d to %d",
                            HA_ERR_CANNOT_ADD_FOREIGN, ER_CANNOT_ADD_FOREIGN));
        res= ER_CANNOT_ADD_FOREIGN;
      }
      error= res;
      set_my_errno(error);
      my_error(error, MYF(0), 0);
      goto abort;
    }
  }

  /* Success: schema transaction intentionally left open for the
     execute phase (inplace_alter_table) */
  DBUG_RETURN(false);
abort:
  if (dict->endSchemaTrans(NdbDictionary::Dictionary::SchemaTransAbort)
      == -1)
  {
    DBUG_PRINT("info", ("Failed to abort schema transaction"));
    ERR_PRINT(dict->getNdbError());
    error= ndb_to_mysql_error(&dict->getNdbError());
  }

err:
  DBUG_RETURN(true);
}
17711
alter_frm(const char * file,NDB_ALTER_DATA * alter_data)17712 int ha_ndbcluster::alter_frm(const char *file,
17713 NDB_ALTER_DATA *alter_data)
17714 {
17715 uchar *data= NULL, *pack_data= NULL;
17716 size_t length, pack_length;
17717 int error= 0;
17718
17719 DBUG_ENTER("alter_frm");
17720
17721 DBUG_PRINT("enter", ("file: %s", file));
17722
17723 NDBDICT *dict= alter_data->dictionary;
17724
17725 // TODO handle this
17726 assert(m_table != 0);
17727
17728 assert(get_ndb_share_state(m_share) == NSS_ALTERED);
17729 if (readfrm(file, &data, &length) ||
17730 packfrm(data, length, &pack_data, &pack_length))
17731 {
17732 char errbuf[MYSYS_STRERROR_SIZE];
17733 DBUG_PRINT("info", ("Missing frm for %s", m_tabname));
17734 my_free((char*)data, MYF(MY_ALLOW_ZERO_PTR));
17735 my_free((char*)pack_data, MYF(MY_ALLOW_ZERO_PTR));
17736 error= 1;
17737 my_error(ER_FILE_NOT_FOUND, MYF(0), file,
17738 my_errno(), my_strerror(errbuf, sizeof(errbuf), my_errno()));
17739 }
17740 else
17741 {
17742 DBUG_PRINT("info", ("Table %s has changed, altering frm in ndb",
17743 m_tabname));
17744 const NDBTAB *old_tab= alter_data->old_table;
17745 NdbDictionary::Table *new_tab= alter_data->new_table;
17746
17747 new_tab->setFrm(pack_data, (Uint32)pack_length);
17748 if (dict->alterTableGlobal(*old_tab, *new_tab))
17749 {
17750 DBUG_PRINT("info", ("On-line alter of table %s failed", m_tabname));
17751 error= ndb_to_mysql_error(&dict->getNdbError());
17752 my_error(error, MYF(0), m_tabname);
17753 }
17754 my_free((char*)data, MYF(MY_ALLOW_ZERO_PTR));
17755 my_free((char*)pack_data, MYF(MY_ALLOW_ZERO_PTR));
17756 }
17757
17758 /* ndb_share reference schema(?) free */
17759 DBUG_PRINT("NDB_SHARE", ("%s binlog schema(?) free use_count: %u",
17760 m_share->key_string(), m_share->use_count));
17761
17762 DBUG_RETURN(error);
17763 }
17764
/**
  Execute phase of an on-line (inplace) ALTER TABLE for NDB.

  Completes the changes staged by prepare_inplace_alter_table():
  finalizes index drops, drops foreign keys, installs the new frm
  and commits the NDB schema transaction opened in the prepare
  phase. On any failure the schema transaction is aborted.

  @param altered_table  Server's view of the table after the ALTER.
  @param ha_alter_info  Alter description; handler_ctx holds the
                        NDB_ALTER_DATA created during prepare.

  @return false on success, true on error.
*/
bool
ha_ndbcluster::inplace_alter_table(TABLE *altered_table,
                                   Alter_inplace_info *ha_alter_info)
{
  DBUG_ENTER("ha_ndbcluster::inplace_alter_table");
  int error= 0;
  THD *thd= current_thd;
  Thd_ndb *thd_ndb= get_thd_ndb(thd);
  HA_CREATE_INFO *create_info= ha_alter_info->create_info;
  NDB_ALTER_DATA *alter_data= (NDB_ALTER_DATA *) ha_alter_info->handler_ctx;
  NDBDICT *dict= alter_data->dictionary;
  const Alter_inplace_info::HA_ALTER_FLAGS alter_flags=
    ha_alter_info->handler_flags;
  const Alter_inplace_info::HA_ALTER_FLAGS dropping=
    Alter_inplace_info::DROP_INDEX |
    Alter_inplace_info::DROP_UNIQUE_INDEX;

  /* DDL requires the global schema lock to be held by this thread */
  if (!thd_ndb->has_required_global_schema_lock("ha_ndbcluster::inplace_alter_table"))
  {
    DBUG_RETURN(true);
  }

  bool auto_increment_value_changed= false;
  if (alter_flags & Alter_inplace_info::CHANGE_CREATE_OPTION)
  {
    if (create_info->auto_increment_value !=
        table->file->stats.auto_increment_value)
      auto_increment_value_changed= true;
  }

  if (alter_flags & dropping)
  {
    /* Tell the handler to finally drop the indexes. */
    if ((error= final_drop_index(table)))
    {
      print_error(error, MYF(0));
      goto abort;
    }
  }

  if (alter_flags & Alter_inplace_info::DROP_FOREIGN_KEY)
  {
    const NDBTAB* tab= alter_data->old_table;
    if ((error= drop_fk_for_online_alter(thd, thd_ndb->ndb, dict, tab)) != 0)
    {
      print_error(error, MYF(0));
      goto abort;
    }
  }

  DBUG_PRINT("info", ("getting frm file %s", altered_table->s->path.str));
  error= alter_frm(altered_table->s->path.str, alter_data);
  if (!error)
  {
    /*
     * Alter succesful, commit schema transaction
     */
    if (dict->endSchemaTrans() == -1)
    {
      error= ndb_to_mysql_error(&dict->getNdbError());
      DBUG_PRINT("info", ("Failed to commit schema transaction, error %u",
                          error));
      table->file->print_error(error, MYF(0));
      goto err;
    }
    if (auto_increment_value_changed)
      error= set_auto_inc_val(thd, create_info->auto_increment_value);
    if (error)
    {
      DBUG_PRINT("info", ("Failed to set auto_increment value"));
      goto err;
    }
  }
  else // if (error)
  {
    /* NOTE: the 'goto abort's above jump INTO this else-branch so
       that all failure paths share the abort-transaction code. */
abort:
    if (dict->endSchemaTrans(NdbDictionary::Dictionary::SchemaTransAbort)
        == -1)
    {
      DBUG_PRINT("info", ("Failed to abort schema transaction"));
      ERR_PRINT(dict->getNdbError());
    }
  }

err:
  DBUG_RETURN(error ? true : false);
}
17852
17853 bool
commit_inplace_alter_table(TABLE * altered_table,Alter_inplace_info * ha_alter_info,bool commit)17854 ha_ndbcluster::commit_inplace_alter_table(TABLE *altered_table,
17855 Alter_inplace_info *ha_alter_info,
17856 bool commit)
17857 {
17858 DBUG_ENTER("ha_ndbcluster::commit_inplace_alter_table");
17859
17860 if (!commit)
17861 DBUG_RETURN(abort_inplace_alter_table(altered_table,
17862 ha_alter_info));
17863 THD *thd= current_thd;
17864 Thd_ndb *thd_ndb= get_thd_ndb(thd);
17865 NDB_ALTER_DATA *alter_data= (NDB_ALTER_DATA *) ha_alter_info->handler_ctx;
17866 if (!thd_ndb->has_required_global_schema_lock("ha_ndbcluster::commit_inplace_alter_table"))
17867 {
17868 DBUG_RETURN(true); // Error
17869 }
17870
17871 const char *db= table->s->db.str;
17872 const char *name= table->s->table_name.str;
17873 uint32 table_id= 0, table_version= 0;
17874 assert(alter_data != 0);
17875 if (alter_data)
17876 {
17877 table_id= alter_data->table_id;
17878 table_version= alter_data->old_table_version;
17879 }
17880 ndbcluster_log_schema_op(thd, thd->query().str, thd->query().length,
17881 db, name,
17882 table_id, table_version,
17883 SOT_ONLINE_ALTER_TABLE_PREPARE,
17884 NULL, NULL);
17885 delete alter_data;
17886 ha_alter_info->handler_ctx= 0;
17887 set_ndb_share_state(m_share, NSS_INITIAL);
17888 free_share(&m_share); // Decrease ref_count
17889 DBUG_RETURN(false); // OK
17890 }
17891
17892 bool
abort_inplace_alter_table(TABLE * altered_table,Alter_inplace_info * ha_alter_info)17893 ha_ndbcluster::abort_inplace_alter_table(TABLE *altered_table,
17894 Alter_inplace_info *ha_alter_info)
17895 {
17896 DBUG_ENTER("ha_ndbcluster::abort_inplace_alter_table");
17897
17898 NDB_ALTER_DATA *alter_data= (NDB_ALTER_DATA *) ha_alter_info->handler_ctx;
17899 if (!alter_data)
17900 {
17901 // Could not find any alter_data, nothing to abort or already aborted
17902 DBUG_RETURN(false);
17903 }
17904
17905 NDBDICT *dict= alter_data->dictionary;
17906 if (dict->endSchemaTrans(NdbDictionary::Dictionary::SchemaTransAbort) == -1)
17907 {
17908 DBUG_PRINT("info", ("Failed to abort schema transaction"));
17909 ERR_PRINT(dict->getNdbError());
17910 }
17911 /* ndb_share reference schema free */
17912 DBUG_PRINT("NDB_SHARE", ("%s binlog schema free use_count: %u",
17913 m_share->key_string(), m_share->use_count));
17914 delete alter_data;
17915 ha_alter_info->handler_ctx= 0;
17916 set_ndb_share_state(m_share, NSS_INITIAL);
17917 free_share(&m_share); // Decrease ref_count
17918 DBUG_RETURN(false);
17919 }
17920
notify_table_changed()17921 void ha_ndbcluster::notify_table_changed()
17922 {
17923 DBUG_ENTER("ha_ndbcluster::notify_table_changed ");
17924
17925 /*
17926 all mysqld's will read frms from disk and setup new
17927 event operation for the table (new_op)
17928 */
17929 THD *thd= current_thd;
17930 const char *db= table->s->db.str;
17931 const char *name= table->s->table_name.str;
17932 uint32 table_id= 0, table_version= 0;
17933
17934 /*
17935 Get table id/version for new table
17936 */
17937 {
17938 Ndb* ndb= get_ndb(thd);
17939 assert(ndb != 0);
17940 if (ndb)
17941 {
17942 ndb->setDatabaseName(db);
17943 Ndb_table_guard ndbtab(ndb->getDictionary(), name);
17944 const NDBTAB *new_tab= ndbtab.get_table();
17945 assert(new_tab != 0);
17946 if (new_tab)
17947 {
17948 table_id= new_tab->getObjectId();
17949 table_version= new_tab->getObjectVersion();
17950 }
17951 }
17952 }
17953
17954 /*
17955 all mysqld's will switch to using the new_op, and delete the old
17956 event operation
17957 */
17958 ndbcluster_log_schema_op(thd, thd->query().str, thd->query().length,
17959 db, name,
17960 table_id, table_version,
17961 SOT_ONLINE_ALTER_TABLE_COMMIT,
17962 NULL, NULL);
17963
17964 DBUG_VOID_RETURN;
17965 }
17966
17967 static
set_up_tablespace(st_alter_tablespace * alter_info,NdbDictionary::Tablespace * ndb_ts)17968 bool set_up_tablespace(st_alter_tablespace *alter_info,
17969 NdbDictionary::Tablespace *ndb_ts)
17970 {
17971 if (alter_info->extent_size >= (Uint64(1) << 32))
17972 {
17973 // TODO set correct error
17974 return TRUE;
17975 }
17976 ndb_ts->setName(alter_info->tablespace_name);
17977 ndb_ts->setExtentSize(Uint32(alter_info->extent_size));
17978 ndb_ts->setDefaultLogfileGroup(alter_info->logfile_group_name);
17979 return FALSE;
17980 }
17981
17982 static
set_up_datafile(st_alter_tablespace * alter_info,NdbDictionary::Datafile * ndb_df)17983 bool set_up_datafile(st_alter_tablespace *alter_info,
17984 NdbDictionary::Datafile *ndb_df)
17985 {
17986 if (alter_info->max_size > 0)
17987 {
17988 my_error(ER_TABLESPACE_AUTO_EXTEND_ERROR, MYF(0));
17989 return TRUE;
17990 }
17991 ndb_df->setPath(alter_info->data_file_name);
17992 ndb_df->setSize(alter_info->initial_size);
17993 ndb_df->setTablespace(alter_info->tablespace_name);
17994 return FALSE;
17995 }
17996
17997 static
set_up_logfile_group(st_alter_tablespace * alter_info,NdbDictionary::LogfileGroup * ndb_lg)17998 bool set_up_logfile_group(st_alter_tablespace *alter_info,
17999 NdbDictionary::LogfileGroup *ndb_lg)
18000 {
18001 if (alter_info->undo_buffer_size >= (Uint64(1) << 32))
18002 {
18003 // TODO set correct error
18004 return TRUE;
18005 }
18006
18007 ndb_lg->setName(alter_info->logfile_group_name);
18008 ndb_lg->setUndoBufferSize(Uint32(alter_info->undo_buffer_size));
18009 return FALSE;
18010 }
18011
18012 static
set_up_undofile(st_alter_tablespace * alter_info,NdbDictionary::Undofile * ndb_uf)18013 bool set_up_undofile(st_alter_tablespace *alter_info,
18014 NdbDictionary::Undofile *ndb_uf)
18015 {
18016 ndb_uf->setPath(alter_info->undo_file_name);
18017 ndb_uf->setSize(alter_info->initial_size);
18018 ndb_uf->setLogfileGroup(alter_info->logfile_group_name);
18019 return FALSE;
18020 }
18021
18022
18023 /**
18024 Get the tablespace name from the NDB dictionary for the given table in the
18025 given schema.
18026
18027 @note For NDB tables with version before 50120, the server must ask the
18028 SE for the tablespace name, because for these tables, the tablespace
18029 name is not stored in the .FRM file, but only within the SE itself.
18030
18031 @note The function is essentially doing the same as the corresponding code
18032 block in the function 'get_metadata()', except for the handling of
18033 empty strings, which are in this case returned as "" rather than NULL.
18034
18035 @param thd Thread context.
18036 @param db_name Name of the relevant schema.
18037 @param table_name Name of the relevant table.
18038 @param [out] tablespace_name Name of the tablespace containing the table.
18039
18040 @return Operation status.
18041 @retval == 0 Success.
18042 @retval != 0 Error (handler error code returned).
18043 */
18044
18045 static
ndbcluster_get_tablespace(THD * thd,LEX_CSTRING db_name,LEX_CSTRING table_name,LEX_CSTRING * tablespace_name)18046 int ndbcluster_get_tablespace(THD* thd,
18047 LEX_CSTRING db_name,
18048 LEX_CSTRING table_name,
18049 LEX_CSTRING *tablespace_name)
18050 {
18051 DBUG_ENTER("ndbcluster_get_tablespace");
18052 DBUG_PRINT("enter", ("db_name: %s, table_name: %s", db_name.str,
18053 table_name.str));
18054 assert(tablespace_name != NULL);
18055
18056 Ndb* ndb= check_ndb_in_thd(thd);
18057 if (ndb == NULL)
18058 DBUG_RETURN(HA_ERR_NO_CONNECTION);
18059
18060 NDBDICT *dict= ndb->getDictionary();
18061 const NDBTAB *tab= NULL;
18062
18063 ndb->setDatabaseName(db_name.str);
18064 Ndb_table_guard ndbtab_g(dict, table_name.str);
18065 if (!(tab= ndbtab_g.get_table()))
18066 ERR_RETURN(dict->getNdbError());
18067
18068 Uint32 id;
18069 if (tab->getTablespace(&id))
18070 {
18071 NdbDictionary::Tablespace ts= dict->getTablespace(id);
18072 NdbError ndberr= dict->getNdbError();
18073 if (ndberr.classification == NdbError::NoError)
18074 {
18075 const char *tablespace= ts.getName();
18076 assert(tablespace);
18077 const size_t tablespace_len= strlen(tablespace);
18078 DBUG_PRINT("info", ("Found tablespace '%s'", tablespace));
18079 thd->make_lex_string(tablespace_name, tablespace, tablespace_len, false);
18080 }
18081 }
18082
18083 DBUG_RETURN(0);
18084 }
18085
/**
  Handlerton entry point for tablespace / logfile-group DDL
  (CREATE/ALTER/DROP TABLESPACE, CREATE/ALTER/DROP LOGFILE GROUP).

  Dispatches on alter_info->ts_cmd_type; on success the operation is
  distributed to all mysqld's via ndbcluster_log_schema_op(). NDB
  errors funnel through the 'ndberror'/'ndberror2' labels at the end,
  which translate the NdbError and report via my_error().

  @return 0 on success, 1 on reported error,
          HA_ADMIN_NOT_IMPLEMENTED for unsupported sub-commands,
          HA_ERR_NO_CONNECTION when no NDB connection is available.
*/
static
int ndbcluster_alter_tablespace(handlerton *hton,
                                THD* thd, st_alter_tablespace *alter_info)
{
  int is_tablespace= 0;   /* 1 => log schema op under the tablespace name */
  NdbError err;
  NDBDICT *dict;
  int error;              /* MySQL error code used by the ndberror path */
  const char *errmsg= NULL; /* object description inserted in the message */
  Ndb *ndb;
  DBUG_ENTER("ndbcluster_alter_tablespace");

  ndb= check_ndb_in_thd(thd);
  if (ndb == NULL)
  {
    DBUG_RETURN(HA_ERR_NO_CONNECTION);
  }
  dict= ndb->getDictionary();

  /* id/version of the created/dropped dictionary object, passed to
     the schema distribution below */
  uint32 table_id= 0, table_version= 0;
  switch (alter_info->ts_cmd_type){
  case (CREATE_TABLESPACE):
  {
    error= ER_CREATE_FILEGROUP_FAILED;

    NdbDictionary::Tablespace ndb_ts;
    NdbDictionary::Datafile ndb_df;
    NdbDictionary::ObjectId objid;
    if (set_up_tablespace(alter_info, &ndb_ts))
    {
      DBUG_RETURN(1);
    }
    if (set_up_datafile(alter_info, &ndb_df))
    {
      DBUG_RETURN(1);
    }
    errmsg= "TABLESPACE";
    if (dict->createTablespace(ndb_ts, &objid))
    {
      DBUG_PRINT("error", ("createTablespace returned %d", error));
      goto ndberror;
    }
    table_id = objid.getObjectId();
    table_version = objid.getObjectVersion();
    if (dict->getWarningFlags() &
        NdbDictionary::Dictionary::WarnExtentRoundUp)
    {
      push_warning_printf(current_thd, Sql_condition::SL_WARNING,
                          dict->getWarningFlags(),
                          "Extent size rounded up to kernel page size");
    }
    DBUG_PRINT("alter_info", ("Successfully created Tablespace"));
    errmsg= "DATAFILE";
    if (dict->createDatafile(ndb_df))
    {
      /* Datafile creation failed: try to roll back the tablespace we
         just created, but only if it is still the same object */
      err= dict->getNdbError();
      NdbDictionary::Tablespace tmp= dict->getTablespace(ndb_ts.getName());
      if (dict->getNdbError().code == 0 &&
          tmp.getObjectId() == objid.getObjectId() &&
          tmp.getObjectVersion() == objid.getObjectVersion())
      {
        dict->dropTablespace(tmp);
      }

      DBUG_PRINT("error", ("createDatafile returned %d", error));
      goto ndberror2;   /* 'err' already captured above */
    }
    if (dict->getWarningFlags() &
        NdbDictionary::Dictionary::WarnDatafileRoundUp)
    {
      push_warning_printf(current_thd, Sql_condition::SL_WARNING,
                          dict->getWarningFlags(),
                          "Datafile size rounded up to extent size");
    }
    else /* produce only 1 message */
    if (dict->getWarningFlags() &
        NdbDictionary::Dictionary::WarnDatafileRoundDown)
    {
      push_warning_printf(current_thd, Sql_condition::SL_WARNING,
                          dict->getWarningFlags(),
                          "Datafile size rounded down to extent size");
    }
    is_tablespace= 1;
    break;
  }
  case (ALTER_TABLESPACE):
  {
    error= ER_ALTER_FILEGROUP_FAILED;
    if (alter_info->ts_alter_tablespace_type == ALTER_TABLESPACE_ADD_FILE)
    {
      NdbDictionary::Datafile ndb_df;
      if (set_up_datafile(alter_info, &ndb_df))
      {
        DBUG_RETURN(1);
      }
      errmsg= " CREATE DATAFILE";
      NdbDictionary::ObjectId objid;
      if (dict->createDatafile(ndb_df, false, &objid))
      {
        goto ndberror;
      }
      table_id= objid.getObjectId();
      table_version= objid.getObjectVersion();
      if (dict->getWarningFlags() &
          NdbDictionary::Dictionary::WarnDatafileRoundUp)
      {
        push_warning_printf(current_thd, Sql_condition::SL_WARNING,
                            dict->getWarningFlags(),
                            "Datafile size rounded up to extent size");
      }
      else /* produce only 1 message */
      if (dict->getWarningFlags() &
          NdbDictionary::Dictionary::WarnDatafileRoundDown)
      {
        push_warning_printf(current_thd, Sql_condition::SL_WARNING,
                            dict->getWarningFlags(),
                            "Datafile size rounded down to extent size");
      }
    }
    else if(alter_info->ts_alter_tablespace_type == ALTER_TABLESPACE_DROP_FILE)
    {
      NdbDictionary::Tablespace ts= dict->getTablespace(alter_info->tablespace_name);
      NdbDictionary::Datafile df= dict->getDatafile(0, alter_info->data_file_name);
      NdbDictionary::ObjectId objid;
      df.getTablespaceId(&objid);
      table_id = df.getObjectId();
      table_version = df.getObjectVersion();
      /* Only drop if the datafile really belongs to this tablespace
         and the path matches exactly */
      if (ts.getObjectId() == objid.getObjectId() &&
          strcmp(df.getPath(), alter_info->data_file_name) == 0)
      {
        errmsg= " DROP DATAFILE";
        if (dict->dropDatafile(df))
        {
          goto ndberror;
        }
      }
      else
      {
        DBUG_PRINT("error", ("No such datafile"));
        my_error(ER_ALTER_FILEGROUP_FAILED, MYF(0), " NO SUCH FILE");
        DBUG_RETURN(1);
      }
    }
    else
    {
      DBUG_PRINT("error", ("Unsupported alter tablespace: %d",
                           alter_info->ts_alter_tablespace_type));
      DBUG_RETURN(HA_ADMIN_NOT_IMPLEMENTED);
    }
    is_tablespace= 1;
    break;
  }
  case (CREATE_LOGFILE_GROUP):
  {
    error= ER_CREATE_FILEGROUP_FAILED;
    NdbDictionary::LogfileGroup ndb_lg;
    NdbDictionary::Undofile ndb_uf;
    NdbDictionary::ObjectId objid;
    if (alter_info->undo_file_name == NULL)
    {
      /*
        REDO files in LOGFILE GROUP not supported yet
      */
      DBUG_RETURN(HA_ADMIN_NOT_IMPLEMENTED);
    }
    if (set_up_logfile_group(alter_info, &ndb_lg))
    {
      DBUG_RETURN(1);
    }
    errmsg= "LOGFILE GROUP";
    if (dict->createLogfileGroup(ndb_lg, &objid))
    {
      goto ndberror;
    }
    table_id = objid.getObjectId();
    table_version = objid.getObjectVersion();
    if (dict->getWarningFlags() &
        NdbDictionary::Dictionary::WarnUndobufferRoundUp)
    {
      push_warning_printf(current_thd, Sql_condition::SL_WARNING,
                          dict->getWarningFlags(),
                          "Undo buffer size rounded up to kernel page size");
    }
    DBUG_PRINT("alter_info", ("Successfully created Logfile Group"));
    if (set_up_undofile(alter_info, &ndb_uf))
    {
      DBUG_RETURN(1);
    }
    errmsg= "UNDOFILE";
    if (dict->createUndofile(ndb_uf))
    {
      /* Undofile creation failed: try to roll back the logfile group
         we just created, but only if it is still the same object */
      err= dict->getNdbError();
      NdbDictionary::LogfileGroup tmp= dict->getLogfileGroup(ndb_lg.getName());
      if (dict->getNdbError().code == 0 &&
          tmp.getObjectId() == objid.getObjectId() &&
          tmp.getObjectVersion() == objid.getObjectVersion())
      {
        dict->dropLogfileGroup(tmp);
      }
      goto ndberror2;   /* 'err' already captured above */
    }
    if (dict->getWarningFlags() &
        NdbDictionary::Dictionary::WarnUndofileRoundDown)
    {
      push_warning_printf(current_thd, Sql_condition::SL_WARNING,
                          dict->getWarningFlags(),
                          "Undofile size rounded down to kernel page size");
    }
    break;
  }
  case (ALTER_LOGFILE_GROUP):
  {
    error= ER_ALTER_FILEGROUP_FAILED;
    if (alter_info->undo_file_name == NULL)
    {
      /*
        REDO files in LOGFILE GROUP not supported yet
      */
      DBUG_RETURN(HA_ADMIN_NOT_IMPLEMENTED);
    }
    NdbDictionary::Undofile ndb_uf;
    if (set_up_undofile(alter_info, &ndb_uf))
    {
      DBUG_RETURN(1);
    }
    errmsg= "CREATE UNDOFILE";
    NdbDictionary::ObjectId objid;
    if (dict->createUndofile(ndb_uf, false, &objid))
    {
      goto ndberror;
    }
    table_id = objid.getObjectId();
    table_version = objid.getObjectVersion();
    if (dict->getWarningFlags() &
        NdbDictionary::Dictionary::WarnUndofileRoundDown)
    {
      push_warning_printf(current_thd, Sql_condition::SL_WARNING,
                          dict->getWarningFlags(),
                          "Undofile size rounded down to kernel page size");
    }
    break;
  }
  case (DROP_TABLESPACE):
  {
    error= ER_DROP_FILEGROUP_FAILED;
    errmsg= "TABLESPACE";
    NdbDictionary::Tablespace ts=
      dict->getTablespace(alter_info->tablespace_name);
    table_id= ts.getObjectId();
    table_version= ts.getObjectVersion();
    if (dict->dropTablespace(ts))
    {
      goto ndberror;
    }
    is_tablespace= 1;
    break;
  }
  case (DROP_LOGFILE_GROUP):
  {
    error= ER_DROP_FILEGROUP_FAILED;
    errmsg= "LOGFILE GROUP";
    NdbDictionary::LogfileGroup lg=
      dict->getLogfileGroup(alter_info->logfile_group_name);
    table_id= lg.getObjectId();
    table_version= lg.getObjectVersion();
    if (dict->dropLogfileGroup(lg))
    {
      goto ndberror;
    }
    break;
  }
  case (CHANGE_FILE_TABLESPACE):
  {
    DBUG_RETURN(HA_ADMIN_NOT_IMPLEMENTED);
  }
  case (ALTER_ACCESS_MODE_TABLESPACE):
  {
    DBUG_RETURN(HA_ADMIN_NOT_IMPLEMENTED);
  }
  default:
  {
    DBUG_RETURN(HA_ADMIN_NOT_IMPLEMENTED);
  }
  }
  /* Distribute the DDL to the other mysqld's in the cluster */
  if (is_tablespace)
    ndbcluster_log_schema_op(thd,
                             thd->query().str, thd->query().length,
                             "", alter_info->tablespace_name,
                             table_id, table_version,
                             SOT_TABLESPACE, NULL, NULL);
  else
    ndbcluster_log_schema_op(thd,
                             thd->query().str, thd->query().length,
                             "", alter_info->logfile_group_name,
                             table_id, table_version,
                             SOT_LOGFILE_GROUP, NULL, NULL);
  DBUG_RETURN(FALSE);

/* Common NDB error exit: capture the dictionary error... */
ndberror:
  err= dict->getNdbError();
/* ...translate it (for the side effect of pushing warnings) and
   report 'error' with the object description in 'errmsg' */
ndberror2:
  ndb_to_mysql_error(&err);

  my_error(error, MYF(0), errmsg);
  DBUG_RETURN(1);
}
18392
18393
get_num_parts(const char * name,uint * num_parts)18394 bool ha_ndbcluster::get_num_parts(const char *name, uint *num_parts)
18395 {
18396 THD *thd= current_thd;
18397 Ndb *ndb;
18398 NDBDICT *dict;
18399 int err= 0;
18400 DBUG_ENTER("ha_ndbcluster::get_num_parts");
18401
18402 set_dbname(name);
18403 set_tabname(name);
18404 for (;;)
18405 {
18406 if (check_ndb_connection(thd))
18407 {
18408 err= HA_ERR_NO_CONNECTION;
18409 break;
18410 }
18411 ndb= get_ndb(thd);
18412 ndb->setDatabaseName(m_dbname);
18413 Ndb_table_guard ndbtab_g(dict= ndb->getDictionary(), m_tabname);
18414 if (!ndbtab_g.get_table())
18415 ERR_BREAK(dict->getNdbError(), err);
18416 *num_parts= ndbtab_g.get_table()->getFragmentCount();
18417 DBUG_RETURN(FALSE);
18418 }
18419
18420 print_error(err, MYF(0));
18421 DBUG_RETURN(TRUE);
18422 }
18423
/**
  @brief
  Fill INFORMATION_SCHEMA.FILES with the cluster's disk data objects:
  datafiles, tablespaces, undofiles and logfile groups.

  Datafile and undofile rows are emitted once per (file, alive data
  node) pair, tablespace and logfile group rows once each.  NDB errors
  of classification SchemaError or UnknownResultError cause the
  object/node to be skipped (it may have been dropped concurrently or
  the node may have become unreachable); any other NDB error aborts
  via ERR_RETURN.

  NOTE(review): the return value of schema_table_store_record() is
  ignored throughout -- verify against other engines' fill functions.
*/
static int ndbcluster_fill_files_table(handlerton *hton,
                                       THD *thd,
                                       TABLE_LIST *tables,
                                       Item *cond)
{
  TABLE* table= tables->table;
  Ndb *ndb= check_ndb_in_thd(thd);
  NdbDictionary::Dictionary* dict= ndb->getDictionary();
  NdbDictionary::Dictionary::List dflist;
  NdbError ndberr;
  uint i;
  DBUG_ENTER("ndbcluster_fill_files_table");

  /* 1) Datafiles: one row per file per alive data node. */
  dict->listObjects(dflist, NdbDictionary::Object::Datafile);
  ndberr= dict->getNdbError();
  if (ndberr.classification != NdbError::NoError)
    ERR_RETURN(ndberr);

  for (i= 0; i < dflist.count; i++)
  {
    NdbDictionary::Dictionary::List::Element& elt = dflist.elements[i];
    Ndb_cluster_connection_node_iter iter;
    uint id;

    g_ndb_cluster_connection->init_get_next_node(iter);

    while ((id= g_ndb_cluster_connection->get_next_alive_node(iter)))
    {
      init_fill_schema_files_row(table);
      NdbDictionary::Datafile df= dict->getDatafile(id, elt.name);
      ndberr= dict->getNdbError();
      if(ndberr.classification != NdbError::NoError)
      {
        if (ndberr.classification == NdbError::SchemaError)
          continue;

        if (ndberr.classification == NdbError::UnknownResultError)
          continue;

        ERR_RETURN(ndberr);
      }
      NdbDictionary::Tablespace ts= dict->getTablespace(df.getTablespace());
      ndberr= dict->getNdbError();
      if (ndberr.classification != NdbError::NoError)
      {
        if (ndberr.classification == NdbError::SchemaError)
          continue;
        ERR_RETURN(ndberr);
      }

      table->field[IS_FILES_FILE_NAME]->set_notnull();
      table->field[IS_FILES_FILE_NAME]->store(elt.name, (uint)strlen(elt.name),
                                              system_charset_info);
      table->field[IS_FILES_FILE_TYPE]->set_notnull();
      table->field[IS_FILES_FILE_TYPE]->store("DATAFILE",8,
                                              system_charset_info);
      table->field[IS_FILES_TABLESPACE_NAME]->set_notnull();
      table->field[IS_FILES_TABLESPACE_NAME]->store(df.getTablespace(),
                                                    (uint)strlen(df.getTablespace()),
                                                    system_charset_info);
      table->field[IS_FILES_LOGFILE_GROUP_NAME]->set_notnull();
      table->field[IS_FILES_LOGFILE_GROUP_NAME]->
        store(ts.getDefaultLogfileGroup(),
              (uint)strlen(ts.getDefaultLogfileGroup()),
              system_charset_info);
      table->field[IS_FILES_ENGINE]->set_notnull();
      table->field[IS_FILES_ENGINE]->store(ndbcluster_hton_name,
                                           ndbcluster_hton_name_length,
                                           system_charset_info);

      /* Free/total are reported in units of the tablespace extent size. */
      table->field[IS_FILES_FREE_EXTENTS]->set_notnull();
      table->field[IS_FILES_FREE_EXTENTS]->store(df.getFree()
                                                 / ts.getExtentSize(), true);
      table->field[IS_FILES_TOTAL_EXTENTS]->set_notnull();
      table->field[IS_FILES_TOTAL_EXTENTS]->store(df.getSize()
                                                  / ts.getExtentSize(), true);
      table->field[IS_FILES_EXTENT_SIZE]->set_notnull();
      table->field[IS_FILES_EXTENT_SIZE]->store(ts.getExtentSize(), true);
      table->field[IS_FILES_INITIAL_SIZE]->set_notnull();
      table->field[IS_FILES_INITIAL_SIZE]->store(df.getSize(), true);
      table->field[IS_FILES_MAXIMUM_SIZE]->set_notnull();
      table->field[IS_FILES_MAXIMUM_SIZE]->store(df.getSize(), true);
      table->field[IS_FILES_VERSION]->set_notnull();
      table->field[IS_FILES_VERSION]->store(df.getObjectVersion(), true);

      table->field[IS_FILES_ROW_FORMAT]->set_notnull();
      table->field[IS_FILES_ROW_FORMAT]->store("FIXED", 5, system_charset_info);

      char extra[30];
      int len= (int)my_snprintf(extra, sizeof(extra), "CLUSTER_NODE=%u", id);
      table->field[IS_FILES_EXTRA]->set_notnull();
      table->field[IS_FILES_EXTRA]->store(extra, len, system_charset_info);
      schema_table_store_record(thd, table);
    }
  }

  /* 2) Tablespaces: one row each (no per-node data). */
  NdbDictionary::Dictionary::List tslist;
  dict->listObjects(tslist, NdbDictionary::Object::Tablespace);
  ndberr= dict->getNdbError();
  if (ndberr.classification != NdbError::NoError)
    ERR_RETURN(ndberr);

  for (i= 0; i < tslist.count; i++)
  {
    NdbDictionary::Dictionary::List::Element&elt= tslist.elements[i];

    NdbDictionary::Tablespace ts= dict->getTablespace(elt.name);
    ndberr= dict->getNdbError();
    if (ndberr.classification != NdbError::NoError)
    {
      if (ndberr.classification == NdbError::SchemaError)
        continue;
      ERR_RETURN(ndberr);
    }

    init_fill_schema_files_row(table);
    table->field[IS_FILES_FILE_TYPE]->set_notnull();
    table->field[IS_FILES_FILE_TYPE]->store("TABLESPACE", 10,
                                            system_charset_info);

    table->field[IS_FILES_TABLESPACE_NAME]->set_notnull();
    table->field[IS_FILES_TABLESPACE_NAME]->store(elt.name,
                                                  (uint)strlen(elt.name),
                                                  system_charset_info);
    table->field[IS_FILES_LOGFILE_GROUP_NAME]->set_notnull();
    table->field[IS_FILES_LOGFILE_GROUP_NAME]->
      store(ts.getDefaultLogfileGroup(),
            (uint)strlen(ts.getDefaultLogfileGroup()),
            system_charset_info);

    table->field[IS_FILES_ENGINE]->set_notnull();
    table->field[IS_FILES_ENGINE]->store(ndbcluster_hton_name,
                                         ndbcluster_hton_name_length,
                                         system_charset_info);

    table->field[IS_FILES_EXTENT_SIZE]->set_notnull();
    table->field[IS_FILES_EXTENT_SIZE]->store(ts.getExtentSize(), true);

    table->field[IS_FILES_VERSION]->set_notnull();
    table->field[IS_FILES_VERSION]->store(ts.getObjectVersion(), true);

    schema_table_store_record(thd, table);
  }

  /* 3) Undofiles: one row per file per alive data node. */
  NdbDictionary::Dictionary::List uflist;
  dict->listObjects(uflist, NdbDictionary::Object::Undofile);
  ndberr= dict->getNdbError();
  if (ndberr.classification != NdbError::NoError)
    ERR_RETURN(ndberr);

  for (i= 0; i < uflist.count; i++)
  {
    NdbDictionary::Dictionary::List::Element& elt= uflist.elements[i];
    Ndb_cluster_connection_node_iter iter;
    unsigned id;

    g_ndb_cluster_connection->init_get_next_node(iter);

    while ((id= g_ndb_cluster_connection->get_next_alive_node(iter)))
    {
      NdbDictionary::Undofile uf= dict->getUndofile(id, elt.name);
      ndberr= dict->getNdbError();
      if (ndberr.classification != NdbError::NoError)
      {
        if (ndberr.classification == NdbError::SchemaError)
          continue;
        if (ndberr.classification == NdbError::UnknownResultError)
          continue;
        ERR_RETURN(ndberr);
      }
      NdbDictionary::LogfileGroup lfg=
        dict->getLogfileGroup(uf.getLogfileGroup());
      ndberr= dict->getNdbError();
      if (ndberr.classification != NdbError::NoError)
      {
        if (ndberr.classification == NdbError::SchemaError)
          continue;
        ERR_RETURN(ndberr);
      }

      init_fill_schema_files_row(table);
      table->field[IS_FILES_FILE_NAME]->set_notnull();
      table->field[IS_FILES_FILE_NAME]->store(elt.name, (uint)strlen(elt.name),
                                              system_charset_info);
      table->field[IS_FILES_FILE_TYPE]->set_notnull();
      table->field[IS_FILES_FILE_TYPE]->store("UNDO LOG", 8,
                                              system_charset_info);
      NdbDictionary::ObjectId objid;
      uf.getLogfileGroupId(&objid);
      table->field[IS_FILES_LOGFILE_GROUP_NAME]->set_notnull();
      table->field[IS_FILES_LOGFILE_GROUP_NAME]->store(uf.getLogfileGroup(),
                                                       (uint)strlen(uf.getLogfileGroup()),
                                                       system_charset_info);
      table->field[IS_FILES_LOGFILE_GROUP_NUMBER]->set_notnull();
      table->field[IS_FILES_LOGFILE_GROUP_NUMBER]->store(objid.getObjectId(), true);
      table->field[IS_FILES_ENGINE]->set_notnull();
      table->field[IS_FILES_ENGINE]->store(ndbcluster_hton_name,
                                           ndbcluster_hton_name_length,
                                           system_charset_info);

      /* Undofiles are reported with a nominal extent size of 4
         (NOTE(review): unit/meaning of '4' not visible here -- verify). */
      table->field[IS_FILES_TOTAL_EXTENTS]->set_notnull();
      table->field[IS_FILES_TOTAL_EXTENTS]->store(uf.getSize()/4, true);
      table->field[IS_FILES_EXTENT_SIZE]->set_notnull();
      table->field[IS_FILES_EXTENT_SIZE]->store(4, true);

      table->field[IS_FILES_INITIAL_SIZE]->set_notnull();
      table->field[IS_FILES_INITIAL_SIZE]->store(uf.getSize(), true);
      table->field[IS_FILES_MAXIMUM_SIZE]->set_notnull();
      table->field[IS_FILES_MAXIMUM_SIZE]->store(uf.getSize(), true);

      table->field[IS_FILES_VERSION]->set_notnull();
      table->field[IS_FILES_VERSION]->store(uf.getObjectVersion(), true);

      char extra[100];
      int len= (int)my_snprintf(extra,sizeof(extra),"CLUSTER_NODE=%u;UNDO_BUFFER_SIZE=%lu",
                                id, (ulong) lfg.getUndoBufferSize());
      table->field[IS_FILES_EXTRA]->set_notnull();
      table->field[IS_FILES_EXTRA]->store(extra, len, system_charset_info);
      schema_table_store_record(thd, table);
    }
  }

  // now for LFGs
  /* 4) Logfile groups: one row each. */
  NdbDictionary::Dictionary::List lfglist;
  dict->listObjects(lfglist, NdbDictionary::Object::LogfileGroup);
  ndberr= dict->getNdbError();
  if (ndberr.classification != NdbError::NoError)
    ERR_RETURN(ndberr);

  for (i= 0; i < lfglist.count; i++)
  {
    NdbDictionary::Dictionary::List::Element& elt= lfglist.elements[i];

    NdbDictionary::LogfileGroup lfg= dict->getLogfileGroup(elt.name);
    ndberr= dict->getNdbError();
    if (ndberr.classification != NdbError::NoError)
    {
      if (ndberr.classification == NdbError::SchemaError)
        continue;
      ERR_RETURN(ndberr);
    }

    init_fill_schema_files_row(table);
    table->field[IS_FILES_FILE_TYPE]->set_notnull();
    table->field[IS_FILES_FILE_TYPE]->store("UNDO LOG", 8,
                                            system_charset_info);

    table->field[IS_FILES_LOGFILE_GROUP_NAME]->set_notnull();
    table->field[IS_FILES_LOGFILE_GROUP_NAME]->store(elt.name,
                                                     (uint)strlen(elt.name),
                                                     system_charset_info);
    table->field[IS_FILES_LOGFILE_GROUP_NUMBER]->set_notnull();
    table->field[IS_FILES_LOGFILE_GROUP_NUMBER]->store(lfg.getObjectId(), true);
    table->field[IS_FILES_ENGINE]->set_notnull();
    table->field[IS_FILES_ENGINE]->store(ndbcluster_hton_name,
                                         ndbcluster_hton_name_length,
                                         system_charset_info);

    table->field[IS_FILES_FREE_EXTENTS]->set_notnull();
    table->field[IS_FILES_FREE_EXTENTS]->store(lfg.getUndoFreeWords(), true);
    table->field[IS_FILES_EXTENT_SIZE]->set_notnull();
    table->field[IS_FILES_EXTENT_SIZE]->store(4, true);

    table->field[IS_FILES_VERSION]->set_notnull();
    table->field[IS_FILES_VERSION]->store(lfg.getObjectVersion(), true);

    char extra[100];
    int len= (int)my_snprintf(extra,sizeof(extra),
                              "UNDO_BUFFER_SIZE=%lu",
                              (ulong) lfg.getUndoBufferSize());
    table->field[IS_FILES_EXTRA]->set_notnull();
    table->field[IS_FILES_EXTRA]->store(extra, len, system_charset_info);
    schema_table_store_record(thd, table);
  }
  DBUG_RETURN(0);
}
18700
show_ndb_vars(THD * thd,SHOW_VAR * var,char * buff)18701 static int show_ndb_vars(THD *thd, SHOW_VAR *var, char *buff)
18702 {
18703 if (!check_ndb_in_thd(thd))
18704 return -1;
18705 struct st_ndb_status *st;
18706 SHOW_VAR *st_var;
18707 {
18708 char *mem= (char*)sql_alloc(sizeof(struct st_ndb_status) +
18709 sizeof(ndb_status_variables_dynamic));
18710 st= new (mem) st_ndb_status;
18711 st_var= (SHOW_VAR*)(mem + sizeof(struct st_ndb_status));
18712 memcpy(st_var, &ndb_status_variables_dynamic, sizeof(ndb_status_variables_dynamic));
18713 int i= 0;
18714 SHOW_VAR *tmp= &(ndb_status_variables_dynamic[0]);
18715 for (; tmp->value; tmp++, i++)
18716 st_var[i].value= mem + (tmp->value - (char*)&g_ndb_status);
18717 }
18718 {
18719 Thd_ndb *thd_ndb= get_thd_ndb(thd);
18720 Ndb_cluster_connection *c= thd_ndb->connection;
18721 update_status_variables(thd_ndb, st, c);
18722 }
18723 var->type= SHOW_ARRAY;
18724 var->value= (char *) st_var;
18725 return 0;
18726 }
18727
/* Status variables exported to the server for SHOW STATUS.
   The repeated "Ndb" prefix entries are intentional: each SHOW_FUNC/
   SHOW_ARRAY entry contributes its own group of Ndb_* variables. */
SHOW_VAR ndb_status_variables_export[]= {
  {"Ndb",          (char*) &show_ndb_vars,                 SHOW_FUNC,  SHOW_SCOPE_GLOBAL},
  {"Ndb_conflict", (char*) &show_ndb_conflict_status_vars, SHOW_FUNC,  SHOW_SCOPE_GLOBAL},
  {"Ndb",          (char*) &ndb_status_injector_variables, SHOW_ARRAY, SHOW_SCOPE_GLOBAL},
  {"Ndb",          (char*) &ndb_status_slave_variables,    SHOW_ARRAY, SHOW_SCOPE_GLOBAL},
  {"Ndb",          (char*) &show_ndb_server_api_stats,     SHOW_FUNC,  SHOW_SCOPE_GLOBAL},
  {"Ndb_index_stat", (char*) &ndb_status_index_stat_variables, SHOW_ARRAY, SHOW_SCOPE_GLOBAL},
  {NullS, NullS, SHOW_LONG, SHOW_SCOPE_GLOBAL}
};
18737
18738
cache_check_time_update(MYSQL_THD thd,struct st_mysql_sys_var * var,void * var_ptr,const void * save)18739 static void cache_check_time_update(MYSQL_THD thd,
18740 struct st_mysql_sys_var *var,
18741 void *var_ptr,
18742 const void *save)
18743 {
18744 push_warning_printf(thd, Sql_condition::SL_WARNING,
18745 ER_WARN_DEPRECATED_SYNTAX_NO_REPLACEMENT,
18746 ER_THD(thd, ER_WARN_DEPRECATED_SYNTAX_NO_REPLACEMENT),
18747 "@@ndb_cache_check_time");
18748
18749 opt_ndb_cache_check_time= *static_cast<const ulong*>(save);
18750 }
18751
18752
/* @@ndb_cache_check_time (deprecated, see cache_check_time_update()). */
static MYSQL_SYSVAR_ULONG(
  cache_check_time,                 /* name */
  opt_ndb_cache_check_time,         /* var */
  PLUGIN_VAR_RQCMDARG,
  "A dedicated thread is created to, at the given "
  "millisecond interval, invalidate the query cache "
  "if another MySQL server in the cluster has changed "
  "the data in the database. "
  "This variable is deprecated and will be removed in a future release.",
  NULL,                             /* check func. */
  &cache_check_time_update,         /* update func. */
  0,                                /* default */
  0,                                /* min */
  ONE_YEAR_IN_SECONDS,              /* max */
  0                                 /* block */
);


/* @@ndb_extra_logging: verbosity knob for the error log. */
static MYSQL_SYSVAR_ULONG(
  extra_logging,                    /* name */
  opt_ndb_extra_logging,            /* var */
  PLUGIN_VAR_OPCMDARG,
  "Turn on more logging in the error log.",
  NULL,                             /* check func. */
  NULL,                             /* update func. */
  1,                                /* default */
  0,                                /* min */
  0,                                /* max */
  0                                 /* block */
);


/* @@ndb_wait_connected (read-only, startup). */
static MYSQL_SYSVAR_ULONG(
  wait_connected,                   /* name */
  opt_ndb_wait_connected,           /* var */
  PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY,
  "Time (in seconds) for mysqld to wait for connection "
  "to cluster management and data nodes.",
  NULL,                             /* check func. */
  NULL,                             /* update func. */
  30,                               /* default */
  0,                                /* min */
  ONE_YEAR_IN_SECONDS,              /* max */
  0                                 /* block */
);


/* @@ndb_wait_setup (read-only, startup). */
static MYSQL_SYSVAR_ULONG(
  wait_setup,                       /* name */
  opt_ndb_wait_setup,               /* var */
  PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY,
  "Time (in seconds) for mysqld to wait for setup to "
  "complete (0 = no wait)",
  NULL,                             /* check func. */
  NULL,                             /* update func. */
  30,                               /* default */
  0,                                /* min */
  ONE_YEAR_IN_SECONDS,              /* max */
  0                                 /* block */
);
18813
/* Upper bound for @@ndb_cluster_connection_pool, also sizes
   recv_thread_cpuid_array below. */
static const int MAX_CLUSTER_CONNECTIONS = 63;

/* @@ndb_cluster_connection_pool (read-only, startup). */
static MYSQL_SYSVAR_UINT(
  cluster_connection_pool,          /* name */
  opt_ndb_cluster_connection_pool,  /* var */
  PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY,
  "Pool of cluster connections to be used by mysql server.",
  NULL,                             /* check func. */
  NULL,                             /* update func. */
  1,                                /* default */
  1,                                /* min */
  MAX_CLUSTER_CONNECTIONS,          /* max */
  0                                 /* block */
);

/* Valid range for @@ndb_recv_thread_activation_threshold. */
static const int MIN_ACTIVATION_THRESHOLD = 0;
static const int MAX_ACTIVATION_THRESHOLD = 16;
18831
18832 static
18833 int
ndb_recv_thread_activation_threshold_check(MYSQL_THD thd,struct st_mysql_sys_var * var,void * save,struct st_mysql_value * value)18834 ndb_recv_thread_activation_threshold_check(MYSQL_THD thd,
18835 struct st_mysql_sys_var *var,
18836 void *save,
18837 struct st_mysql_value *value)
18838 {
18839 long long int_buf;
18840 int val = (int)value->val_int(value, &int_buf);
18841 int new_val = (int)int_buf;
18842
18843 if (val != 0 ||
18844 new_val < MIN_ACTIVATION_THRESHOLD ||
18845 new_val > MAX_ACTIVATION_THRESHOLD)
18846 {
18847 return 1;
18848 }
18849 opt_ndb_recv_thread_activation_threshold = new_val;
18850 return 0;
18851 }
18852
/**
  update() callback for @@ndb_recv_thread_activation_threshold.

  The check() callback has already stored the validated value in
  opt_ndb_recv_thread_activation_threshold (it bypasses 'save'), so
  this only pushes the global down to the NDB API.
*/
static
void
ndb_recv_thread_activation_threshold_update(MYSQL_THD,
                                            struct st_mysql_sys_var *var,
                                            void *var_ptr,
                                            const void *save)
{
  ndb_set_recv_thread_activation_threshold(
    opt_ndb_recv_thread_activation_threshold);
}
18863
/* @@ndb_recv_thread_activation_threshold */
static MYSQL_SYSVAR_UINT(
  recv_thread_activation_threshold, /* name */
  opt_ndb_recv_thread_activation_threshold, /* var */
  PLUGIN_VAR_RQCMDARG,
  "Activation threshold when receive thread takes over the polling "
  "of the cluster connection (measured in concurrently active "
  "threads)",
  ndb_recv_thread_activation_threshold_check,  /* check func. */
  ndb_recv_thread_activation_threshold_update, /* update func. */
  8,                                /* default */
  MIN_ACTIVATION_THRESHOLD,         /* min */
  MAX_ACTIVATION_THRESHOLD,         /* max */
  0                                 /* block */
);


/* Definitions needed for receive thread cpu mask config variable */
static const int ndb_recv_thread_cpu_mask_option_buf_size = 512;
char ndb_recv_thread_cpu_mask_option_buf[ndb_recv_thread_cpu_mask_option_buf_size];
/* One CPU id slot per cluster connection (presumably one receive
   thread per connection -- verify against NDB API docs). */
Uint16 recv_thread_cpuid_array[1 * MAX_CLUSTER_CONNECTIONS];
18884
18885 static
18886 int
ndb_recv_thread_cpu_mask_check(MYSQL_THD thd,struct st_mysql_sys_var * var,void * save,struct st_mysql_value * value)18887 ndb_recv_thread_cpu_mask_check(MYSQL_THD thd,
18888 struct st_mysql_sys_var *var,
18889 void *save,
18890 struct st_mysql_value *value)
18891 {
18892 char buf[ndb_recv_thread_cpu_mask_option_buf_size];
18893 int len = sizeof(buf);
18894 const char *str = value->val_str(value, buf, &len);
18895
18896 return ndb_recv_thread_cpu_mask_check_str(str);
18897 }
18898
18899 static int
ndb_recv_thread_cpu_mask_check_str(const char * str)18900 ndb_recv_thread_cpu_mask_check_str(const char *str)
18901 {
18902 unsigned i;
18903 SparseBitmask bitmask;
18904
18905 recv_thread_num_cpus = 0;
18906 if (str == 0)
18907 {
18908 /* Setting to empty string is interpreted as remove locking to CPU */
18909 return 0;
18910 }
18911
18912 if (parse_mask(str, bitmask) < 0)
18913 {
18914 sql_print_information("Trying to set ndb_recv_thread_cpu_mask to"
18915 " illegal value = %s, ignored",
18916 str);
18917 goto error;
18918 }
18919 for (i = bitmask.find(0);
18920 i != SparseBitmask::NotFound;
18921 i = bitmask.find(i + 1))
18922 {
18923 if (recv_thread_num_cpus ==
18924 1 * MAX_CLUSTER_CONNECTIONS)
18925 {
18926 sql_print_information("Trying to set too many CPU's in "
18927 "ndb_recv_thread_cpu_mask, ignored"
18928 " this variable, erroneus value = %s",
18929 str);
18930 goto error;
18931 }
18932 recv_thread_cpuid_array[recv_thread_num_cpus++] = i;
18933 }
18934 return 0;
18935 error:
18936 return 1;
18937 }
18938
static
void
ndb_recv_thread_cpu_mask_update()
{
  /* Push the currently parsed CPU id list down to the NDB API
     receive thread configuration. */
  ndb_set_recv_thread_cpu(recv_thread_cpuid_array,
                          recv_thread_num_cpus);
}
18946
/**
  update() callback for @@ndb_recv_thread_cpu_mask.  The check()
  callback has already parsed the mask into recv_thread_cpuid_array /
  recv_thread_num_cpus; this just applies it.
*/
static
void
ndb_recv_thread_cpu_mask_update_func(MYSQL_THD,
                                     struct st_mysql_sys_var *var,
                                     void *var_ptr,
                                     const void *save)
{
  ndb_recv_thread_cpu_mask_update();
}
18956
/* @@ndb_recv_thread_cpu_mask */
static MYSQL_SYSVAR_STR(
  recv_thread_cpu_mask,             /* name */
  opt_ndb_recv_thread_cpu_mask,     /* var */
  PLUGIN_VAR_RQCMDARG,
  "CPU mask for locking receiver threads to specific CPU, specified "
  " as hexadecimal as e.g. 0x33, one CPU is used per receiver thread.",
  ndb_recv_thread_cpu_mask_check,      /* check func. */
  ndb_recv_thread_cpu_mask_update_func,/* update func. */
  ndb_recv_thread_cpu_mask_option_buf
);

/* should be in index_stat.h */

extern int
ndb_index_stat_option_check(MYSQL_THD,
                            struct st_mysql_sys_var *var,
                            void *save,
                            struct st_mysql_value *value);
extern void
ndb_index_stat_option_update(MYSQL_THD,
                             struct st_mysql_sys_var *var,
                             void *var_ptr,
                             const void *save);

extern char ndb_index_stat_option_buf[];

/* @@ndb_index_stat_option: tunables for index statistics, parsed by
   the externs above (implemented in the index stat module). */
static MYSQL_SYSVAR_STR(
  index_stat_option,                /* name */
  opt_ndb_index_stat_option,        /* var */
  PLUGIN_VAR_RQCMDARG,
  "Comma-separated tunable options for ndb index statistics",
  ndb_index_stat_option_check,      /* check func. */
  ndb_index_stat_option_update,     /* update func. */
  ndb_index_stat_option_buf
);
18992
18993
/* @@ndb_report_thresh_binlog_epoch_slip */
ulong opt_ndb_report_thresh_binlog_epoch_slip;
static MYSQL_SYSVAR_ULONG(
  report_thresh_binlog_epoch_slip,  /* name */
  opt_ndb_report_thresh_binlog_epoch_slip,/* var */
  PLUGIN_VAR_RQCMDARG,
  "Threshold on number of epochs to be behind before reporting binlog "
  "status. E.g. 3 means that if the difference between what epoch has "
  "been received from the storage nodes and what has been applied to "
  "the binlog is 3 or more, a status message will be sent to the cluster "
  "log.",
  NULL,                             /* check func. */
  NULL,                             /* update func. */
  3,                                /* default */
  0,                                /* min */
  256,                              /* max */
  0                                 /* block */
);


/* @@ndb_report_thresh_binlog_mem_usage */
ulong opt_ndb_report_thresh_binlog_mem_usage;
static MYSQL_SYSVAR_ULONG(
  report_thresh_binlog_mem_usage,   /* name */
  opt_ndb_report_thresh_binlog_mem_usage,/* var */
  PLUGIN_VAR_RQCMDARG,
  "Threshold on percentage of free memory before reporting binlog "
  "status. E.g. 10 means that if amount of available memory for "
  "receiving binlog data from the storage nodes goes below 10%, "
  "a status message will be sent to the cluster log.",
  NULL,                             /* check func. */
  NULL,                             /* update func. */
  10,                               /* default */
  0,                                /* min */
  100,                              /* max */
  0                                 /* block */
);


/* @@ndb_eventbuffer_max_alloc (0 = unlimited per the default). */
ulong opt_ndb_eventbuffer_max_alloc;
static MYSQL_SYSVAR_ULONG(
  eventbuffer_max_alloc,            /* name */
  opt_ndb_eventbuffer_max_alloc,    /* var */
  PLUGIN_VAR_RQCMDARG,
  "Maximum memory that can be allocated for buffering "
  "events by the ndb api.",
  NULL,                             /* check func. */
  NULL,                             /* update func. */
  0,                                /* default */
  0,                                /* min */
  UINT_MAX32,                       /* max */
  0                                 /* block */
);


/* @@ndb_eventbuffer_free_percent */
uint opt_ndb_eventbuffer_free_percent;
static MYSQL_SYSVAR_UINT(
  eventbuffer_free_percent,         /* name */
  opt_ndb_eventbuffer_free_percent, /* var */
  PLUGIN_VAR_RQCMDARG,
  "Percentage of free memory that should be available "
  "in event buffer before resuming buffering "
  "after the max_alloc limit is hit.",
  NULL,                             /* check func. */
  NULL,                             /* update func. */
  20,                               /* default */
  1,                                /* min */
  99,                               /* max */
  0                                 /* block */
);
19062
19063
19064 my_bool opt_ndb_log_update_as_write;
19065 static MYSQL_SYSVAR_BOOL(
19066 log_update_as_write, /* name */
19067 opt_ndb_log_update_as_write, /* var */
19068 PLUGIN_VAR_OPCMDARG,
19069 "For efficiency log only after image as a write event. "
19070 "Ignore before image. This may cause compatibility problems if "
19071 "replicating to other storage engines than ndbcluster.",
19072 NULL, /* check func. */
19073 NULL, /* update func. */
19074 1 /* default */
19075 );
19076
19077 my_bool opt_ndb_log_update_minimal;
19078 static MYSQL_SYSVAR_BOOL(
19079 log_update_minimal, /* name */
19080 opt_ndb_log_update_minimal, /* var */
19081 PLUGIN_VAR_OPCMDARG,
19082 "For efficiency, log updates in a minimal format"
19083 "Log only the primary key value(s) in the before "
19084 "image. Log only the changed columns in the after "
19085 "image. This may cause compatibility problems if "
19086 "replicating to other storage engines than ndbcluster.",
19087 NULL, /* check func. */
19088 NULL, /* update func. */
19089 0 /* default */
19090 );
19091
19092 my_bool opt_ndb_log_updated_only;
19093 static MYSQL_SYSVAR_BOOL(
19094 log_updated_only, /* name */
19095 opt_ndb_log_updated_only, /* var */
19096 PLUGIN_VAR_OPCMDARG,
19097 "For efficiency log only updated columns. Columns are considered "
19098 "as \"updated\" even if they are updated with the same value. "
19099 "This may cause compatibility problems if "
19100 "replicating to other storage engines than ndbcluster.",
19101 NULL, /* check func. */
19102 NULL, /* update func. */
19103 1 /* default */
19104 );
19105
19106 my_bool opt_ndb_log_empty_update;
19107 static MYSQL_SYSVAR_BOOL(
19108 log_empty_update, /* name */
19109 opt_ndb_log_empty_update, /* var */
19110 PLUGIN_VAR_OPCMDARG,
19111 "Normally empty updates are filtered away "
19112 "before they are logged. However, for read tracking "
19113 "in conflict resolution a hidden pesudo attribute is "
19114 "set which will result in an empty update along with "
19115 "special flags set. For this to work empty updates "
19116 "have to be allowed.",
19117 NULL, /* check func. */
19118 NULL, /* update func. */
19119 0 /* default */
19120 );
19121
19122 my_bool opt_ndb_log_orig;
19123 static MYSQL_SYSVAR_BOOL(
19124 log_orig, /* name */
19125 opt_ndb_log_orig, /* var */
19126 PLUGIN_VAR_OPCMDARG,
19127 "Log originating server id and epoch in ndb_binlog_index. Each epoch "
19128 "may in this case have multiple rows in ndb_binlog_index, one for "
19129 "each originating epoch.",
19130 NULL, /* check func. */
19131 NULL, /* update func. */
19132 0 /* default */
19133 );
19134
19135
/* @@ndb_log_bin: whether ndb tables are written to the binlog at all. */
my_bool opt_ndb_log_bin;
static MYSQL_SYSVAR_BOOL(
  log_bin,                          /* name */
  opt_ndb_log_bin,                  /* var */
  PLUGIN_VAR_OPCMDARG,
  "Log ndb tables in the binary log. Option only has meaning if "
  "the binary log has been turned on for the server.",
  NULL,                             /* check func. */
  NULL,                             /* update func. */
  1                                 /* default */
);


/* @@ndb_log_binlog_index: maintain epoch -> binlog position mapping. */
my_bool opt_ndb_log_binlog_index;
static MYSQL_SYSVAR_BOOL(
  log_binlog_index,                 /* name */
  opt_ndb_log_binlog_index,         /* var */
  PLUGIN_VAR_OPCMDARG,
  "Insert mapping between epochs and binlog positions into the "
  "ndb_binlog_index table.",
  NULL,                             /* check func. */
  NULL,                             /* update func. */
  1                                 /* default */
);


/* @@ndb_log_empty_epochs (undocumented; exposed to other files via
   the ndb_log_empty_epochs() accessor below). */
static my_bool opt_ndb_log_empty_epochs;
static MYSQL_SYSVAR_BOOL(
  log_empty_epochs,                 /* name */
  opt_ndb_log_empty_epochs,         /* var */
  PLUGIN_VAR_OPCMDARG,
  "",
  NULL,                             /* check func. */
  NULL,                             /* update func. */
  0                                 /* default */
);
19172
/* Accessor for the file-local --ndb-log-empty-epochs option, for use
   by code outside this translation unit. */
bool ndb_log_empty_epochs(void)
{
  return opt_ndb_log_empty_epochs;
}
19177
/* @@ndb_log_apply_status */
my_bool opt_ndb_log_apply_status;
static MYSQL_SYSVAR_BOOL(
  log_apply_status,                 /* name */
  opt_ndb_log_apply_status,         /* var */
  PLUGIN_VAR_OPCMDARG,
  "Log ndb_apply_status updates from Master in the Binlog",
  NULL,                             /* check func. */
  NULL,                             /* update func. */
  0                                 /* default */
);


/* @@ndb_log_transaction_id */
my_bool opt_ndb_log_transaction_id;
static MYSQL_SYSVAR_BOOL(
  log_transaction_id,               /* name */
  opt_ndb_log_transaction_id,       /* var */
  PLUGIN_VAR_OPCMDARG,
  "Log Ndb transaction identities per row in the Binlog",
  NULL,                             /* check func. */
  NULL,                             /* update func. */
  0                                 /* default */
);


/* @@ndb_connectstring (read-only, startup). */
static MYSQL_SYSVAR_STR(
  connectstring,                    /* name */
  opt_ndb_connectstring,            /* var */
  PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY,
  "Connect string for ndbcluster.",
  NULL,                             /* check func. */
  NULL,                             /* update func. */
  NULL                              /* default */
);


/* @@ndb_mgmd_host: alias sharing storage with @@ndb_connectstring. */
static MYSQL_SYSVAR_STR(
  mgmd_host,                        /* name */
  opt_ndb_connectstring,            /* var */
  PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY,
  "Same as --ndb-connectstring",
  NULL,                             /* check func. */
  NULL,                             /* update func. */
  NULL                              /* default */
);


/* @@ndb_nodeid (read-only, startup). */
static MYSQL_SYSVAR_UINT(
  nodeid,                           /* name */
  opt_ndb_nodeid,                   /* var */
  PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY,
  "Set nodeid for this node. Overrides node id specified "
  "in --ndb-connectstring.",
  NULL,                             /* check func. */
  NULL,                             /* update func. */
  0,                                /* default */
  0,                                /* min */
  MAX_NODES_ID,                     /* max */
  0                                 /* block */
);
19237
/* Legal values for @@ndb_slave_conflict_role; the position in this
   array is the enum ordinal stored by the check/update funcs below
   (presumably mirroring enum_slave_conflict_role -- verify there). */
static const char* slave_conflict_role_names[] =
{
  "NONE",
  "SECONDARY",
  "PRIMARY",
  "PASS",
  NullS
};

static TYPELIB slave_conflict_role_typelib =
{
  array_elements(slave_conflict_role_names) - 1,
  "",
  slave_conflict_role_names,
  NULL
};
19254
19255
/**
 * slave_conflict_role_check_func.
 *
 * Perform most validation of a role change request.
 * Inspired by sql_plugin.cc::check_func_enum()
 *
 * Accepts the new role either as a string (SET ... = 'PRIMARY') or
 * as an integer ordinal into slave_conflict_role_typelib, then asks
 * st_ndb_slave_state whether the transition from the current role is
 * allowed.  On success the ordinal is stored in *save for the update
 * func; on any failure 1 is returned (an error is raised only for a
 * rejected transition, not for a malformed value).
 */
static int slave_conflict_role_check_func(THD *thd, struct st_mysql_sys_var *var,
                                          void *save, st_mysql_value *value)
{
  char buff[STRING_BUFFER_USUAL_SIZE];
  const char *str;
  long long tmp;
  long result;
  int length;

  do
  {
    if (value->value_type(value) == MYSQL_VALUE_TYPE_STRING)
    {
      length= sizeof(buff);
      if (!(str= value->val_str(value, buff, &length)))
        break;
      /* find_type() is 1-based; subtracting 1 gives the ordinal,
         and < 0 means the name was not found. */
      if ((result= (long)find_type(str, &slave_conflict_role_typelib, 0) - 1) < 0)
        break;
    }
    else
    {
      if (value->val_int(value, &tmp))
        break;
      if (tmp < 0 || tmp >= slave_conflict_role_typelib.count)
        break;
      result= (long) tmp;
    }

    const char* failure_cause_str = NULL;
    if (!st_ndb_slave_state::checkSlaveConflictRoleChange(
          (enum_slave_conflict_role) opt_ndb_slave_conflict_role,
          (enum_slave_conflict_role) result,
          &failure_cause_str))
    {
      char msgbuf[256];
      my_snprintf(msgbuf,
                  sizeof(msgbuf),
                  "Role change from %s to %s failed : %s",
                  get_type(&slave_conflict_role_typelib, opt_ndb_slave_conflict_role),
                  get_type(&slave_conflict_role_typelib, result),
                  failure_cause_str);

      thd->raise_error_printf(ER_ERROR_WHEN_EXECUTING_COMMAND,
                              "SET GLOBAL ndb_slave_conflict_role",
                              msgbuf);

      break;
    }

    /* Ok */
    *(long*)save= result;
    return 0;
  } while (0);
  /* Error */
  return 1;
};
19318
19319 /**
19320 * slave_conflict_role_update_func
19321 *
19322 * Perform actual change of role, using saved 'long' enum value
19323 * prepared by the update func above.
19324 *
19325 * Inspired by sql_plugin.cc::update_func_long()
19326 */
slave_conflict_role_update_func(THD * thd,struct st_mysql_sys_var * var,void * tgt,const void * save)19327 static void slave_conflict_role_update_func(THD *thd, struct st_mysql_sys_var *var,
19328 void *tgt, const void *save)
19329 {
19330 *(long *)tgt= *(long *) save;
19331 };
19332
/*
  ndb_slave_conflict_role: global enum sysvar controlling the slave's
  role in asymmetric conflict-detection algorithms. Transitions are
  validated by slave_conflict_role_check_func before being applied.
*/
static MYSQL_SYSVAR_ENUM(
  slave_conflict_role,             /* Name */
  opt_ndb_slave_conflict_role,     /* Var */
  PLUGIN_VAR_RQCMDARG,
  "Role for Slave to play in asymmetric conflict algorithms.",
  slave_conflict_role_check_func,  /* Check func */
  slave_conflict_role_update_func, /* Update func */
  SCR_NONE,                        /* Default value */
  &slave_conflict_role_typelib     /* typelib */
);
19343
19344 #ifndef NDEBUG
19345
/*
  Debug-only sysvar update hook: dump all open and dropped NDB_SHARE
  entries to the error log and assert invariants on their state.
  Left byte-identical apart from comments; triggered via the
  ndb_dbg_check_shares THDVAR below.
*/
static
void
dbg_check_shares_update(THD*, st_mysql_sys_var*, void*, const void*)
{
  sql_print_information("dbug_check_shares open:");
  for (uint i= 0; i < ndbcluster_open_tables.records; i++)
  {
    NDB_SHARE *share= (NDB_SHARE*)my_hash_element(&ndbcluster_open_tables, i);
    sql_print_information(" %s.%s: state: %s(%u) use_count: %u",
                          share->db, share->table_name,
                          get_share_state_string(share->state),
                          (unsigned)share->state,
                          share->use_count);
    /* Shares in the open list must not be in dropped state */
    assert(share->state != NSS_DROPPED);
  }

  sql_print_information("dbug_check_shares dropped:");
  for (uint i= 0; i < ndbcluster_dropped_tables.records; i++)
  {
    NDB_SHARE *share= (NDB_SHARE*)my_hash_element(&ndbcluster_dropped_tables,i);
    sql_print_information(" %s.%s: state: %s(%u) use_count: %u",
                          share->db, share->table_name,
                          get_share_state_string(share->state),
                          (unsigned)share->state,
                          share->use_count);
    /* Shares in the dropped list must be in dropped state */
    assert(share->state == NSS_DROPPED);
  }

  /**
   * Only shares in mysql database may be open...
   */
  for (uint i= 0; i < ndbcluster_open_tables.records; i++)
  {
    NDB_SHARE *share= (NDB_SHARE*)my_hash_element(&ndbcluster_open_tables, i);
    assert(strcmp(share->db, "mysql") == 0);
  }

  /**
   * ...and the same restriction applies to dropped shares
   * (previous comment was a copy-paste of the one above).
   */
  for (uint i= 0; i < ndbcluster_dropped_tables.records; i++)
  {
    NDB_SHARE *share= (NDB_SHARE*)my_hash_element(&ndbcluster_dropped_tables,i);
    assert(strcmp(share->db, "mysql") == 0);
  }
}
19392
/*
  ndb_dbg_check_shares: debug-only session variable; setting it runs
  dbg_check_shares_update() to verify no NDB_SHARE objects linger.
*/
static MYSQL_THDVAR_UINT(
  dbg_check_shares,        /* name */
  PLUGIN_VAR_RQCMDARG,
  "Debug, only...check that no shares are lingering...",
  NULL,                    /* check func */
  dbg_check_shares_update, /* update func */
  0,                       /* default */
  0,                       /* min */
  1,                       /* max */
  0                        /* block */
);
19404
19405 #endif
19406
/*
  NULL-terminated registration table of all ndbcluster system variables,
  passed to the server via the plugin descriptor below.
*/
static struct st_mysql_sys_var* system_variables[]= {
  MYSQL_SYSVAR(cache_check_time),
  MYSQL_SYSVAR(extra_logging),
  MYSQL_SYSVAR(wait_connected),
  MYSQL_SYSVAR(wait_setup),
  MYSQL_SYSVAR(cluster_connection_pool),
  MYSQL_SYSVAR(recv_thread_activation_threshold),
  MYSQL_SYSVAR(recv_thread_cpu_mask),
  MYSQL_SYSVAR(report_thresh_binlog_mem_usage),
  MYSQL_SYSVAR(report_thresh_binlog_epoch_slip),
  MYSQL_SYSVAR(eventbuffer_max_alloc),
  MYSQL_SYSVAR(eventbuffer_free_percent),
  MYSQL_SYSVAR(log_update_as_write),
  MYSQL_SYSVAR(log_updated_only),
  MYSQL_SYSVAR(log_update_minimal),
  MYSQL_SYSVAR(log_empty_update),
  MYSQL_SYSVAR(log_orig),
  MYSQL_SYSVAR(distribution),
  MYSQL_SYSVAR(autoincrement_prefetch_sz),
  MYSQL_SYSVAR(force_send),
  MYSQL_SYSVAR(use_exact_count),
  MYSQL_SYSVAR(use_transactions),
  MYSQL_SYSVAR(use_copying_alter_table),
  MYSQL_SYSVAR(optimized_node_selection),
  MYSQL_SYSVAR(batch_size),
  MYSQL_SYSVAR(optimization_delay),
  MYSQL_SYSVAR(index_stat_enable),
  MYSQL_SYSVAR(index_stat_option),
  MYSQL_SYSVAR(table_no_logging),
  MYSQL_SYSVAR(table_temporary),
  MYSQL_SYSVAR(log_bin),
  MYSQL_SYSVAR(log_binlog_index),
  MYSQL_SYSVAR(log_empty_epochs),
  MYSQL_SYSVAR(log_apply_status),
  MYSQL_SYSVAR(log_transaction_id),
  MYSQL_SYSVAR(connectstring),
  MYSQL_SYSVAR(mgmd_host),
  MYSQL_SYSVAR(nodeid),
  MYSQL_SYSVAR(blob_read_batch_bytes),
  MYSQL_SYSVAR(blob_write_batch_bytes),
  MYSQL_SYSVAR(deferred_constraints),
  MYSQL_SYSVAR(join_pushdown),
  MYSQL_SYSVAR(log_exclusive_reads),
#ifndef NDEBUG
  MYSQL_SYSVAR(dbg_check_shares),  /* debug builds only */
#endif
  MYSQL_SYSVAR(version),
  MYSQL_SYSVAR(version_string),
  MYSQL_SYSVAR(show_foreign_key_mock_tables),
  MYSQL_SYSVAR(slave_conflict_role),
  NULL                             /* terminator */
};
19459
/* Storage-engine descriptor handed to the server in the plugin declaration. */
struct st_mysql_storage_engine ndbcluster_storage_engine=
{ MYSQL_HANDLERTON_INTERFACE_VERSION };
19462
19463
19464 extern struct st_mysql_plugin i_s_ndb_transid_mysql_connection_map_plugin;
19465 extern struct st_mysql_plugin ndbinfo_plugin;
19466
/*
  Plugin declaration: registers the ndbcluster storage engine together
  with the ndbinfo plugin and the I_S table mapping mysql connection
  ids to ndb transaction ids.
*/
mysql_declare_plugin(ndbcluster)
{
  MYSQL_STORAGE_ENGINE_PLUGIN,
  &ndbcluster_storage_engine,
  ndbcluster_hton_name,
  "MySQL AB",
  "Clustered, fault-tolerant tables",
  PLUGIN_LICENSE_GPL,
  ndbcluster_init,            /* plugin init */
  NULL,                       /* plugin deinit */
  0x0100,                     /* plugin version */
  ndb_status_variables_export,/* status variables */
  system_variables,           /* system variables */
  NULL,                       /* config options */
  0                           /* flags */
},
ndbinfo_plugin,               /* ndbinfo plugin */
/* IS plugin table which maps between mysql connection id and ndb trans-id */
i_s_ndb_transid_mysql_connection_map_plugin
mysql_declare_plugin_end;
19487
19488