1 /* Copyright (c) 2004, 2016, Oracle and/or its affiliates. All rights reserved.
2
3 This program is free software; you can redistribute it and/or modify
4 it under the terms of the GNU General Public License, version 2.0,
5 as published by the Free Software Foundation.
6
7 This program is also distributed with certain software (including
8 but not limited to OpenSSL) that is licensed under separate terms,
9 as designated in a particular file or component or in included license
10 documentation. The authors of MySQL hereby grant you an additional
11 permission to link the program and your derivative works with the
12 separately licensed software that they have included with MySQL.
13
14 This program is distributed in the hope that it will be useful,
15 but WITHOUT ANY WARRANTY; without even the implied warranty of
16 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 GNU General Public License, version 2.0, for more details.
18
19 You should have received a copy of the GNU General Public License
20 along with this program; if not, write to the Free Software
21 Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */
22
23 /**
24 @file
25
26 @brief
27 This file defines the NDB Cluster handler: the interface between
28 MySQL and NDB Cluster
29 */
30
31 #include "ha_ndbcluster_glue.h"
32
33 #ifdef WITH_NDBCLUSTER_STORAGE_ENGINE
34 #include "ha_ndbcluster.h"
35 #include <ndbapi/NdbApi.hpp>
36 #include <util/Bitmask.hpp>
37 #include <ndbapi/NdbIndexStat.hpp>
38 #include <ndbapi/NdbInterpretedCode.hpp>
39 #include "../storage/ndb/src/ndbapi/NdbQueryBuilder.hpp"
40 #include "../storage/ndb/src/ndbapi/NdbQueryOperation.hpp"
41
42 #include "ha_ndbcluster_binlog.h"
43 #include "ha_ndbcluster_push.h"
44 #include "ha_ndbcluster_cond.h"
45 #include "ha_ndbcluster_tables.h"
46 #include "ha_ndbcluster_connection.h"
47 #include "ndb_thd.h"
48 #include "ndb_table_guard.h"
49 #include "ndb_global_schema_lock.h"
50 #include "ndb_global_schema_lock_guard.h"
51 #ifndef NDB_WITHOUT_JOIN_PUSHDOWN
52 #include "abstract_query_plan.h"
53 #endif
54 #include "ndb_dist_priv_util.h"
55 #include "ha_ndb_index_stat.h"
56
57 #include <mysql/plugin.h>
58 #include <ndb_version.h>
59 #include "ndb_mi.h"
60
// ndb interface initialization/cleanup
// (declared with C linkage; the definitions are not part of what is
// visible here)
extern "C" void ndb_init_internal();
extern "C" void ndb_end_internal();

/*
  File-local defaults.
  NOTE(review): DEFAULT_PARALLELISM and DEFAULT_AUTO_PREFETCH are not
  referenced in the visible part of the file - confirm their use sites.
*/
static const int DEFAULT_PARALLELISM= 0;
static const ha_rows DEFAULT_AUTO_PREFETCH= 32;
/* 3600 * 24 * 365; also passed as the max of the batch_size variable */
static const ulong ONE_YEAR_IN_SECONDS= (ulong) 3600L*24L*365L;

/*
  Storage for ndb option values.
  NOTE(review): presumably bound to server options/system variables
  elsewhere in this file (not visible here) - confirm.
*/
ulong opt_ndb_extra_logging;
static ulong opt_ndb_wait_connected;
ulong opt_ndb_wait_setup;
static ulong opt_ndb_cache_check_time;
static uint opt_ndb_cluster_connection_pool;
static char* opt_ndb_index_stat_option;
static char* opt_ndb_connectstring;
static uint opt_ndb_nodeid;
77
/*
  Per-THD plugin variables, declared with the MYSQL_THDVAR_* macros and
  read through THDVAR(thd, <name>).
  Argument order: name, option flags, help text, check function,
  update function, default and - for the numeric kinds - min, max and
  block size.
*/

/* Number of auto-increment values prefetched; range 1..65535 */
static MYSQL_THDVAR_UINT(
  autoincrement_prefetch_sz,         /* name */
  PLUGIN_VAR_RQCMDARG,               /* option flags */
  "Specify number of autoincrement values that are prefetched.",
  NULL,                              /* check func. */
  NULL,                              /* update func. */
  1,                                 /* default */
  1,                                 /* min */
  65535,                             /* max */
  0                                  /* block */
);


/* Boolean, on by default */
static MYSQL_THDVAR_BOOL(
  force_send,                        /* name */
  PLUGIN_VAR_OPCMDARG,               /* option flags */
  "Force send of buffers to ndb immediately without waiting for "
  "other threads.",
  NULL,                              /* check func. */
  NULL,                              /* update func. */
  1                                  /* default */
);


/* Boolean, off by default */
static MYSQL_THDVAR_BOOL(
  use_exact_count,                   /* name */
  PLUGIN_VAR_OPCMDARG,               /* option flags */
  "Use exact records count during query planning and for fast "
  "select count(*), disable for faster queries.",
  NULL,                              /* check func. */
  NULL,                              /* update func. */
  0                                  /* default */
);


/* Boolean, on by default */
static MYSQL_THDVAR_BOOL(
  use_transactions,                  /* name */
  PLUGIN_VAR_OPCMDARG,               /* option flags */
  "Use transactions for large inserts, if enabled then large "
  "inserts will be split into several smaller transactions",
  NULL,                              /* check func. */
  NULL,                              /* update func. */
  1                                  /* default */
);


/* Boolean, off by default */
static MYSQL_THDVAR_BOOL(
  use_copying_alter_table,           /* name */
  PLUGIN_VAR_OPCMDARG,               /* option flags */
  "Force ndbcluster to always copy tables at alter table (should "
  "only be used if on-line alter table fails).",
  NULL,                              /* check func. */
  NULL,                              /* update func. */
  0                                  /* default */
);


/*
  Integer in 0..3, default 3.
  NOTE(review): the meaning of the individual values is not visible
  here - confirm against the code that reads this variable.
*/
static MYSQL_THDVAR_UINT(
  optimized_node_selection,          /* name */
  PLUGIN_VAR_OPCMDARG,               /* option flags */
  "Select nodes for transactions in a more optimal way.",
  NULL,                              /* check func. */
  NULL,                              /* update func. */
  3,                                 /* default */
  0,                                 /* min */
  3,                                 /* max */
  0                                  /* block */
);
146
147
/*
  Per-THD batch size in bytes, default 32768.
  NOTE(review): the upper bound is ONE_YEAR_IN_SECONDS (31536000), a
  constant named for a time span although this value is a byte count -
  looks like a reused constant; confirm the intended maximum.
*/
static MYSQL_THDVAR_ULONG(
  batch_size,                        /* name */
  PLUGIN_VAR_RQCMDARG,               /* option flags */
  "Batch size in bytes.",
  NULL,                              /* check func. */
  NULL,                              /* update func. */
  32768,                             /* default */
  0,                                 /* min */
  ONE_YEAR_IN_SECONDS,               /* max */
  0                                  /* block */
);
159
160
/* Per-THD delay in milliseconds; range 0..100000, default 10 */
static MYSQL_THDVAR_ULONG(
  optimization_delay,                /* name */
  PLUGIN_VAR_RQCMDARG,               /* option flags */
  "For optimize table, specifies the delay in milliseconds "
  "for each batch of rows sent.",
  NULL,                              /* check func. */
  NULL,                              /* update func. */
  10,                                /* default */
  0,                                 /* min */
  100000,                            /* max */
  0                                  /* block */
);
173
/* The default of index_stat_enable is FALSE before NDB 7.2.0, TRUE after */
#if NDB_VERSION_D < NDB_MAKE_VERSION(7,2,0)
#define DEFAULT_NDB_INDEX_STAT_ENABLE FALSE
#else
#define DEFAULT_NDB_INDEX_STAT_ENABLE TRUE
#endif

/* Per-THD switch; read by ndb_index_stat_get_enable() */
static MYSQL_THDVAR_BOOL(
  index_stat_enable,                 /* name */
  PLUGIN_VAR_OPCMDARG,               /* option flags */
  "Use ndb index statistics in query optimization.",
  NULL,                              /* check func. */
  NULL,                              /* update func. */
  DEFAULT_NDB_INDEX_STAT_ENABLE      /* default */
);
188
189
/* Obsolete variable, kept declared; see its help text */
static MYSQL_THDVAR_ULONG(
  index_stat_cache_entries,          /* name */
  PLUGIN_VAR_NOCMDARG,               /* option flags */
  "Obsolete (ignored and will be removed later).",
  NULL,                              /* check func. */
  NULL,                              /* update func. */
  32,                                /* default */
  0,                                 /* min */
  ULONG_MAX,                         /* max */
  0                                  /* block */
);


/* Obsolete variable, kept declared; see its help text */
static MYSQL_THDVAR_ULONG(
  index_stat_update_freq,            /* name */
  PLUGIN_VAR_NOCMDARG,               /* option flags */
  "Obsolete (ignored and will be removed later).",
  NULL,                              /* check func. */
  NULL,                              /* update func. */
  20,                                /* default */
  0,                                 /* min */
  ULONG_MAX,                         /* max */
  0                                  /* block */
);


/*
  Boolean, FALSE by default, declared with an empty help text.
  NOTE(review): presumably selects non-logged tables at create time -
  confirm against the code that reads it.
*/
static MYSQL_THDVAR_BOOL(
  table_no_logging,                  /* name */
  PLUGIN_VAR_NOCMDARG,               /* option flags */
  "",
  NULL,                              /* check func. */
  NULL,                              /* update func. */
  FALSE                              /* default */
);


/*
  Boolean, FALSE by default, declared with an empty help text.
  NOTE(review): presumably selects temporary tables at create time -
  confirm against the code that reads it.
*/
static MYSQL_THDVAR_BOOL(
  table_temporary,                   /* name */
  PLUGIN_VAR_NOCMDARG,               /* option flags */
  "",
  NULL,                              /* check func. */
  NULL,                              /* update func. */
  FALSE                              /* default */
);
234
/* Per-THD Blob read batch size in bytes; default 65536, 0 means no limit */
static MYSQL_THDVAR_UINT(
  blob_read_batch_bytes,             /* name */
  PLUGIN_VAR_RQCMDARG,               /* option flags */
  "Specifies the bytesize large Blob reads "
  "should be batched into.  0 == No limit.",
  NULL,                              /* check func */
  NULL,                              /* update func */
  65536,                             /* default */
  0,                                 /* min */
  UINT_MAX,                          /* max */
  0                                  /* block */
);

/* Per-THD Blob write batch size in bytes; default 65536, 0 means no limit */
static MYSQL_THDVAR_UINT(
  blob_write_batch_bytes,            /* name */
  PLUGIN_VAR_RQCMDARG,               /* option flags */
  "Specifies the bytesize large Blob writes "
  "should be batched into.  0 == No limit.",
  NULL,                              /* check func */
  NULL,                              /* update func */
  65536,                             /* default */
  0,                                 /* min */
  UINT_MAX,                          /* max */
  0                                  /* block */
);

/* Integer used as a flag: only 0 (default) and 1 are accepted */
static MYSQL_THDVAR_UINT(
  deferred_constraints,              /* name */
  PLUGIN_VAR_RQCMDARG,               /* option flags */
  "Specified that constraints should be checked deferred (when supported)",
  NULL,                              /* check func */
  NULL,                              /* update func */
  0,                                 /* default */
  0,                                 /* min */
  1,                                 /* max */
  0                                  /* block */
);
272
/* The default of join_pushdown is FALSE before NDB 7.2.0, TRUE after */
#if NDB_VERSION_D < NDB_MAKE_VERSION(7,2,0)
#define DEFAULT_NDB_JOIN_PUSHDOWN FALSE
#else
#define DEFAULT_NDB_JOIN_PUSHDOWN TRUE
#endif

/* Per-THD switch for pushing joins down to the data nodes */
static MYSQL_THDVAR_BOOL(
  join_pushdown,                     /* name */
  PLUGIN_VAR_OPCMDARG,               /* option flags */
  "Enable pushing down of join to datanodes",
  NULL,                              /* check func. */
  NULL,                              /* update func. */
  DEFAULT_NDB_JOIN_PUSHDOWN          /* default */
);
287
288 /*
289 Required in index_stat.cc but available only from here
290 thanks to use of top level anonymous structs.
291 */
ndb_index_stat_get_enable(THD * thd)292 bool ndb_index_stat_get_enable(THD *thd)
293 {
294 const bool value = THDVAR(thd, index_stat_enable);
295 return value;
296 }
297
/*
  Forward declarations of handlerton-level functions whose definitions
  are outside the visible part of this file.
  ndbcluster_fill_files_table() is called by ndbcluster_fill_is_table().
*/
static int ndbcluster_end(handlerton *hton, ha_panic_function flag);
static bool ndbcluster_show_status(handlerton *hton, THD*,
                                   stat_print_fn *,
                                   enum ha_stat_type);
static int ndbcluster_alter_tablespace(handlerton *hton,
                                       THD* thd,
                                       st_alter_tablespace *info);
static int ndbcluster_fill_files_table(handlerton *hton,
                                       THD *thd,
                                       TABLE_LIST *tables,
                                       Item *cond);
309
310 #if MYSQL_VERSION_ID >= 50501
311 /**
312 Used to fill in INFORMATION_SCHEMA* tables.
313
314 @param hton handle to the handlerton structure
315 @param thd the thread/connection descriptor
316 @param[in,out] tables the information schema table that is filled up
317 @param cond used for conditional pushdown to storage engine
318 @param schema_table_idx the table id that distinguishes the type of table
319
320 @return Operation status
321 */
322 static int
ndbcluster_fill_is_table(handlerton * hton,THD * thd,TABLE_LIST * tables,Item * cond,enum enum_schema_tables schema_table_idx)323 ndbcluster_fill_is_table(handlerton *hton, THD *thd, TABLE_LIST *tables,
324 Item *cond, enum enum_schema_tables schema_table_idx)
325 {
326 if (schema_table_idx == SCH_FILES)
327 return ndbcluster_fill_files_table(hton, thd, tables, cond);
328 return 0;
329 }
330 #endif
331
332
/*
  Global pointer to the ndbcluster handlerton.
  NOTE(review): presumably assigned during plugin initialization, which
  is not visible here - confirm.
*/
handlerton *ndbcluster_hton;
334
ndbcluster_create_handler(handlerton * hton,TABLE_SHARE * table,MEM_ROOT * mem_root)335 static handler *ndbcluster_create_handler(handlerton *hton,
336 TABLE_SHARE *table,
337 MEM_ROOT *mem_root)
338 {
339 return new (mem_root) ha_ndbcluster(hton, table);
340 }
341
342 static uint
ndbcluster_partition_flags()343 ndbcluster_partition_flags()
344 {
345 return (HA_CAN_PARTITION | HA_CAN_UPDATE_PARTITION_KEY |
346 HA_CAN_PARTITION_UNIQUE | HA_USE_AUTO_PARTITION);
347 }
348
349 #ifndef NDB_WITHOUT_ONLINE_ALTER
350 static uint
ndbcluster_alter_table_flags(uint flags)351 ndbcluster_alter_table_flags(uint flags)
352 {
353 if (flags & ALTER_DROP_PARTITION)
354 return 0;
355 else
356 return (HA_PARTITION_FUNCTION_SUPPORTED);
357 }
358 #else
359 static uint
ndbcluster_alter_table_flags(uint flags)360 ndbcluster_alter_table_flags(uint flags)
361 {
362 const uint f=
363 HA_PARTITION_FUNCTION_SUPPORTED |
364 0;
365
366 if (flags & Alter_info::ALTER_DROP_PARTITION)
367 return 0;
368
369 return f;
370 }
371 #endif
372
/*
  NOTE(review): presumably the retry limit when fetching auto-increment
  values - the use site is not visible here, confirm.
*/
#define NDB_AUTO_INCREMENT_RETRIES 100
/*
  NOTE(review): same value as the default of the batch_size variable;
  whether the two are meant to stay in sync is not visible here.
*/
#define BATCH_FLUSH_SIZE (32768)
/*
  Room for 10 instruction words, two labels (@ 2words/label)
  + 2 extra words for the case of resolve_size == 8
  (10 + 2*2 + 2 = 16)
*/
#define MAX_CONFLICT_INTERPRETED_PROG_SIZE 16

/* Declared early because the ERR_* macros below expand to calls of it */
static int ndb_to_mysql_error(const NdbError *ndberr);
382
/*
  Trace the code and message of an NdbError under the "error" DBUG
  keyword.  The argument is parenthesized in the expansion so that any
  expression yielding an NdbError (for example a dereferenced pointer)
  can be passed safely.
*/
#define ERR_PRINT(err) \
  DBUG_PRINT("error", ("%d  message: %s", (err).code, (err).message))
385
/*
  Map an NdbError to a MySQL error code with ndb_to_mysql_error() and
  return it from the enclosing function through DBUG_RETURN.
  The argument is bound to a const reference first, so it is evaluated
  once.  Expands to a brace block, not to a single statement.
*/
#define ERR_RETURN(err)                  \
{                                        \
  const NdbError& tmp= err;              \
  DBUG_RETURN(ndb_to_mysql_error(&tmp)); \
}

/*
  Map an NdbError to a MySQL error code, store it in 'code' and leave
  the innermost enclosing loop or switch of the caller with 'break'.
  Because of that 'break' the body must stay a plain brace block.
*/
#define ERR_BREAK(err, code)             \
{                                        \
  const NdbError& tmp= err;              \
  code= ndb_to_mysql_error(&tmp);        \
  break;                                 \
}

/*
  Map an NdbError to a MySQL error code and store it in 'code';
  control flow continues after the macro.
*/
#define ERR_SET(err, code)               \
{                                        \
  const NdbError& tmp= err;              \
  code= ndb_to_mysql_error(&tmp);        \
}
404
/* NOTE(review): presumably set once plugin initialization is done - confirm */
static int ndbcluster_inited= 0;
/* NOTE(review): presumably raised at shutdown for background threads - confirm */
int ndbcluster_terminating= 0;

/*
  Indicator and CONDVAR used to delay client and slave
  connections until Ndb has Binlog setup
  (bug#46955)
*/
int ndb_setup_complete= 0;
pthread_cond_t COND_ndb_setup_complete; // Signal with ndbcluster_mutex

/* Defined in another translation unit */
extern Ndb* g_ndb;

/*
  One byte per node, max_ndb_nodes entries.
  NOTE(review): what each entry maps to is not visible here.
*/
uchar g_node_id_map[max_ndb_nodes];

/// Handler synchronization
pthread_mutex_t ndbcluster_mutex;

/// Table lock handling
HASH ndbcluster_open_tables;

/* Forward declarations; the definitions are outside the visible part */
static uchar *ndbcluster_get_key(NDB_SHARE *share, size_t *length,
                                my_bool not_used MY_ATTRIBUTE((unused)));

static void modify_shared_stats(NDB_SHARE *share,
                                Ndb_local_table_statistics *local_stat);

/* have_lock defaults to FALSE and part_id to all bits set (~(uint)0) */
static int ndb_get_table_statistics(THD *thd, ha_ndbcluster*, bool, Ndb*,
                                    const NdbRecord *, struct Ndb_statistics *,
                                    bool have_lock= FALSE,
                                    uint part_id= ~(uint)0);

/* NOTE(review): presumably the THD of the binlog injector thread - confirm */
THD *injector_thd= 0;
438
// Util thread variables
// (thread handle, running flag, and the mutex/conditions that go with it;
// the thread body ndb_util_thread_func() is defined outside this view)
pthread_t ndb_util_thread;
int ndb_util_thread_running= 0;
pthread_mutex_t LOCK_ndb_util_thread;
pthread_cond_t COND_ndb_util_thread;
pthread_cond_t COND_ndb_util_ready;
pthread_handler_t ndb_util_thread_func(void *arg);

// Index stats thread variables
// (same layout as for the util thread, plus the list/stat mutexes and
// condition; NOTE(review): which data each mutex guards is not visible here)
pthread_t ndb_index_stat_thread;
int ndb_index_stat_thread_running= 0;
pthread_mutex_t LOCK_ndb_index_stat_thread;
pthread_cond_t COND_ndb_index_stat_thread;
pthread_cond_t COND_ndb_index_stat_ready;
pthread_mutex_t ndb_index_stat_list_mutex;
pthread_mutex_t ndb_index_stat_stat_mutex;
pthread_cond_t ndb_index_stat_stat_cond;
pthread_handler_t ndb_index_stat_thread_func(void *arg);

/* Index statistics helpers defined in another translation unit */
extern void ndb_index_stat_free(NDB_SHARE *share);
extern void ndb_index_stat_end();
460
/* Status variables shown with 'show status like 'Ndb%' */

/* Filled in by update_status_variables() and exposed through
   ndb_status_variables_dynamic */
struct st_ndb_status g_ndb_status;

/* Exposed through ndb_status_index_stat_variables */
long g_ndb_status_index_stat_cache_query = 0;
long g_ndb_status_index_stat_cache_clean = 0;

/* Exposed through ndb_status_injector_variables */
long long g_event_data_count = 0;
long long g_event_nondata_count = 0;
long long g_event_bytes_count = 0;

/* Refreshed by update_slave_api_stats(), one slot per Ndb client statistic */
static long long g_slave_api_client_stats[Ndb::NumClientStatistics];

/* Refreshed by show_ndb_server_api_stats(), one slot per Ndb client statistic */
static long long g_server_api_client_stats[Ndb::NumClientStatistics];
475
476 void
update_slave_api_stats(Ndb * ndb)477 update_slave_api_stats(Ndb* ndb)
478 {
479 for (Uint32 i=0; i < Ndb::NumClientStatistics; i++)
480 g_slave_api_client_stats[i] = ndb->getClientStat(i);
481 }
482
/* Global slave state; read and updated by check_slave_state() and
   exposed through the conflict/slave status variable arrays */
st_ndb_slave_state g_ndb_slave_state;
484
st_ndb_slave_state()485 st_ndb_slave_state::st_ndb_slave_state()
486 : current_conflict_defined_op_count(0),
487 current_master_server_epoch(0),
488 current_max_rep_epoch(0),
489 max_rep_epoch(0),
490 sql_run_id(~Uint32(0))
491 {
492 memset(current_violation_count, 0, sizeof(current_violation_count));
493 memset(total_violation_count, 0, sizeof(total_violation_count));
494 };
495
496 void
atTransactionAbort()497 st_ndb_slave_state::atTransactionAbort()
498 {
499 /* Reset current-transaction counters + state */
500 memset(current_violation_count, 0, sizeof(current_violation_count));
501 current_conflict_defined_op_count = 0;
502 current_max_rep_epoch = 0;
503 }
504
505 void
atTransactionCommit()506 st_ndb_slave_state::atTransactionCommit()
507 {
508 /* Merge committed transaction counters into total state
509 * Then reset current transaction counters
510 */
511 for (int i=0; i < CFT_NUMBER_OF_CFTS; i++)
512 {
513 total_violation_count[i]+= current_violation_count[i];
514 current_violation_count[i] = 0;
515 }
516 current_conflict_defined_op_count = 0;
517 if (current_max_rep_epoch > max_rep_epoch)
518 {
519 DBUG_PRINT("info", ("Max replicated epoch increases from %llu to %llu",
520 max_rep_epoch,
521 current_max_rep_epoch));
522
523 max_rep_epoch = current_max_rep_epoch;
524 }
525 current_max_rep_epoch = 0;
526 }
527
528 void
atApplyStatusWrite(Uint32 master_server_id,Uint32 row_server_id,Uint64 row_epoch,bool is_row_server_id_local)529 st_ndb_slave_state::atApplyStatusWrite(Uint32 master_server_id,
530 Uint32 row_server_id,
531 Uint64 row_epoch,
532 bool is_row_server_id_local)
533 {
534 if (row_server_id == master_server_id)
535 {
536 /*
537 WRITE_ROW to ndb_apply_status injected by MySQLD
538 immediately upstream of us.
539 Record epoch
540 */
541 current_master_server_epoch = row_epoch;
542 assert(! is_row_server_id_local);
543 }
544 else if (is_row_server_id_local)
545 {
546 DBUG_PRINT("info", ("Recording application of local server %u epoch %llu "
547 " which is %s.",
548 row_server_id, row_epoch,
549 (row_epoch > g_ndb_slave_state.current_max_rep_epoch)?
550 " new highest." : " older than previously applied"));
551 if (row_epoch > current_max_rep_epoch)
552 {
553 /*
554 Store new highest epoch in thdvar. If we commit successfully
555 then this can become the new global max
556 */
557 current_max_rep_epoch = row_epoch;
558 }
559 }
560 }
561
562 void
atResetSlave()563 st_ndb_slave_state::atResetSlave()
564 {
565 /* Reset the Maximum replicated epoch vars
566 * on slave reset
567 * No need to touch the sql_run_id as that
568 * will increment if the slave is started
569 * again.
570 */
571 current_max_rep_epoch = 0;
572 max_rep_epoch = 0;
573 }
574
/**
  Detect a (re)start of the slave SQL thread and, when one is seen,
  reload the highest applied epoch from the apply-status table.

  Work is only done when compiled with HAVE_NDB_BINLOG and when the
  caller is a slave thread (thd->slave_thread); otherwise the function
  returns at once.

  A restart is detected by comparing ndb_mi_get_slave_run_id() with the
  run id remembered in g_ndb_slave_state.sql_run_id.  On a change the
  table NDB_REP_DB.NDB_APPLY_TABLE is scanned with a committed read, and
  the largest epoch among the rows whose server id is this server's id,
  or an id for which ndb_mi_get_ignore_server_id() is true, is stored in
  g_ndb_slave_state.max_rep_epoch (zero when nothing was found or the
  scan failed part way).

  Failures while reading are only reported with sql_print_warning().

  @param thd  thread descriptor of the caller

  @return always 0
*/
static int check_slave_state(THD* thd)
{
  DBUG_ENTER("check_slave_state");

#ifdef HAVE_NDB_BINLOG
  if (!thd->slave_thread)
    DBUG_RETURN(0);

  const Uint32 runId = ndb_mi_get_slave_run_id();
  DBUG_PRINT("info", ("Slave SQL thread run id is %u",
                      runId));
  if (unlikely(runId != g_ndb_slave_state.sql_run_id))
  {
    DBUG_PRINT("info", ("Slave run id changed from %u, "
                        "treating as Slave restart",
                        g_ndb_slave_state.sql_run_id));
    /* Remember the new run id first, so the load below runs once per run */
    g_ndb_slave_state.sql_run_id = runId;

    /* Always try to load the Max Replicated Epoch info
     * first.
     * Could be made optional if it's a problem
     */
    {
      /*
         Load highest replicated epoch from a local
         MySQLD from the cluster.
      */
      DBUG_PRINT("info", ("Loading applied epoch information from %s",
                          NDB_APPLY_TABLE));
      NdbError ndb_error;
      Uint64 highestAppliedEpoch = 0;
      /* Single-pass loop: 'break' skips the rest of the setup */
      do
      {
        /*
          NOTE(review): the result of check_ndb_in_thd() is dereferenced
          without a NULL check - confirm it cannot fail at this point.
        */
        Ndb* ndb= check_ndb_in_thd(thd);
        NDBDICT* dict= ndb->getDictionary();
        NdbTransaction* trans= NULL;
        ndb->setDatabaseName(NDB_REP_DB);
        Ndb_table_guard ndbtab_g(dict, NDB_APPLY_TABLE);

        const NDBTAB* ndbtab= ndbtab_g.get_table();
        if (unlikely(ndbtab == NULL))
        {
          ndb_error = dict->getNdbError();
          break;
        }

        trans= ndb->startTransaction();
        if (unlikely(trans == NULL))
        {
          ndb_error = ndb->getNdbError();
          break;
        }

        /*
          Inner single-pass loop: from here on a transaction exists, so
          every 'break' falls through to trans->close() below.
        */
        do
        {
          NdbScanOperation* sop = trans->getNdbScanOperation(ndbtab);
          if (unlikely(sop == NULL))
          {
            ndb_error = trans->getNdbError();
            break;
          }

          /* Columns are addressed by position: 0 = server id, 1 = epoch */
          const Uint32 server_id_col_num = 0;
          const Uint32 epoch_col_num = 1;
          NdbRecAttr* server_id_ra = 0;
          NdbRecAttr* epoch_ra = 0;

          if (unlikely((sop->readTuples(NdbOperation::LM_CommittedRead) != 0)   ||
                       ((server_id_ra = sop->getValue(server_id_col_num)) == NULL)  ||
                       ((epoch_ra = sop->getValue(epoch_col_num)) == NULL)))
          {
            ndb_error = sop->getNdbError();
            break;
          }

          if (trans->execute(NdbTransaction::Commit))
          {
            ndb_error = trans->getNdbError();
            break;
          }

          /* nextResult() yields 0 per row; the loop ends on any other value */
          int rc = 0;
          while (0 == (rc= sop->nextResult(true)))
          {
            Uint32 serverid = server_id_ra->u_32_value();
            Uint64 epoch = epoch_ra->u_64_value();

            /* Only rows of this server or of ignored server ids count */
            if ((serverid == ::server_id) ||
                (ndb_mi_get_ignore_server_id(serverid)))
            {
              highestAppliedEpoch = MAX(epoch, highestAppliedEpoch);
            }
          }

          /* Anything but 1 after the loop is treated as a scan error */
          if (rc != 1)
          {
            ndb_error = sop->getNdbError();
            break;
          }
        } while (0);

        trans->close();
      } while(0);

      if (ndb_error.code != 0)
      {
        sql_print_warning("NDB Slave : Could not determine maximum replicated epoch from %s.%s "
                          "at Slave start, error %u %s",
                          NDB_REP_DB,
                          NDB_APPLY_TABLE,
                          ndb_error.code, ndb_error.message);
      }

      /*
        Set Global status variable to the Highest Applied Epoch from
        the Cluster DB.
        If none was found, this will be zero.
      */
      g_ndb_slave_state.max_rep_epoch = highestAppliedEpoch;
      sql_print_information("NDB Slave : MaxReplicatedEpoch set to %llu (%u/%u) at Slave start",
                            g_ndb_slave_state.max_rep_epoch,
                            (Uint32)(g_ndb_slave_state.max_rep_epoch >> 32),
                            (Uint32)(g_ndb_slave_state.max_rep_epoch & 0xffffffff));
    } // Load highest replicated epoch
  } // New Slave SQL thread run id
#endif

  DBUG_RETURN(0);
}
704
705
update_status_variables(Thd_ndb * thd_ndb,st_ndb_status * ns,Ndb_cluster_connection * c)706 static int update_status_variables(Thd_ndb *thd_ndb,
707 st_ndb_status *ns,
708 Ndb_cluster_connection *c)
709 {
710 ns->connected_port= c->get_connected_port();
711 ns->connected_host= c->get_connected_host();
712 if (ns->cluster_node_id != (int) c->node_id())
713 {
714 ns->cluster_node_id= c->node_id();
715 if (&g_ndb_status == ns && g_ndb_cluster_connection == c)
716 sql_print_information("NDB: NodeID is %lu, management server '%s:%lu'",
717 ns->cluster_node_id, ns->connected_host,
718 ns->connected_port);
719 }
720 ns->number_of_replicas= 0;
721 {
722 int n= c->get_no_ready();
723 ns->number_of_ready_data_nodes= n > 0 ? n : 0;
724 }
725 ns->number_of_data_nodes= c->no_db_nodes();
726 ns->connect_count= c->get_connect_count();
727 if (thd_ndb)
728 {
729 ns->execute_count= thd_ndb->m_execute_count;
730 ns->scan_count= thd_ndb->m_scan_count;
731 ns->pruned_scan_count= thd_ndb->m_pruned_scan_count;
732 ns->sorted_scan_count= thd_ndb->m_sorted_scan_count;
733 ns->pushed_queries_defined= thd_ndb->m_pushed_queries_defined;
734 ns->pushed_queries_dropped= thd_ndb->m_pushed_queries_dropped;
735 ns->pushed_queries_executed= thd_ndb->m_pushed_queries_executed;
736 ns->pushed_reads= thd_ndb->m_pushed_reads;
737 for (int i= 0; i < MAX_NDB_NODES; i++)
738 {
739 ns->transaction_no_hint_count[i]= thd_ndb->m_transaction_no_hint_count[i];
740 ns->transaction_hint_count[i]= thd_ndb->m_transaction_hint_count[i];
741 }
742 for (int i=0; i < Ndb::NumClientStatistics; i++)
743 {
744 ns->api_client_stats[i] = thd_ndb->ndb->getClientStat(i);
745 }
746 ns->schema_locks_count= thd_ndb->schema_locks_count;
747 }
748 return 0;
749 }
750
/*
  Helper macro for definitions of NdbApi status variables

  Expands to a comma separated list of SHOW_VAR initializers, one per
  Ndb client statistic, all of type SHOW_LONGLONG.

  NAME_SUFFIX     string literal appended to each variable name
  ARRAY_LOCATION  address-of expression of the counter array, e.g.
                  &g_slave_api_client_stats; it is pasted unparenthesized
                  in front of the subscript, so each entry becomes
                  (char*) &array[ index ], the address of one element
*/

#define NDBAPI_COUNTERS(NAME_SUFFIX, ARRAY_LOCATION)                    \
  {"api_wait_exec_complete_count" NAME_SUFFIX,                          \
   (char*) ARRAY_LOCATION[ Ndb::WaitExecCompleteCount ],                \
   SHOW_LONGLONG},                                                      \
  {"api_wait_scan_result_count" NAME_SUFFIX,                            \
   (char*) ARRAY_LOCATION[ Ndb::WaitScanResultCount ],                  \
   SHOW_LONGLONG},                                                      \
  {"api_wait_meta_request_count" NAME_SUFFIX,                           \
   (char*) ARRAY_LOCATION[ Ndb::WaitMetaRequestCount ],                 \
   SHOW_LONGLONG},                                                      \
  {"api_wait_nanos_count" NAME_SUFFIX,                                  \
   (char*) ARRAY_LOCATION[ Ndb::WaitNanosCount ],                       \
   SHOW_LONGLONG},                                                      \
  {"api_bytes_sent_count" NAME_SUFFIX,                                  \
   (char*) ARRAY_LOCATION[ Ndb::BytesSentCount ],                       \
   SHOW_LONGLONG},                                                      \
  {"api_bytes_received_count" NAME_SUFFIX,                              \
   (char*) ARRAY_LOCATION[ Ndb::BytesRecvdCount ],                      \
   SHOW_LONGLONG},                                                      \
  {"api_trans_start_count" NAME_SUFFIX,                                 \
   (char*) ARRAY_LOCATION[ Ndb::TransStartCount ],                      \
   SHOW_LONGLONG},                                                      \
  {"api_trans_commit_count" NAME_SUFFIX,                                \
   (char*) ARRAY_LOCATION[ Ndb::TransCommitCount ],                     \
   SHOW_LONGLONG},                                                      \
  {"api_trans_abort_count" NAME_SUFFIX,                                 \
   (char*) ARRAY_LOCATION[ Ndb::TransAbortCount ],                      \
   SHOW_LONGLONG},                                                      \
  {"api_trans_close_count" NAME_SUFFIX,                                 \
   (char*) ARRAY_LOCATION[ Ndb::TransCloseCount ],                      \
   SHOW_LONGLONG},                                                      \
  {"api_pk_op_count" NAME_SUFFIX,                                       \
   (char*) ARRAY_LOCATION[ Ndb::PkOpCount ],                            \
   SHOW_LONGLONG},                                                      \
  {"api_uk_op_count" NAME_SUFFIX,                                       \
   (char*) ARRAY_LOCATION[ Ndb::UkOpCount ],                            \
   SHOW_LONGLONG},                                                      \
  {"api_table_scan_count" NAME_SUFFIX,                                  \
   (char*) ARRAY_LOCATION[ Ndb::TableScanCount ],                       \
   SHOW_LONGLONG},                                                      \
  {"api_range_scan_count" NAME_SUFFIX,                                  \
   (char*) ARRAY_LOCATION[ Ndb::RangeScanCount ],                       \
   SHOW_LONGLONG},                                                      \
  {"api_pruned_scan_count" NAME_SUFFIX,                                 \
   (char*) ARRAY_LOCATION[ Ndb::PrunedScanCount ],                      \
   SHOW_LONGLONG},                                                      \
  {"api_scan_batch_count" NAME_SUFFIX,                                  \
   (char*) ARRAY_LOCATION[ Ndb::ScanBatchCount ],                       \
   SHOW_LONGLONG},                                                      \
  {"api_read_row_count" NAME_SUFFIX,                                    \
   (char*) ARRAY_LOCATION[ Ndb::ReadRowCount ],                         \
   SHOW_LONGLONG},                                                      \
  {"api_trans_local_read_row_count" NAME_SUFFIX,                        \
   (char*) ARRAY_LOCATION[ Ndb::TransLocalReadRowCount ],               \
   SHOW_LONGLONG}
808
/*
  Status variables backed by g_ndb_status (the structure filled in by
  update_status_variables()).  The per-session NdbApi counters are added
  with the "_session" name suffix.  The list ends with a NullS entry.
*/
SHOW_VAR ndb_status_variables_dynamic[]= {
  {"cluster_node_id",     (char*) &g_ndb_status.cluster_node_id,      SHOW_LONG},
  {"config_from_host",    (char*) &g_ndb_status.connected_host,       SHOW_CHAR_PTR},
  {"config_from_port",    (char*) &g_ndb_status.connected_port,       SHOW_LONG},
//{"number_of_replicas",  (char*) &g_ndb_status.number_of_replicas,   SHOW_LONG},
  {"number_of_data_nodes",(char*) &g_ndb_status.number_of_data_nodes, SHOW_LONG},
  {"number_of_ready_data_nodes",
   (char*) &g_ndb_status.number_of_ready_data_nodes,                  SHOW_LONG},
  {"connect_count",      (char*) &g_ndb_status.connect_count,         SHOW_LONG},
  {"execute_count",      (char*) &g_ndb_status.execute_count,         SHOW_LONG},
  {"scan_count",         (char*) &g_ndb_status.scan_count,            SHOW_LONG},
  {"pruned_scan_count",  (char*) &g_ndb_status.pruned_scan_count,     SHOW_LONG},
  {"schema_locks_count", (char*) &g_ndb_status.schema_locks_count,    SHOW_LONG},
  NDBAPI_COUNTERS("_session", &g_ndb_status.api_client_stats),
  {"sorted_scan_count",  (char*) &g_ndb_status.sorted_scan_count,     SHOW_LONG},
  {"pushed_queries_defined", (char*) &g_ndb_status.pushed_queries_defined,
   SHOW_LONG},
  {"pushed_queries_dropped", (char*) &g_ndb_status.pushed_queries_dropped,
   SHOW_LONG},
  {"pushed_queries_executed", (char*) &g_ndb_status.pushed_queries_executed,
   SHOW_LONG},
  {"pushed_reads",       (char*) &g_ndb_status.pushed_reads,          SHOW_LONG},
  {NullS, NullS, SHOW_LONG}
};
833
/*
  Conflict counters: one entry per listed conflict function type, each
  pointing at its slot of g_ndb_slave_state.total_violation_count (the
  totals updated in st_ndb_slave_state::atTransactionCommit()).
*/
SHOW_VAR ndb_status_conflict_variables[]= {
  {"fn_max",     (char*) &g_ndb_slave_state.total_violation_count[CFT_NDB_MAX], SHOW_LONGLONG},
  {"fn_old",     (char*) &g_ndb_slave_state.total_violation_count[CFT_NDB_OLD], SHOW_LONGLONG},
  {"fn_max_del_win", (char*) &g_ndb_slave_state.total_violation_count[CFT_NDB_MAX_DEL_WIN], SHOW_LONGLONG},
  {"fn_epoch",   (char*) &g_ndb_slave_state.total_violation_count[CFT_NDB_EPOCH], SHOW_LONGLONG},
  {NullS, NullS, SHOW_LONG}
};
841
/*
  Event counters of the injector, backed by the g_event_* globals.
  NOTE(review): those globals are not updated in the visible part of the
  file - presumably maintained by the binlog injector thread.
*/
SHOW_VAR ndb_status_injector_variables[]= {
  {"api_event_data_count_injector",     (char*) &g_event_data_count, SHOW_LONGLONG},
  {"api_event_nondata_count_injector",  (char*) &g_event_nondata_count, SHOW_LONGLONG},
  {"api_event_bytes_count_injector",    (char*) &g_event_bytes_count, SHOW_LONGLONG},
  {NullS, NullS, SHOW_LONG}
};
848
/*
  Slave status variables: the NdbApi counters copied by
  update_slave_api_stats() (name suffix "_slave") plus the global
  maximum replicated epoch.
*/
SHOW_VAR ndb_status_slave_variables[]= {
  NDBAPI_COUNTERS("_slave", &g_slave_api_client_stats),
  {"slave_max_replicated_epoch", (char*) &g_ndb_slave_state.max_rep_epoch, SHOW_LONGLONG},
  {NullS, NullS, SHOW_LONG}
};
854
/*
  Server-wide NdbApi counters (no name suffix) plus the three event
  counters, all backed by g_server_api_client_stats.  The array is handed
  out by show_ndb_server_api_stats(), which refreshes the counters first.
*/
SHOW_VAR ndb_status_server_client_stat_variables[]= {
  NDBAPI_COUNTERS("", &g_server_api_client_stats),
  {"api_event_data_count",
   (char*) &g_server_api_client_stats[ Ndb::DataEventsRecvdCount ],
   SHOW_LONGLONG},
  {"api_event_nondata_count",
   (char*) &g_server_api_client_stats[ Ndb::NonDataEventsRecvdCount ],
   SHOW_LONGLONG},
  {"api_event_bytes_count",
   (char*) &g_server_api_client_stats[ Ndb::EventBytesRecvdCount ],
   SHOW_LONGLONG},
  {NullS, NullS, SHOW_LONG}
};
868
show_ndb_server_api_stats(THD * thd,SHOW_VAR * var,char * buff)869 static int show_ndb_server_api_stats(THD *thd, SHOW_VAR *var, char *buff)
870 {
871 /* This function is called when SHOW STATUS / INFO_SCHEMA wants
872 * to see one of our status vars
873 * We use this opportunity to :
874 * 1) Update the globals with current values
875 * 2) Return an array of var definitions, pointing to
876 * the updated globals
877 */
878 ndb_get_connection_stats((Uint64*) &g_server_api_client_stats[0]);
879
880 var->type= SHOW_ARRAY;
881 var->value= (char*) ndb_status_server_client_stat_variables;
882
883 return 0;
884 }
885
/*
  Index statistics status variables, backed by the
  g_ndb_status_index_stat_cache_* globals.
*/
SHOW_VAR ndb_status_index_stat_variables[]= {
  {"cache_query",     (char*) &g_ndb_status_index_stat_cache_query, SHOW_LONG},
  {"cache_clean",     (char*) &g_ndb_status_index_stat_cache_clean, SHOW_LONG},
  {NullS, NullS, SHOW_LONG}
};

#ifndef NDB_WITHOUT_JOIN_PUSHDOWN
/* Forward declaration; the definition is outside the visible part */
static int ndbcluster_make_pushed_join(handlerton *, THD*,AQP::Join_plan*);
#endif
895
896 /*
897 Error handling functions
898 */
899
900 /* Note for merge: old mapping table, moved to storage/ndb/ndberror.c */
901
ndb_to_mysql_error(const NdbError * ndberr)902 static int ndb_to_mysql_error(const NdbError *ndberr)
903 {
904 /* read the mysql mapped error code */
905 int error= ndberr->mysql_code;
906
907 switch (error)
908 {
909 /* errors for which we do not add warnings, just return mapped error code
910 */
911 case HA_ERR_NO_SUCH_TABLE:
912 case HA_ERR_KEY_NOT_FOUND:
913 return error;
914
915 /* Mapping missing, go with the ndb error code*/
916 case -1:
917 error= ndberr->code;
918 break;
919 /* Mapping exists, go with the mapped code */
920 default:
921 break;
922 }
923
924 /*
925 If we don't abort directly on warnings push a warning
926 with the internal error information
927 */
928 if (!current_thd->abort_on_warning)
929 {
930 /*
931 Push the NDB error message as warning
932 - Used to be able to use SHOW WARNINGS toget more info on what the error is
933 - Used by replication to see if the error was temporary
934 */
935 if (ndberr->status == NdbError::TemporaryError)
936 push_warning_printf(current_thd, Sql_condition::WARN_LEVEL_WARN,
937 ER_GET_TEMPORARY_ERRMSG, ER(ER_GET_TEMPORARY_ERRMSG),
938 ndberr->code, ndberr->message, "NDB");
939 else
940 push_warning_printf(current_thd, Sql_condition::WARN_LEVEL_WARN,
941 ER_GET_ERRMSG, ER(ER_GET_ERRMSG),
942 ndberr->code, ndberr->message, "NDB");
943 }
944 return error;
945 }
946
947 #ifdef HAVE_NDB_BINLOG
948
949 /* Write conflicting row to exceptions table. */
write_conflict_row(NDB_SHARE * share,NdbTransaction * trans,const uchar * row,NdbError & err)950 static int write_conflict_row(NDB_SHARE *share,
951 NdbTransaction *trans,
952 const uchar *row,
953 NdbError& err)
954 {
955 DBUG_ENTER("write_conflict_row");
956
957 /* get exceptions table */
958 NDB_CONFLICT_FN_SHARE *cfn_share= share->m_cfn_share;
959 const NDBTAB *ex_tab= cfn_share->m_ex_tab;
960 DBUG_ASSERT(ex_tab != NULL);
961
962 /* get insert op */
963 NdbOperation *ex_op= trans->getNdbOperation(ex_tab);
964 if (ex_op == NULL)
965 {
966 err= trans->getNdbError();
967 DBUG_RETURN(-1);
968 }
969 if (ex_op->insertTuple() == -1)
970 {
971 err= ex_op->getNdbError();
972 DBUG_RETURN(-1);
973 }
974 {
975 uint32 server_id= (uint32)::server_id;
976 uint32 master_server_id= (uint32) ndb_mi_get_master_server_id();
977 uint64 master_epoch= (uint64) g_ndb_slave_state.current_master_server_epoch;
978 uint32 count= (uint32)++(cfn_share->m_count);
979 if (ex_op->setValue((Uint32)0, (const char *)&(server_id)) ||
980 ex_op->setValue((Uint32)1, (const char *)&(master_server_id)) ||
981 ex_op->setValue((Uint32)2, (const char *)&(master_epoch)) ||
982 ex_op->setValue((Uint32)3, (const char *)&(count)))
983 {
984 err= ex_op->getNdbError();
985 DBUG_RETURN(-1);
986 }
987 }
988 /* copy primary keys */
989 {
990 const int fixed_cols= 4;
991 int nkey= cfn_share->m_pk_cols;
992 int k;
993 for (k= 0; k < nkey; k++)
994 {
995 DBUG_ASSERT(row != NULL);
996 const uchar* data= row + cfn_share->m_offset[k];
997 if (ex_op->setValue((Uint32)(fixed_cols + k), (const char*)data) == -1)
998 {
999 err= ex_op->getNdbError();
1000 DBUG_RETURN(-1);
1001 }
1002 }
1003 }
1004 DBUG_RETURN(0);
1005 }
1006 #endif
1007
#ifdef HAVE_NDB_BINLOG
/*
  Conflict handling entry points, declared ahead of their first use;
  the definitions are outside the visible part of this file.
  handle_conflict_op_error() is called from
  check_completed_operations_pre_commit() for operations that carry
  custom data and completed with an error.
*/
int
handle_conflict_op_error(Thd_ndb* thd_ndb,
                         NdbTransaction* trans,
                         const NdbError& err,
                         const NdbOperation* op);

int
handle_row_conflict(NDB_CONFLICT_FN_SHARE* cfn_share,
                    const char* tab_name,
                    const NdbRecord* key_rec,
                    const uchar* pk_row,
                    enum_conflicting_op_type op_type,
                    enum_conflict_cause conflict_cause,
                    const NdbError& conflict_error,
                    NdbTransaction* conflict_trans,
                    NdbError& err);
#endif

/*
  NOTE(review): presumably the NDB error code returned for an operation
  defined after a refresh operation - confirm against the NDB error list.
*/
static const Uint32 error_op_after_refresh_op = 920;
1028
1029 inline int
check_completed_operations_pre_commit(Thd_ndb * thd_ndb,NdbTransaction * trans,const NdbOperation * first,const NdbOperation * last,uint * ignore_count)1030 check_completed_operations_pre_commit(Thd_ndb *thd_ndb, NdbTransaction *trans,
1031 const NdbOperation *first,
1032 const NdbOperation *last,
1033 uint *ignore_count)
1034 {
1035 uint ignores= 0;
1036 DBUG_ENTER("check_completed_operations_pre_commit");
1037
1038 if (unlikely(first == 0))
1039 {
1040 assert(last == 0);
1041 DBUG_RETURN(0);
1042 }
1043
1044 /*
1045 Check that all errors are "accepted" errors
1046 or exceptions to report
1047 */
1048 #ifdef HAVE_NDB_BINLOG
1049 const NdbOperation* lastUserOp = trans->getLastDefinedOperation();
1050 #endif
1051 while (true)
1052 {
1053 const NdbError &err= first->getNdbError();
1054 const bool op_has_conflict_detection = (first->getCustomData() != NULL);
1055 if (!op_has_conflict_detection)
1056 {
1057 /* 'Normal path' - ignore key (not) present, others are errors */
1058 if (err.classification != NdbError::NoError &&
1059 err.classification != NdbError::ConstraintViolation &&
1060 err.classification != NdbError::NoDataFound)
1061 {
1062 /* Non ignored error, report it */
1063 DBUG_PRINT("info", ("err.code == %u", err.code));
1064 DBUG_RETURN(err.code);
1065 }
1066 }
1067 #ifdef HAVE_NDB_BINLOG
1068 else
1069 {
1070 /*
1071 Op with conflict detection, use special error handling method
1072 */
1073
1074 if (err.classification != NdbError::NoError)
1075 {
1076 int res = handle_conflict_op_error(thd_ndb,
1077 trans,
1078 err,
1079 first);
1080 if (res != 0)
1081 DBUG_RETURN(res);
1082 }
1083 } // if (!op_has_conflict_detection)
1084 #endif
1085 if (err.classification != NdbError::NoError)
1086 ignores++;
1087
1088 if (first == last)
1089 break;
1090
1091 first= trans->getNextCompletedOperation(first);
1092 }
1093 if (ignore_count)
1094 *ignore_count= ignores;
1095 #ifdef HAVE_NDB_BINLOG
1096 /*
1097 Conflict detection related error handling above may have defined
1098 new operations on the transaction. If so, execute them now
1099 */
1100 if (trans->getLastDefinedOperation() != lastUserOp)
1101 {
1102 const NdbOperation* last_conflict_op = trans->getLastDefinedOperation();
1103
1104 if (trans->execute(NdbTransaction::NoCommit,
1105 NdbOperation::AO_IgnoreError,
1106 thd_ndb->m_force_send))
1107 {
1108 abort();
1109 //err= trans->getNdbError();
1110 }
1111
1112 if (trans->getNdbError().code)
1113 {
1114 /* Check the result codes of the operations we added */
1115 const NdbOperation* conflict_op = NULL;
1116 do
1117 {
1118 conflict_op = trans->getNextCompletedOperation(conflict_op);
1119 assert(conflict_op != NULL);
1120 /* We will ignore 920 which represents a refreshOp or other op
1121 * arriving after a refreshOp
1122 */
1123 const NdbError& err = conflict_op->getNdbError();
1124 if ((err.code != 0) &&
1125 (err.code != (int) error_op_after_refresh_op))
1126 {
1127 if (err.status == NdbError::TemporaryError)
1128 {
1129 /* Slave will roll back and retry entire transaction. */
1130 ERR_RETURN(err);
1131 }
1132 else
1133 {
1134 char msg[FN_REFLEN];
1135 my_snprintf(msg, sizeof(msg), "Executing extra operations for "
1136 "conflict handling hit Ndb error %d '%s'",
1137 err.code, err.message);
1138 push_warning_printf(current_thd, Sql_condition::WARN_LEVEL_ERROR,
1139 ER_EXCEPTIONS_WRITE_ERROR,
1140 ER(ER_EXCEPTIONS_WRITE_ERROR), msg);
1141 /* Slave will stop replication. */
1142 DBUG_RETURN(ER_EXCEPTIONS_WRITE_ERROR);
1143 }
1144 }
1145 } while (conflict_op != last_conflict_op);
1146 }
1147 }
1148 #endif
1149 DBUG_RETURN(0);
1150 }
1151
1152 inline int
check_completed_operations(Thd_ndb * thd_ndb,NdbTransaction * trans,const NdbOperation * first,const NdbOperation * last,uint * ignore_count)1153 check_completed_operations(Thd_ndb *thd_ndb, NdbTransaction *trans,
1154 const NdbOperation *first,
1155 const NdbOperation *last,
1156 uint *ignore_count)
1157 {
1158 uint ignores= 0;
1159 DBUG_ENTER("check_completed_operations");
1160
1161 if (unlikely(first == 0))
1162 {
1163 assert(last == 0);
1164 DBUG_RETURN(0);
1165 }
1166
1167 /*
1168 Check that all errors are "accepted" errors
1169 */
1170 while (true)
1171 {
1172 const NdbError &err= first->getNdbError();
1173 if (err.classification != NdbError::NoError &&
1174 err.classification != NdbError::ConstraintViolation &&
1175 err.classification != NdbError::NoDataFound)
1176 {
1177 #ifdef HAVE_NDB_BINLOG
1178 /* All conflict detection etc should be done before commit */
1179 DBUG_ASSERT((err.code != (int) error_conflict_fn_violation) &&
1180 (err.code != (int) error_op_after_refresh_op));
1181 #endif
1182 DBUG_RETURN(err.code);
1183 }
1184 if (err.classification != NdbError::NoError)
1185 ignores++;
1186
1187 if (first == last)
1188 break;
1189
1190 first= trans->getNextCompletedOperation(first);
1191 }
1192 if (ignore_count)
1193 *ignore_count= ignores;
1194 DBUG_RETURN(0);
1195 }
1196
1197 void
release_completed_operations(NdbTransaction * trans)1198 ha_ndbcluster::release_completed_operations(NdbTransaction *trans)
1199 {
1200 /**
1201 * mysqld reads/write blobs fully,
1202 * which means that it does not keep blobs
1203 * open/active over execute, which means
1204 * that it should be safe to release anything completed here
1205 *
1206 * i.e don't check for blobs, but just go ahead and release
1207 */
1208 trans->releaseCompletedOperations();
1209 trans->releaseCompletedQueries();
1210 }
1211
1212 int execute_no_commit(Thd_ndb *thd_ndb, NdbTransaction *trans,
1213 bool ignore_no_key,
1214 uint *ignore_count= 0);
1215 inline
execute_no_commit(Thd_ndb * thd_ndb,NdbTransaction * trans,bool ignore_no_key,uint * ignore_count)1216 int execute_no_commit(Thd_ndb *thd_ndb, NdbTransaction *trans,
1217 bool ignore_no_key,
1218 uint *ignore_count)
1219 {
1220 DBUG_ENTER("execute_no_commit");
1221 ha_ndbcluster::release_completed_operations(trans);
1222 const NdbOperation *first= trans->getFirstDefinedOperation();
1223 const NdbOperation *last= trans->getLastDefinedOperation();
1224 thd_ndb->m_execute_count++;
1225 thd_ndb->m_unsent_bytes= 0;
1226 DBUG_PRINT("info", ("execute_count: %u", thd_ndb->m_execute_count));
1227 if (trans->execute(NdbTransaction::NoCommit,
1228 NdbOperation::AO_IgnoreError,
1229 thd_ndb->m_force_send))
1230 {
1231 DBUG_RETURN(-1);
1232 }
1233 if (!ignore_no_key || trans->getNdbError().code == 0)
1234 DBUG_RETURN(trans->getNdbError().code);
1235
1236 DBUG_RETURN(check_completed_operations_pre_commit(thd_ndb, trans,
1237 first, last,
1238 ignore_count));
1239 }
1240
1241 int execute_commit(THD* thd, Thd_ndb *thd_ndb, NdbTransaction *trans,
1242 int force_send, int ignore_error, uint *ignore_count= 0);
1243 inline
execute_commit(THD * thd,Thd_ndb * thd_ndb,NdbTransaction * trans,int force_send,int ignore_error,uint * ignore_count)1244 int execute_commit(THD* thd, Thd_ndb *thd_ndb, NdbTransaction *trans,
1245 int force_send, int ignore_error, uint *ignore_count)
1246 {
1247 DBUG_ENTER("execute_commit");
1248 NdbOperation::AbortOption ao= NdbOperation::AO_IgnoreError;
1249 if (thd_ndb->m_unsent_bytes && !ignore_error)
1250 {
1251 /*
1252 We have unsent bytes and cannot ignore error. Calling execute
1253 with NdbOperation::AO_IgnoreError will result in possible commit
1254 of a transaction although there is an error.
1255 */
1256 ao= NdbOperation::AbortOnError;
1257 }
1258 const NdbOperation *first= trans->getFirstDefinedOperation();
1259 const NdbOperation *last= trans->getLastDefinedOperation();
1260 thd_ndb->m_execute_count++;
1261 thd_ndb->m_unsent_bytes= 0;
1262 DBUG_PRINT("info", ("execute_count: %u", thd_ndb->m_execute_count));
1263 if (trans->execute(NdbTransaction::Commit, ao, force_send))
1264 {
1265 if (thd->slave_thread)
1266 g_ndb_slave_state.atTransactionAbort();
1267 DBUG_RETURN(-1);
1268 }
1269 /* Success of some sort */
1270 if (thd->slave_thread)
1271 {
1272 g_ndb_slave_state.atTransactionCommit();
1273 }
1274 if (!ignore_error || trans->getNdbError().code == 0)
1275 DBUG_RETURN(trans->getNdbError().code);
1276 DBUG_RETURN(check_completed_operations(thd_ndb, trans, first, last,
1277 ignore_count));
1278 }
1279
1280 inline
execute_no_commit_ie(Thd_ndb * thd_ndb,NdbTransaction * trans)1281 int execute_no_commit_ie(Thd_ndb *thd_ndb, NdbTransaction *trans)
1282 {
1283 DBUG_ENTER("execute_no_commit_ie");
1284 ha_ndbcluster::release_completed_operations(trans);
1285 int res= trans->execute(NdbTransaction::NoCommit,
1286 NdbOperation::AO_IgnoreError,
1287 thd_ndb->m_force_send);
1288 thd_ndb->m_unsent_bytes= 0;
1289 thd_ndb->m_execute_count++;
1290 DBUG_PRINT("info", ("execute_count: %u", thd_ndb->m_execute_count));
1291 DBUG_RETURN(res);
1292 }
1293
1294 /*
1295 Place holder for ha_ndbcluster thread specific data
1296 */
1297 typedef struct st_thd_ndb_share {
1298 const void *key;
1299 struct Ndb_local_table_statistics stat;
1300 } THD_NDB_SHARE;
1301 static
thd_ndb_share_get_key(THD_NDB_SHARE * thd_ndb_share,size_t * length,my_bool not_used MY_ATTRIBUTE ((unused)))1302 uchar *thd_ndb_share_get_key(THD_NDB_SHARE *thd_ndb_share, size_t *length,
1303 my_bool not_used MY_ATTRIBUTE((unused)))
1304 {
1305 *length= sizeof(thd_ndb_share->key);
1306 return (uchar*) &thd_ndb_share->key;
1307 }
1308
Thd_ndb(THD * thd)1309 Thd_ndb::Thd_ndb(THD* thd) :
1310 m_thd(thd),
1311 schema_locks_count(0)
1312 {
1313 connection= ndb_get_cluster_connection();
1314 m_connect_count= connection->get_connect_count();
1315 ndb= new Ndb(connection, "");
1316 lock_count= 0;
1317 start_stmt_count= 0;
1318 save_point_count= 0;
1319 count= 0;
1320 trans= NULL;
1321 m_handler= NULL;
1322 m_error= FALSE;
1323 options= 0;
1324 (void) my_hash_init(&open_tables, table_alias_charset, 5, 0, 0,
1325 (my_hash_get_key)thd_ndb_share_get_key, 0, 0);
1326 m_unsent_bytes= 0;
1327 m_execute_count= 0;
1328 m_scan_count= 0;
1329 m_pruned_scan_count= 0;
1330 m_sorted_scan_count= 0;
1331 m_pushed_queries_defined= 0;
1332 m_pushed_queries_dropped= 0;
1333 m_pushed_queries_executed= 0;
1334 m_pushed_reads= 0;
1335 memset(m_transaction_no_hint_count, 0, sizeof(m_transaction_no_hint_count));
1336 memset(m_transaction_hint_count, 0, sizeof(m_transaction_hint_count));
1337 global_schema_lock_trans= NULL;
1338 global_schema_lock_count= 0;
1339 global_schema_lock_error= 0;
1340 init_alloc_root(&m_batch_mem_root, BATCH_FLUSH_SIZE/4, 0);
1341 }
1342
~Thd_ndb()1343 Thd_ndb::~Thd_ndb()
1344 {
1345 if (opt_ndb_extra_logging > 1)
1346 {
1347 /*
1348 print some stats about the connection at disconnect
1349 */
1350 for (int i= 0; i < MAX_NDB_NODES; i++)
1351 {
1352 if (m_transaction_hint_count[i] > 0 ||
1353 m_transaction_no_hint_count[i] > 0)
1354 {
1355 sql_print_information("tid %u: node[%u] "
1356 "transaction_hint=%u, transaction_no_hint=%u",
1357 (unsigned)current_thd->thread_id, i,
1358 m_transaction_hint_count[i],
1359 m_transaction_no_hint_count[i]);
1360 }
1361 }
1362 }
1363 if (ndb)
1364 {
1365 delete ndb;
1366 ndb= NULL;
1367 }
1368 changed_tables.empty();
1369 my_hash_free(&open_tables);
1370 free_root(&m_batch_mem_root, MYF(0));
1371 }
1372
1373
1374 inline
get_ndb(THD * thd)1375 Ndb *ha_ndbcluster::get_ndb(THD *thd)
1376 {
1377 return get_thd_ndb(thd)->ndb;
1378 }
1379
/*
 * manage uncommitted inserts/deletes during a transaction to get records correct
 */
1383
set_rec_per_key()1384 void ha_ndbcluster::set_rec_per_key()
1385 {
1386 DBUG_ENTER("ha_ndbcluster::set_rec_per_key");
1387 /*
1388 Set up the 'rec_per_key[]' for keys which we have good knowledge
1389 about the distribution. 'rec_per_key[]' is init'ed to '0' by
1390 open_binary_frm(), which is interpreted as 'unknown' by optimizer.
1391 -> Not setting 'rec_per_key[]' will force the optimizer to use
1392 its own heuristic to estimate 'records pr. key'.
1393 */
1394 for (uint i=0 ; i < table_share->keys ; i++)
1395 {
1396 bool is_unique_index= false;
1397 KEY* key_info= table->key_info + i;
1398 switch (get_index_type(i))
1399 {
1400 case UNIQUE_INDEX:
1401 case PRIMARY_KEY_INDEX:
1402 {
1403 // Index is unique when all 'key_parts' are specified,
1404 // else distribution is unknown and not specified here.
1405 is_unique_index= true;
1406 break;
1407 }
1408 case UNIQUE_ORDERED_INDEX:
1409 case PRIMARY_KEY_ORDERED_INDEX:
1410 is_unique_index= true;
1411 // intentional fall thru to logic for ordered index
1412 case ORDERED_INDEX:
1413 // 'Records pr. key' are unknown for non-unique indexes.
1414 // (May change when we get better index statistics.)
1415 {
1416 THD *thd= current_thd;
1417 const bool index_stat_enable= THDVAR(NULL, index_stat_enable) &&
1418 THDVAR(thd, index_stat_enable);
1419 if (index_stat_enable)
1420 {
1421 int err= ndb_index_stat_set_rpk(i);
1422 if (err != 0 &&
1423 /* no stats is not unexpected error */
1424 err != NdbIndexStat::NoIndexStats &&
1425 /* warning was printed at first error */
1426 err != Ndb_index_stat_error_HAS_ERROR)
1427 {
1428 push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN,
1429 ER_CANT_GET_STAT, /* pun? */
1430 "index stats (RPK) for key %s:"
1431 " unexpected error %d",
1432 key_info->name, err);
1433 }
1434 }
1435 // no fallback method...
1436 break;
1437 }
1438 default:
1439 DBUG_ASSERT(false);
1440 }
1441 // set rows per key to 1 for complete key given for unique/primary index
1442 if (is_unique_index)
1443 {
1444 key_info->rec_per_key[key_info->user_defined_key_parts-1]= 1;
1445 }
1446 }
1447 DBUG_VOID_RETURN;
1448 }
1449
records()1450 ha_rows ha_ndbcluster::records()
1451 {
1452 DBUG_ENTER("ha_ndbcluster::records");
1453 DBUG_PRINT("info", ("id=%d, no_uncommitted_rows_count=%d",
1454 ((const NDBTAB *)m_table)->getTableId(),
1455 m_table_info->no_uncommitted_rows_count));
1456
1457 if (update_stats(table->in_use, 1) == 0)
1458 {
1459 DBUG_RETURN(stats.records);
1460 }
1461 else
1462 {
1463 DBUG_RETURN(HA_POS_ERROR);
1464 }
1465 }
1466
no_uncommitted_rows_execute_failure()1467 void ha_ndbcluster::no_uncommitted_rows_execute_failure()
1468 {
1469 DBUG_ENTER("ha_ndbcluster::no_uncommitted_rows_execute_failure");
1470 get_thd_ndb(current_thd)->m_error= TRUE;
1471 DBUG_VOID_RETURN;
1472 }
1473
no_uncommitted_rows_update(int c)1474 void ha_ndbcluster::no_uncommitted_rows_update(int c)
1475 {
1476 DBUG_ENTER("ha_ndbcluster::no_uncommitted_rows_update");
1477 struct Ndb_local_table_statistics *local_info= m_table_info;
1478 local_info->no_uncommitted_rows_count+= c;
1479 DBUG_PRINT("info", ("id=%d, no_uncommitted_rows_count=%d",
1480 ((const NDBTAB *)m_table)->getTableId(),
1481 local_info->no_uncommitted_rows_count));
1482 DBUG_VOID_RETURN;
1483 }
1484
no_uncommitted_rows_reset(THD * thd)1485 void ha_ndbcluster::no_uncommitted_rows_reset(THD *thd)
1486 {
1487 DBUG_ENTER("ha_ndbcluster::no_uncommitted_rows_reset");
1488 Thd_ndb *thd_ndb= get_thd_ndb(thd);
1489 thd_ndb->count++;
1490 thd_ndb->m_error= FALSE;
1491 thd_ndb->m_unsent_bytes= 0;
1492 DBUG_VOID_RETURN;
1493 }
1494
1495
ndb_err(NdbTransaction * trans,bool have_lock)1496 int ha_ndbcluster::ndb_err(NdbTransaction *trans,
1497 bool have_lock)
1498 {
1499 THD *thd= current_thd;
1500 int res;
1501 NdbError err= trans->getNdbError();
1502 DBUG_ENTER("ndb_err");
1503
1504 switch (err.classification) {
1505 case NdbError::SchemaError:
1506 {
1507 // TODO perhaps we need to do more here, invalidate also in the cache
1508 m_table->setStatusInvalid();
1509 /* Close other open handlers not used by any thread */
1510 TABLE_LIST table_list;
1511 memset(&table_list, 0, sizeof(table_list));
1512 table_list.db= m_dbname;
1513 table_list.alias= table_list.table_name= m_tabname;
1514 close_cached_tables(thd, &table_list, have_lock, FALSE, FALSE);
1515 break;
1516 }
1517 default:
1518 break;
1519 }
1520 res= ndb_to_mysql_error(&err);
1521 DBUG_PRINT("info", ("transformed ndbcluster error %d to mysql error %d",
1522 err.code, res));
1523 if (res == HA_ERR_FOUND_DUPP_KEY)
1524 {
1525 char *error_data= err.details;
1526 uint dupkey= MAX_KEY;
1527
1528 for (uint i= 0; i < MAX_KEY; i++)
1529 {
1530 if (m_index[i].type == UNIQUE_INDEX ||
1531 m_index[i].type == UNIQUE_ORDERED_INDEX)
1532 {
1533 const NDBINDEX *unique_index=
1534 (const NDBINDEX *) m_index[i].unique_index;
1535 if (unique_index &&
1536 (char *) unique_index->getObjectId() == error_data)
1537 {
1538 dupkey= i;
1539 break;
1540 }
1541 }
1542 }
1543 if (m_rows_to_insert == 1)
1544 {
1545 /*
1546 We can only distinguish between primary and non-primary
1547 violations here, so we need to return MAX_KEY for non-primary
1548 to signal that key is unknown
1549 */
1550 m_dupkey= err.code == 630 ? table_share->primary_key : dupkey;
1551 }
1552 else
1553 {
1554 /* We are batching inserts, offending key is not available */
1555 m_dupkey= (uint) -1;
1556 }
1557 }
1558 DBUG_RETURN(res);
1559 }
1560
1561
1562 /**
1563 Override the default get_error_message in order to add the
1564 error message of NDB .
1565 */
1566
get_error_message(int error,String * buf)1567 bool ha_ndbcluster::get_error_message(int error,
1568 String *buf)
1569 {
1570 DBUG_ENTER("ha_ndbcluster::get_error_message");
1571 DBUG_PRINT("enter", ("error: %d", error));
1572
1573 Ndb *ndb= check_ndb_in_thd(current_thd);
1574 if (!ndb)
1575 DBUG_RETURN(FALSE);
1576
1577 const NdbError err= ndb->getNdbError(error);
1578 bool temporary= err.status==NdbError::TemporaryError;
1579 buf->set(err.message, strlen(err.message), &my_charset_bin);
1580 DBUG_PRINT("exit", ("message: %s, temporary: %d", buf->ptr(), temporary));
1581 DBUG_RETURN(temporary);
1582 }
1583
1584
1585 /*
1586 field_used_length() returns the number of bytes actually used to
1587 store the data of the field. So for a varstring it includes both
1588 length byte(s) and string data, and anything after data_length()
1589 bytes are unused.
1590 */
1591 static
field_used_length(const Field * field)1592 uint32 field_used_length(const Field* field)
1593 {
1594 if (field->type() == MYSQL_TYPE_VARCHAR)
1595 {
1596 const Field_varstring* f = static_cast<const Field_varstring*>(field);
1597 return f->length_bytes + const_cast<Field_varstring*>(f)->data_length();
1598 // ^ no 'data_length() const'
1599 }
1600 return field->pack_length();
1601 }
1602
1603
1604 /**
1605 Check if MySQL field type forces var part in ndb storage
1606 */
field_type_forces_var_part(enum_field_types type)1607 static bool field_type_forces_var_part(enum_field_types type)
1608 {
1609 switch (type) {
1610 case MYSQL_TYPE_VAR_STRING:
1611 case MYSQL_TYPE_VARCHAR:
1612 return TRUE;
1613 case MYSQL_TYPE_TINY_BLOB:
1614 case MYSQL_TYPE_BLOB:
1615 case MYSQL_TYPE_MEDIUM_BLOB:
1616 case MYSQL_TYPE_LONG_BLOB:
1617 case MYSQL_TYPE_GEOMETRY:
1618 return FALSE;
1619 default:
1620 return FALSE;
1621 }
1622 }
1623
1624 /*
1625 * This is used for every additional row operation, to update the guesstimate
1626 * of pending bytes to send, and to check if it is now time to flush a batch.
1627 */
1628 bool
add_row_check_if_batch_full_size(Thd_ndb * thd_ndb,uint size)1629 ha_ndbcluster::add_row_check_if_batch_full_size(Thd_ndb *thd_ndb, uint size)
1630 {
1631 if (thd_ndb->m_unsent_bytes == 0)
1632 free_root(&(thd_ndb->m_batch_mem_root), MY_MARK_BLOCKS_FREE);
1633
1634 uint unsent= thd_ndb->m_unsent_bytes;
1635 unsent+= size;
1636 thd_ndb->m_unsent_bytes= unsent;
1637 return unsent >= thd_ndb->m_batch_size;
1638 }
1639
1640 /*
1641 Return a generic buffer that will remain valid until after next execute.
1642
1643 The memory is freed by the first call to add_row_check_if_batch_full_size()
1644 following any execute() call. The intention is that the memory is associated
1645 with one batch of operations during batched slave updates.
1646
1647 Note in particular that using get_buffer() / copy_row_to_buffer() separately
1648 from add_row_check_if_batch_full_size() could make meory usage grow without
1649 limit, and that this sequence:
1650
1651 execute()
1652 get_buffer() / copy_row_to_buffer()
1653 add_row_check_if_batch_full_size()
1654 ...
1655 execute()
1656
1657 will free the memory already at add_row_check_if_batch_full_size() time, it
1658 will not remain valid until the second execute().
1659 */
1660 uchar *
get_buffer(Thd_ndb * thd_ndb,uint size)1661 ha_ndbcluster::get_buffer(Thd_ndb *thd_ndb, uint size)
1662 {
1663 return (uchar*)alloc_root(&(thd_ndb->m_batch_mem_root), size);
1664 }
1665
1666 uchar *
copy_row_to_buffer(Thd_ndb * thd_ndb,const uchar * record)1667 ha_ndbcluster::copy_row_to_buffer(Thd_ndb *thd_ndb, const uchar *record)
1668 {
1669 uchar *row= get_buffer(thd_ndb, table->s->reclength);
1670 if (unlikely(!row))
1671 return NULL;
1672 memcpy(row, record, table->s->reclength);
1673 return row;
1674 }
1675
1676 /**
1677 * findBlobError
1678 * This method attempts to find an error in the hierarchy of runtime
1679 * NDBAPI objects from Blob up to transaction.
1680 * It will return -1 if no error is found, 0 if an error is found.
1681 */
findBlobError(NdbError & error,NdbBlob * pBlob)1682 int findBlobError(NdbError& error, NdbBlob* pBlob)
1683 {
1684 error= pBlob->getNdbError();
1685 if (error.code != 0)
1686 return 0;
1687
1688 const NdbOperation* pOp= pBlob->getNdbOperation();
1689 error= pOp->getNdbError();
1690 if (error.code != 0)
1691 return 0;
1692
1693 NdbTransaction* pTrans= pOp->getNdbTransaction();
1694 error= pTrans->getNdbError();
1695 if (error.code != 0)
1696 return 0;
1697
1698 /* No error on any of the objects */
1699 return -1;
1700 }
1701
1702
g_get_ndb_blobs_value(NdbBlob * ndb_blob,void * arg)1703 int g_get_ndb_blobs_value(NdbBlob *ndb_blob, void *arg)
1704 {
1705 ha_ndbcluster *ha= (ha_ndbcluster *)arg;
1706 DBUG_ENTER("g_get_ndb_blobs_value");
1707 DBUG_PRINT("info", ("destination row: %p", ha->m_blob_destination_record));
1708
1709 if (ha->m_blob_counter == 0) /* Reset total size at start of row */
1710 ha->m_blobs_row_total_size= 0;
1711
1712 /* Count the total length needed for blob data. */
1713 int isNull;
1714 if (ndb_blob->getNull(isNull) != 0)
1715 ERR_RETURN(ndb_blob->getNdbError());
1716 if (isNull == 0) {
1717 Uint64 len64= 0;
1718 if (ndb_blob->getLength(len64) != 0)
1719 ERR_RETURN(ndb_blob->getNdbError());
1720 /* Align to Uint64. */
1721 ha->m_blobs_row_total_size+= (len64 + 7) & ~((Uint64)7);
1722 if (ha->m_blobs_row_total_size > 0xffffffff)
1723 {
1724 DBUG_ASSERT(FALSE);
1725 DBUG_RETURN(-1);
1726 }
1727 DBUG_PRINT("info", ("Blob number %d needs size %llu, total buffer reqt. now %llu",
1728 ha->m_blob_counter,
1729 len64,
1730 ha->m_blobs_row_total_size));
1731 }
1732 ha->m_blob_counter++;
1733
1734 /*
1735 Wait until all blobs in this row are active, so we can allocate
1736 and use a common buffer containing all.
1737 */
1738 if (ha->m_blob_counter < ha->m_blob_expected_count_per_row)
1739 DBUG_RETURN(0);
1740
1741 /* Reset blob counter for next row (scan scenario) */
1742 ha->m_blob_counter= 0;
1743
1744 /* Re-allocate bigger blob buffer for this row if necessary. */
1745 if (ha->m_blobs_row_total_size > ha->m_blobs_buffer_size)
1746 {
1747 my_free(ha->m_blobs_buffer, MYF(MY_ALLOW_ZERO_PTR));
1748 DBUG_PRINT("info", ("allocate blobs buffer size %u",
1749 (uint32)(ha->m_blobs_row_total_size)));
1750 /* Windows compiler complains about my_malloc on non-size_t
1751 * validate mapping from Uint64 to size_t
1752 */
1753 if(((size_t)ha->m_blobs_row_total_size) != ha->m_blobs_row_total_size)
1754 {
1755 ha->m_blobs_buffer= NULL;
1756 ha->m_blobs_buffer_size= 0;
1757 DBUG_RETURN(-1);
1758 }
1759
1760 ha->m_blobs_buffer=
1761 (uchar*) my_malloc((size_t) ha->m_blobs_row_total_size, MYF(MY_WME));
1762 if (ha->m_blobs_buffer == NULL)
1763 {
1764 ha->m_blobs_buffer_size= 0;
1765 DBUG_RETURN(-1);
1766 }
1767 ha->m_blobs_buffer_size= ha->m_blobs_row_total_size;
1768 }
1769
1770 /*
1771 Now read all blob data.
1772 If we know the destination mysqld row, we also set the blob null bit and
1773 pointer/length (if not, it will be done instead in unpack_record()).
1774 */
1775 uint32 offset= 0;
1776 for (uint i= 0; i < ha->table->s->fields; i++)
1777 {
1778 Field *field= ha->table->field[i];
1779 if (! (field->flags & BLOB_FLAG))
1780 continue;
1781 NdbValue value= ha->m_value[i];
1782 if (value.blob == NULL)
1783 {
1784 DBUG_PRINT("info",("[%u] skipped", i));
1785 continue;
1786 }
1787 Field_blob *field_blob= (Field_blob *)field;
1788 NdbBlob *ndb_blob= value.blob;
1789 int isNull;
1790 if (ndb_blob->getNull(isNull) != 0)
1791 ERR_RETURN(ndb_blob->getNdbError());
1792 if (isNull == 0) {
1793 Uint64 len64= 0;
1794 if (ndb_blob->getLength(len64) != 0)
1795 ERR_RETURN(ndb_blob->getNdbError());
1796 DBUG_ASSERT(len64 < 0xffffffff);
1797 uchar *buf= ha->m_blobs_buffer + offset;
1798 uint32 len= (uint32)(ha->m_blobs_buffer_size - offset);
1799 if (ndb_blob->readData(buf, len) != 0)
1800 {
1801 NdbError err;
1802 if (findBlobError(err, ndb_blob) == 0)
1803 {
1804 ERR_RETURN(err);
1805 }
1806 else
1807 {
1808 /* Should always have some error code set */
1809 assert(err.code != 0);
1810 ERR_RETURN(err);
1811 }
1812 }
1813 DBUG_PRINT("info", ("[%u] offset: %u buf: 0x%lx len=%u",
1814 i, offset, (long) buf, len));
1815 DBUG_ASSERT(len == len64);
1816 if (ha->m_blob_destination_record)
1817 {
1818 my_ptrdiff_t ptrdiff=
1819 ha->m_blob_destination_record - ha->table->record[0];
1820 field_blob->move_field_offset(ptrdiff);
1821 field_blob->set_ptr(len, buf);
1822 field_blob->set_notnull();
1823 field_blob->move_field_offset(-ptrdiff);
1824 }
1825 offset+= Uint32((len64 + 7) & ~((Uint64)7));
1826 }
1827 else if (ha->m_blob_destination_record)
1828 {
1829 /* Have to set length even in this case. */
1830 my_ptrdiff_t ptrdiff=
1831 ha->m_blob_destination_record - ha->table->record[0];
1832 uchar *buf= ha->m_blobs_buffer + offset;
1833 field_blob->move_field_offset(ptrdiff);
1834 field_blob->set_ptr((uint32)0, buf);
1835 field_blob->set_null();
1836 field_blob->move_field_offset(-ptrdiff);
1837 DBUG_PRINT("info", ("[%u] isNull=%d", i, isNull));
1838 }
1839 }
1840
1841 if (!ha->m_active_cursor)
1842 {
1843 /* Non-scan, Blob reads have been issued
1844 * execute them and then close the Blob
1845 * handles
1846 */
1847 for (uint i= 0; i < ha->table->s->fields; i++)
1848 {
1849 Field *field= ha->table->field[i];
1850 if (! (field->flags & BLOB_FLAG))
1851 continue;
1852 NdbValue value= ha->m_value[i];
1853 if (value.blob == NULL)
1854 {
1855 DBUG_PRINT("info",("[%u] skipped", i));
1856 continue;
1857 }
1858 NdbBlob *ndb_blob= value.blob;
1859
1860 assert(ndb_blob->getState() == NdbBlob::Active);
1861
1862 /* Call close() with execPendingBlobOps == true
1863 * For LM_CommittedRead access, this will enqueue
1864 * an unlock operation, which the Blob framework
1865 * code invoking this callback will execute before
1866 * returning control to the caller of execute()
1867 */
1868 if (ndb_blob->close(true) != 0)
1869 {
1870 ERR_RETURN(ndb_blob->getNdbError());
1871 }
1872 }
1873 }
1874
1875 DBUG_RETURN(0);
1876 }
1877
1878 /*
1879 Request reading of blob values.
1880
1881 If dst_record is specified, the blob null bit, pointer, and length will be
1882 set in that record. Otherwise they must be set later by calling
1883 unpack_record().
1884 */
1885 int
get_blob_values(const NdbOperation * ndb_op,uchar * dst_record,const MY_BITMAP * bitmap)1886 ha_ndbcluster::get_blob_values(const NdbOperation *ndb_op, uchar *dst_record,
1887 const MY_BITMAP *bitmap)
1888 {
1889 uint i;
1890 DBUG_ENTER("ha_ndbcluster::get_blob_values");
1891
1892 m_blob_counter= 0;
1893 m_blob_expected_count_per_row= 0;
1894 m_blob_destination_record= dst_record;
1895 m_blobs_row_total_size= 0;
1896 ndb_op->getNdbTransaction()->
1897 setMaxPendingBlobReadBytes(THDVAR(current_thd, blob_read_batch_bytes));
1898
1899 for (i= 0; i < table_share->fields; i++)
1900 {
1901 Field *field= table->field[i];
1902 if (!(field->flags & BLOB_FLAG))
1903 continue;
1904
1905 DBUG_PRINT("info", ("fieldnr=%d", i));
1906 NdbBlob *ndb_blob;
1907 if (bitmap_is_set(bitmap, i))
1908 {
1909 if ((ndb_blob= ndb_op->getBlobHandle(i)) == NULL ||
1910 ndb_blob->setActiveHook(g_get_ndb_blobs_value, this) != 0)
1911 DBUG_RETURN(1);
1912 m_blob_expected_count_per_row++;
1913 }
1914 else
1915 ndb_blob= NULL;
1916
1917 m_value[i].blob= ndb_blob;
1918 }
1919
1920 DBUG_RETURN(0);
1921 }
1922
1923 int
set_blob_values(const NdbOperation * ndb_op,my_ptrdiff_t row_offset,const MY_BITMAP * bitmap,uint * set_count,bool batch)1924 ha_ndbcluster::set_blob_values(const NdbOperation *ndb_op,
1925 my_ptrdiff_t row_offset, const MY_BITMAP *bitmap,
1926 uint *set_count, bool batch)
1927 {
1928 uint field_no;
1929 uint *blob_index, *blob_index_end;
1930 int res= 0;
1931 DBUG_ENTER("ha_ndbcluster::set_blob_values");
1932
1933 *set_count= 0;
1934
1935 if (table_share->blob_fields == 0)
1936 DBUG_RETURN(0);
1937
1938 ndb_op->getNdbTransaction()->
1939 setMaxPendingBlobWriteBytes(THDVAR(current_thd, blob_write_batch_bytes));
1940 blob_index= table_share->blob_field;
1941 blob_index_end= blob_index + table_share->blob_fields;
1942 do
1943 {
1944 field_no= *blob_index;
1945 /* A NULL bitmap sets all blobs. */
1946 if (bitmap && !bitmap_is_set(bitmap, field_no))
1947 continue;
1948 Field *field= table->field[field_no];
1949
1950 NdbBlob *ndb_blob= ndb_op->getBlobHandle(field_no);
1951 if (ndb_blob == NULL)
1952 ERR_RETURN(ndb_op->getNdbError());
1953 if (field->is_real_null(row_offset))
1954 {
1955 DBUG_PRINT("info", ("Setting Blob %d to NULL", field_no));
1956 if (ndb_blob->setNull() != 0)
1957 ERR_RETURN(ndb_op->getNdbError());
1958 }
1959 else
1960 {
1961 Field_blob *field_blob= (Field_blob *)field;
1962
1963 // Get length and pointer to data
1964 const uchar *field_ptr= field->ptr + row_offset;
1965 uint32 blob_len= field_blob->get_length(field_ptr);
1966 uchar* blob_ptr= NULL;
1967 field_blob->get_ptr(&blob_ptr);
1968
1969 // Looks like NULL ptr signals length 0 blob
1970 if (blob_ptr == NULL) {
1971 DBUG_ASSERT(blob_len == 0);
1972 blob_ptr= (uchar*)"";
1973 }
1974
1975 DBUG_PRINT("value", ("set blob ptr: 0x%lx len: %u",
1976 (long) blob_ptr, blob_len));
1977 DBUG_DUMP("value", blob_ptr, MIN(blob_len, 26));
1978
1979 /*
1980 NdbBlob requires the data pointer to remain valid until execute() time.
1981 So when batching, we need to copy the value to a temporary buffer.
1982 */
1983 if (batch && blob_len > 0)
1984 {
1985 uchar *tmp_buf= get_buffer(m_thd_ndb, blob_len);
1986 if (!tmp_buf)
1987 DBUG_RETURN(HA_ERR_OUT_OF_MEM);
1988 memcpy(tmp_buf, blob_ptr, blob_len);
1989 blob_ptr= tmp_buf;
1990 }
1991 res= ndb_blob->setValue((char*)blob_ptr, blob_len);
1992 if (res != 0)
1993 ERR_RETURN(ndb_op->getNdbError());
1994 }
1995
1996 ++(*set_count);
1997 } while (++blob_index != blob_index_end);
1998
1999 DBUG_RETURN(res);
2000 }
2001
2002 /*
2003 This routine is shared by injector. There is no common blobs buffer
2004 so the buffer and length are passed by reference. Injector also
2005 passes a record pointer diff.
2006 */
get_ndb_blobs_value(TABLE * table,NdbValue * value_array,uchar * & buffer,uint & buffer_size,my_ptrdiff_t ptrdiff)2007 int get_ndb_blobs_value(TABLE* table, NdbValue* value_array,
2008 uchar*& buffer, uint& buffer_size,
2009 my_ptrdiff_t ptrdiff)
2010 {
2011 DBUG_ENTER("get_ndb_blobs_value");
2012
2013 // Field has no field number so cannot use TABLE blob_field
2014 // Loop twice, first only counting total buffer size
2015 for (int loop= 0; loop <= 1; loop++)
2016 {
2017 uint32 offset= 0;
2018 for (uint i= 0; i < table->s->fields; i++)
2019 {
2020 Field *field= table->field[i];
2021 NdbValue value= value_array[i];
2022 if (! (field->flags & BLOB_FLAG))
2023 continue;
2024 if (value.blob == NULL)
2025 {
2026 DBUG_PRINT("info",("[%u] skipped", i));
2027 continue;
2028 }
2029 Field_blob *field_blob= (Field_blob *)field;
2030 NdbBlob *ndb_blob= value.blob;
2031 int isNull;
2032 if (ndb_blob->getNull(isNull) != 0)
2033 ERR_RETURN(ndb_blob->getNdbError());
2034 if (isNull == 0) {
2035 Uint64 len64= 0;
2036 if (ndb_blob->getLength(len64) != 0)
2037 ERR_RETURN(ndb_blob->getNdbError());
2038 // Align to Uint64
2039 uint32 size= Uint32(len64);
2040 if (size % 8 != 0)
2041 size+= 8 - size % 8;
2042 if (loop == 1)
2043 {
2044 uchar *buf= buffer + offset;
2045 uint32 len= 0xffffffff; // Max uint32
2046 if (ndb_blob->readData(buf, len) != 0)
2047 ERR_RETURN(ndb_blob->getNdbError());
2048 DBUG_PRINT("info", ("[%u] offset: %u buf: 0x%lx len=%u [ptrdiff=%d]",
2049 i, offset, (long) buf, len, (int)ptrdiff));
2050 DBUG_ASSERT(len == len64);
2051 // Ugly hack assumes only ptr needs to be changed
2052 field_blob->set_ptr_offset(ptrdiff, len, buf);
2053 }
2054 offset+= size;
2055 }
2056 else if (loop == 1) // undefined or null
2057 {
2058 // have to set length even in this case
2059 uchar *buf= buffer + offset; // or maybe NULL
2060 uint32 len= 0;
2061 field_blob->set_ptr_offset(ptrdiff, len, buf);
2062 DBUG_PRINT("info", ("[%u] isNull=%d", i, isNull));
2063 }
2064 }
2065 if (loop == 0 && offset > buffer_size)
2066 {
2067 my_free(buffer, MYF(MY_ALLOW_ZERO_PTR));
2068 buffer_size= 0;
2069 DBUG_PRINT("info", ("allocate blobs buffer size %u", offset));
2070 buffer= (uchar*) my_malloc(offset, MYF(MY_WME));
2071 if (buffer == NULL)
2072 {
2073 sql_print_error("ha_ndbcluster::get_ndb_blobs_value: "
2074 "my_malloc(%u) failed", offset);
2075 DBUG_RETURN(-1);
2076 }
2077 buffer_size= offset;
2078 }
2079 }
2080 DBUG_RETURN(0);
2081 }
2082
2083
2084 /**
2085 Check if any set or get of blob value in current query.
2086 */
2087
uses_blob_value(const MY_BITMAP * bitmap) const2088 bool ha_ndbcluster::uses_blob_value(const MY_BITMAP *bitmap) const
2089 {
2090 uint *blob_index, *blob_index_end;
2091 if (table_share->blob_fields == 0)
2092 return FALSE;
2093
2094 blob_index= table_share->blob_field;
2095 blob_index_end= blob_index + table_share->blob_fields;
2096 do
2097 {
2098 if (bitmap_is_set(bitmap, table->field[*blob_index]->field_index))
2099 return TRUE;
2100 } while (++blob_index != blob_index_end);
2101 return FALSE;
2102 }
2103
release_blobs_buffer()2104 void ha_ndbcluster::release_blobs_buffer()
2105 {
2106 DBUG_ENTER("releaseBlobsBuffer");
2107 if (m_blobs_buffer_size > 0)
2108 {
2109 DBUG_PRINT("info", ("Deleting blobs buffer, size %llu", m_blobs_buffer_size));
2110 my_free(m_blobs_buffer, MYF(MY_ALLOW_ZERO_PTR));
2111 m_blobs_buffer= 0;
2112 m_blobs_row_total_size= 0;
2113 m_blobs_buffer_size= 0;
2114 }
2115 DBUG_VOID_RETURN;
2116 }
2117
2118 /**
2119 Get metadata for this table from NDB.
2120
2121 Check that frm-file on disk is equal to frm-file
2122 of table accessed in NDB.
2123
2124 @retval
2125 0 ok
2126 @retval
2127 -2 Meta data has changed; Re-read data and try again
2128 */
2129
cmp_frm(const NDBTAB * ndbtab,const void * pack_data,uint pack_length)2130 int cmp_frm(const NDBTAB *ndbtab, const void *pack_data,
2131 uint pack_length)
2132 {
2133 DBUG_ENTER("cmp_frm");
2134 /*
2135 Compare FrmData in NDB with frm file from disk.
2136 */
2137 if ((pack_length != ndbtab->getFrmLength()) ||
2138 (memcmp(pack_data, ndbtab->getFrmData(), pack_length)))
2139 DBUG_RETURN(1);
2140 DBUG_RETURN(0);
2141 }
2142
2143 /*
2144 Does type support a default value?
2145 */
2146 static bool
type_supports_default_value(enum_field_types mysql_type)2147 type_supports_default_value(enum_field_types mysql_type)
2148 {
2149 bool ret = (mysql_type != MYSQL_TYPE_BLOB &&
2150 mysql_type != MYSQL_TYPE_TINY_BLOB &&
2151 mysql_type != MYSQL_TYPE_MEDIUM_BLOB &&
2152 mysql_type != MYSQL_TYPE_LONG_BLOB &&
2153 mysql_type != MYSQL_TYPE_GEOMETRY);
2154
2155 return ret;
2156 }
2157
2158 /**
2159 Check that Ndb data dictionary has the same default values
2160 as MySQLD for the current table.
2161 Called as part of a DBUG check as part of table open
2162
2163 Returns
2164 0 - Defaults are ok
2165 -1 - Some default(s) are bad
2166 */
check_default_values(const NDBTAB * ndbtab)2167 int ha_ndbcluster::check_default_values(const NDBTAB* ndbtab)
2168 {
2169 /* Debug only method for checking table defaults aligned
2170 between MySQLD and Ndb
2171 */
2172 bool defaults_aligned= true;
2173
2174 if (ndbtab->hasDefaultValues())
2175 {
2176 /* Ndb supports native defaults for non-pk columns */
2177 my_bitmap_map *old_map= tmp_use_all_columns(table, table->read_set);
2178
2179 for (uint f=0; f < table_share->fields; f++)
2180 {
2181 Field* field= table->field[f]; // Use Field struct from MySQLD table rep
2182 const NdbDictionary::Column* ndbCol= ndbtab->getColumn(field->field_index);
2183
2184 if ((! (field->flags & (PRI_KEY_FLAG |
2185 NO_DEFAULT_VALUE_FLAG))) &&
2186 type_supports_default_value(field->real_type()))
2187 {
2188 /* We expect Ndb to have a native default for this
2189 * column
2190 */
2191 my_ptrdiff_t src_offset= table_share->default_values -
2192 field->table->record[0];
2193
2194 /* Move field by offset to refer to default value */
2195 field->move_field_offset(src_offset);
2196
2197 const uchar* ndb_default= (const uchar*) ndbCol->getDefaultValue();
2198
2199 if (ndb_default == NULL)
2200 /* MySQLD default must also be NULL */
2201 defaults_aligned= field->is_null();
2202 else
2203 {
2204 if (field->type() != MYSQL_TYPE_BIT)
2205 {
2206 defaults_aligned= (0 == field->cmp(ndb_default));
2207 }
2208 else
2209 {
2210 longlong value= (static_cast<Field_bit*>(field))->val_int();
2211 /* Map to NdbApi format - two Uint32s */
2212 Uint32 out[2];
2213 out[0] = 0;
2214 out[1] = 0;
2215 for (int b=0; b < 64; b++)
2216 {
2217 out[b >> 5] |= (value & 1) << (b & 31);
2218
2219 value= value >> 1;
2220 }
2221 Uint32 defaultLen = field_used_length(field);
2222 defaultLen = ((defaultLen + 3) & ~(Uint32)0x7);
2223 defaults_aligned= (0 == memcmp(ndb_default,
2224 out,
2225 defaultLen));
2226 }
2227 }
2228
2229 field->move_field_offset(-src_offset);
2230
2231 if (unlikely(!defaults_aligned))
2232 {
2233 DBUG_PRINT("info", ("Default values differ for column %u",
2234 field->field_index));
2235 break;
2236 }
2237 }
2238 else
2239 {
2240 /* We don't expect Ndb to have a native default for this column */
2241 if (unlikely(ndbCol->getDefaultValue() != NULL))
2242 {
2243 /* Didn't expect that */
2244 DBUG_PRINT("info", ("Column %u has native default, but shouldn't."
2245 " Flags=%u, type=%u",
2246 field->field_index, field->flags, field->real_type()));
2247 defaults_aligned= false;
2248 break;
2249 }
2250 }
2251 }
2252 tmp_restore_column_map(table->read_set, old_map);
2253 }
2254
2255 return (defaults_aligned? 0: -1);
2256 }
2257
get_metadata(THD * thd,const char * path)2258 int ha_ndbcluster::get_metadata(THD *thd, const char *path)
2259 {
2260 Ndb *ndb= get_thd_ndb(thd)->ndb;
2261 NDBDICT *dict= ndb->getDictionary();
2262 const NDBTAB *tab;
2263 int error;
2264 DBUG_ENTER("get_metadata");
2265 DBUG_PRINT("enter", ("m_tabname: %s, path: %s", m_tabname, path));
2266
2267 DBUG_ASSERT(m_table == NULL);
2268 DBUG_ASSERT(m_table_info == NULL);
2269
2270 uchar *data= NULL, *pack_data= NULL;
2271 size_t length, pack_length;
2272
2273 /*
2274 Compare FrmData in NDB with frm file from disk.
2275 */
2276 error= 0;
2277 if (readfrm(path, &data, &length) ||
2278 packfrm(data, length, &pack_data, &pack_length))
2279 {
2280 my_free(data, MYF(MY_ALLOW_ZERO_PTR));
2281 my_free(pack_data, MYF(MY_ALLOW_ZERO_PTR));
2282 DBUG_RETURN(1);
2283 }
2284
2285 ndb->setDatabaseName(m_dbname);
2286 Ndb_table_guard ndbtab_g(dict, m_tabname);
2287 if (!(tab= ndbtab_g.get_table()))
2288 ERR_RETURN(dict->getNdbError());
2289
2290 if (get_ndb_share_state(m_share) != NSS_ALTERED
2291 && cmp_frm(tab, pack_data, pack_length))
2292 {
2293 DBUG_PRINT("error",
2294 ("metadata, pack_length: %lu getFrmLength: %d memcmp: %d",
2295 (ulong) pack_length, tab->getFrmLength(),
2296 memcmp(pack_data, tab->getFrmData(), pack_length)));
2297 DBUG_DUMP("pack_data", (uchar*) pack_data, pack_length);
2298 DBUG_DUMP("frm", (uchar*) tab->getFrmData(), tab->getFrmLength());
2299 error= HA_ERR_TABLE_DEF_CHANGED;
2300 }
2301 my_free((char*)data, MYF(0));
2302 my_free((char*)pack_data, MYF(0));
2303
2304 /* Now check that any Ndb native defaults are aligned
2305 with MySQLD defaults
2306 */
2307 DBUG_ASSERT(check_default_values(tab) == 0);
2308
2309 if (error)
2310 goto err;
2311
2312 DBUG_PRINT("info", ("fetched table %s", tab->getName()));
2313 m_table= tab;
2314
2315 if (bitmap_init(&m_bitmap, m_bitmap_buf, table_share->fields, 0))
2316 {
2317 error= HA_ERR_OUT_OF_MEM;
2318 goto err;
2319 }
2320 if (table_share->primary_key == MAX_KEY)
2321 {
2322 /* Hidden primary key. */
2323 if ((error= add_hidden_pk_ndb_record(dict)) != 0)
2324 goto err;
2325 }
2326
2327 if ((error= add_table_ndb_record(dict)) != 0)
2328 goto err;
2329
2330 /*
2331 Approx. write size in bytes over transporter
2332 */
2333 m_bytes_per_write= 12 + tab->getRowSizeInBytes() + 4 * tab->getNoOfColumns();
2334
2335 /* Open indexes */
2336 if ((error= open_indexes(thd, ndb, table, FALSE)) != 0)
2337 goto err;
2338
2339 /*
2340 Backward compatibility for tables created without tablespace
2341 in .frm => read tablespace setting from engine
2342 */
2343 if (table_share->mysql_version < 50120 &&
2344 !table_share->tablespace /* safety */)
2345 {
2346 Uint32 id;
2347 if (tab->getTablespace(&id))
2348 {
2349 NdbDictionary::Tablespace ts= dict->getTablespace(id);
2350 NdbError ndberr= dict->getNdbError();
2351 if (ndberr.classification == NdbError::NoError)
2352 {
2353 const char *tablespace= ts.getName();
2354 const size_t tablespace_len= strlen(tablespace);
2355 if (tablespace_len != 0)
2356 {
2357 DBUG_PRINT("info", ("Found tablespace '%s'", tablespace));
2358 table_share->tablespace= strmake_root(&table_share->mem_root,
2359 tablespace,
2360 tablespace_len);
2361 }
2362 }
2363 }
2364 }
2365
2366 ndbtab_g.release();
2367
2368 #ifdef HAVE_NDB_BINLOG
2369 ndbcluster_read_binlog_replication(thd, ndb, m_share, m_table,
2370 ::server_id, table, FALSE);
2371 #endif
2372
2373 DBUG_RETURN(0);
2374
2375 err:
2376 ndbtab_g.invalidate();
2377 m_table= NULL;
2378 DBUG_RETURN(error);
2379 }
2380
fix_unique_index_attr_order(NDB_INDEX_DATA & data,const NDBINDEX * index,KEY * key_info)2381 static int fix_unique_index_attr_order(NDB_INDEX_DATA &data,
2382 const NDBINDEX *index,
2383 KEY *key_info)
2384 {
2385 DBUG_ENTER("fix_unique_index_attr_order");
2386 unsigned sz= index->getNoOfIndexColumns();
2387
2388 if (data.unique_index_attrid_map)
2389 my_free((char*)data.unique_index_attrid_map, MYF(0));
2390 data.unique_index_attrid_map= (uchar*)my_malloc(sz,MYF(MY_WME));
2391 if (data.unique_index_attrid_map == 0)
2392 {
2393 sql_print_error("fix_unique_index_attr_order: my_malloc(%u) failure",
2394 (unsigned int)sz);
2395 DBUG_RETURN(HA_ERR_OUT_OF_MEM);
2396 }
2397
2398 KEY_PART_INFO* key_part= key_info->key_part;
2399 KEY_PART_INFO* end= key_part+key_info->user_defined_key_parts;
2400 DBUG_ASSERT(key_info->user_defined_key_parts == sz);
2401 for (unsigned i= 0; key_part != end; key_part++, i++)
2402 {
2403 const char *field_name= key_part->field->field_name;
2404 #ifndef DBUG_OFF
2405 data.unique_index_attrid_map[i]= 255;
2406 #endif
2407 for (unsigned j= 0; j < sz; j++)
2408 {
2409 const NDBCOL *c= index->getColumn(j);
2410 if (strcmp(field_name, c->getName()) == 0)
2411 {
2412 data.unique_index_attrid_map[i]= j;
2413 break;
2414 }
2415 }
2416 DBUG_ASSERT(data.unique_index_attrid_map[i] != 255);
2417 }
2418 DBUG_RETURN(0);
2419 }
2420
2421 /*
2422 Create all the indexes for a table.
2423 If any index should fail to be created,
2424 the error is returned immediately
2425 */
create_indexes(THD * thd,Ndb * ndb,TABLE * tab)2426 int ha_ndbcluster::create_indexes(THD *thd, Ndb *ndb, TABLE *tab)
2427 {
2428 uint i;
2429 int error= 0;
2430 const char *index_name;
2431 KEY* key_info= tab->key_info;
2432 const char **key_name= tab->s->keynames.type_names;
2433 DBUG_ENTER("ha_ndbcluster::create_indexes");
2434
2435 for (i= 0; i < tab->s->keys; i++, key_info++, key_name++)
2436 {
2437 index_name= *key_name;
2438 NDB_INDEX_TYPE idx_type= get_index_type_from_table(i);
2439 error= create_index(thd, index_name, key_info, idx_type, i);
2440 if (error)
2441 {
2442 DBUG_PRINT("error", ("Failed to create index %u", i));
2443 break;
2444 }
2445 }
2446
2447 DBUG_RETURN(error);
2448 }
2449
ndb_init_index(NDB_INDEX_DATA & data)2450 static void ndb_init_index(NDB_INDEX_DATA &data)
2451 {
2452 data.type= UNDEFINED_INDEX;
2453 data.status= UNDEFINED;
2454 data.unique_index= NULL;
2455 data.index= NULL;
2456 data.unique_index_attrid_map= NULL;
2457 data.ndb_record_key= NULL;
2458 data.ndb_unique_record_key= NULL;
2459 data.ndb_unique_record_row= NULL;
2460 }
2461
ndb_clear_index(NDBDICT * dict,NDB_INDEX_DATA & data)2462 static void ndb_clear_index(NDBDICT *dict, NDB_INDEX_DATA &data)
2463 {
2464 if (data.unique_index_attrid_map)
2465 {
2466 my_free((char*)data.unique_index_attrid_map, MYF(0));
2467 }
2468 if (data.ndb_unique_record_key)
2469 dict->releaseRecord(data.ndb_unique_record_key);
2470 if (data.ndb_unique_record_row)
2471 dict->releaseRecord(data.ndb_unique_record_row);
2472 if (data.ndb_record_key)
2473 dict->releaseRecord(data.ndb_record_key);
2474 ndb_init_index(data);
2475 }
2476
/*
  Copy 'from' to 'to', replacing every occurrence of the character
  'protect' by '@' followed by "00" and the decimal character code
  (e.g. '/' becomes "@0047").

  from       NUL terminated input string
  to         Output buffer, always NUL terminated when to_length > 0
  to_length  Number of bytes that may be written to 'to', including the
             terminating NUL. With 0 nothing is written at all.
  protect    Character to replace; expected to be a plain ASCII character
             so that its decimal code has at most three digits

  The output is truncated when it does not fit. If there is room for '@'
  but not for the digits, only the '@' is emitted.

  All bounds checks are written as "to_length - tpos > N". The former
  "tpos < to_length - N" wrapped around for to_length < N (unsigned
  arithmetic) and allowed writes beyond the buffer.
*/
static
void ndb_protect_char(const char* from, char* to, uint to_length, char protect)
{
  uint fpos= 0, tpos= 0;

  if (to_length == 0)
    return;                              /* no room even for the NUL */

  /* Invariant: tpos < to_length, hence to_length - tpos cannot wrap */
  while(from[fpos] != '\0' && to_length - tpos > 1)
  {
    if (from[fpos] == protect)
    {
      to[tpos++]= '@';
      /* Needs up to 5 characters plus the NUL written by sprintf */
      if (to_length - tpos > 5)
      {
        int len= sprintf(to+tpos, "00%u", (uint) protect);
        tpos+= len;
      }
    }
    else
    {
      to[tpos++]= from[fpos];
    }
    fpos++;
  }
  to[tpos]= '\0';
}
2502
2503 /*
2504 Associate a direct reference to an index handle
2505 with an index (for faster access)
2506 */
add_index_handle(THD * thd,NDBDICT * dict,KEY * key_info,const char * key_name,uint index_no)2507 int ha_ndbcluster::add_index_handle(THD *thd, NDBDICT *dict, KEY *key_info,
2508 const char *key_name, uint index_no)
2509 {
2510 char index_name[FN_LEN + 1];
2511 int error= 0;
2512
2513 NDB_INDEX_TYPE idx_type= get_index_type_from_table(index_no);
2514 m_index[index_no].type= idx_type;
2515 DBUG_ENTER("ha_ndbcluster::add_index_handle");
2516 DBUG_PRINT("enter", ("table %s", m_tabname));
2517
2518 ndb_protect_char(key_name, index_name, sizeof(index_name) - 1, '/');
2519 if (idx_type != PRIMARY_KEY_INDEX && idx_type != UNIQUE_INDEX)
2520 {
2521 DBUG_PRINT("info", ("Get handle to index %s", index_name));
2522 const NDBINDEX *index;
2523 do
2524 {
2525 index= dict->getIndexGlobal(index_name, *m_table);
2526 if (!index)
2527 ERR_RETURN(dict->getNdbError());
2528 DBUG_PRINT("info", ("index: 0x%lx id: %d version: %d.%d status: %d",
2529 (long) index,
2530 index->getObjectId(),
2531 index->getObjectVersion() & 0xFFFFFF,
2532 index->getObjectVersion() >> 24,
2533 index->getObjectStatus()));
2534 DBUG_ASSERT(index->getObjectStatus() ==
2535 NdbDictionary::Object::Retrieved);
2536 break;
2537 } while (1);
2538 m_index[index_no].index= index;
2539 }
2540 if (idx_type == UNIQUE_ORDERED_INDEX || idx_type == UNIQUE_INDEX)
2541 {
2542 char unique_index_name[FN_LEN + 1];
2543 static const char* unique_suffix= "$unique";
2544 m_has_unique_index= TRUE;
2545 strxnmov(unique_index_name, FN_LEN, index_name, unique_suffix, NullS);
2546 DBUG_PRINT("info", ("Get handle to unique_index %s", unique_index_name));
2547 const NDBINDEX *index;
2548 do
2549 {
2550 index= dict->getIndexGlobal(unique_index_name, *m_table);
2551 if (!index)
2552 ERR_RETURN(dict->getNdbError());
2553 DBUG_PRINT("info", ("index: 0x%lx id: %d version: %d.%d status: %d",
2554 (long) index,
2555 index->getObjectId(),
2556 index->getObjectVersion() & 0xFFFFFF,
2557 index->getObjectVersion() >> 24,
2558 index->getObjectStatus()));
2559 DBUG_ASSERT(index->getObjectStatus() ==
2560 NdbDictionary::Object::Retrieved);
2561 break;
2562 } while (1);
2563 m_index[index_no].unique_index= index;
2564 error= fix_unique_index_attr_order(m_index[index_no], index, key_info);
2565 }
2566
2567 if (!error)
2568 error= add_index_ndb_record(dict, key_info, index_no);
2569
2570 if (!error)
2571 m_index[index_no].status= ACTIVE;
2572
2573 DBUG_RETURN(error);
2574 }
2575
/*
  We use this function to convert null bit masks, as found in class Field,
  to bit numbers, as used in NdbRecord.

  The mask must have exactly one of its eight bits set; the position of
  that bit (0..7) is returned. Any other mask trips a debug assert and
  yields 0.
*/
static uint
null_bit_mask_to_bit_number(uchar bit_mask)
{
  for (uint bit_no= 0; bit_no < 8; bit_no++)
  {
    if (bit_mask == (1U << bit_no))
      return bit_no;
  }
  DBUG_ASSERT(false);
  return 0;
}
2598
2599 static void
ndb_set_record_specification(uint field_no,NdbDictionary::RecordSpecification * spec,const TABLE * table,const NdbDictionary::Table * ndb_table)2600 ndb_set_record_specification(uint field_no,
2601 NdbDictionary::RecordSpecification *spec,
2602 const TABLE *table,
2603 const NdbDictionary::Table *ndb_table)
2604 {
2605 spec->column= ndb_table->getColumn(field_no);
2606 spec->offset= Uint32(table->field[field_no]->ptr - table->record[0]);
2607 if (table->field[field_no]->null_ptr)
2608 {
2609 spec->nullbit_byte_offset=
2610 Uint32(table->field[field_no]->null_ptr - table->record[0]);
2611 spec->nullbit_bit_in_byte=
2612 null_bit_mask_to_bit_number(table->field[field_no]->null_bit);
2613 }
2614 else if (table->field[field_no]->type() == MYSQL_TYPE_BIT)
2615 {
2616 /* We need to store the position of the overflow bits. */
2617 const Field_bit* field_bit= static_cast<Field_bit*>(table->field[field_no]);
2618 spec->nullbit_byte_offset=
2619 Uint32(field_bit->bit_ptr - table->record[0]);
2620 spec->nullbit_bit_in_byte= field_bit->bit_ofs;
2621 }
2622 else
2623 {
2624 spec->nullbit_byte_offset= 0;
2625 spec->nullbit_bit_in_byte= 0;
2626 }
2627 }
2628
2629 int
add_table_ndb_record(NDBDICT * dict)2630 ha_ndbcluster::add_table_ndb_record(NDBDICT *dict)
2631 {
2632 DBUG_ENTER("ha_ndbcluster::add_table_ndb_record()");
2633 NdbDictionary::RecordSpecification spec[NDB_MAX_ATTRIBUTES_IN_TABLE + 2];
2634 NdbRecord *rec;
2635 uint i;
2636
2637 for (i= 0; i < table_share->fields; i++)
2638 {
2639 ndb_set_record_specification(i, &spec[i], table, m_table);
2640 }
2641
2642 rec= dict->createRecord(m_table, spec, i, sizeof(spec[0]),
2643 NdbDictionary::RecMysqldBitfield);
2644 if (! rec)
2645 ERR_RETURN(dict->getNdbError());
2646 m_ndb_record= rec;
2647
2648 DBUG_RETURN(0);
2649 }
2650
2651 /* Create NdbRecord for setting hidden primary key from Uint64. */
2652 int
add_hidden_pk_ndb_record(NDBDICT * dict)2653 ha_ndbcluster::add_hidden_pk_ndb_record(NDBDICT *dict)
2654 {
2655 DBUG_ENTER("ha_ndbcluster::add_hidden_pk_ndb_record");
2656 NdbDictionary::RecordSpecification spec[1];
2657 NdbRecord *rec;
2658
2659 spec[0].column= m_table->getColumn(table_share->fields);
2660 spec[0].offset= 0;
2661 spec[0].nullbit_byte_offset= 0;
2662 spec[0].nullbit_bit_in_byte= 0;
2663
2664 rec= dict->createRecord(m_table, spec, 1, sizeof(spec[0]));
2665 if (! rec)
2666 ERR_RETURN(dict->getNdbError());
2667 m_ndb_hidden_key_record= rec;
2668
2669 DBUG_RETURN(0);
2670 }
2671
2672 int
add_index_ndb_record(NDBDICT * dict,KEY * key_info,uint index_no)2673 ha_ndbcluster::add_index_ndb_record(NDBDICT *dict, KEY *key_info, uint index_no)
2674 {
2675 DBUG_ENTER("ha_ndbcluster::add_index_ndb_record");
2676 NdbDictionary::RecordSpecification spec[NDB_MAX_ATTRIBUTES_IN_TABLE + 2];
2677 NdbRecord *rec;
2678
2679 Uint32 offset= 0;
2680 for (uint i= 0; i < key_info->user_defined_key_parts; i++)
2681 {
2682 KEY_PART_INFO *kp= &key_info->key_part[i];
2683
2684 spec[i].column= m_table->getColumn(kp->fieldnr - 1);
2685 if (! spec[i].column)
2686 ERR_RETURN(dict->getNdbError());
2687 if (kp->null_bit)
2688 {
2689 /* Nullable column. */
2690 spec[i].offset= offset + 1; // First byte is NULL flag
2691 spec[i].nullbit_byte_offset= offset;
2692 spec[i].nullbit_bit_in_byte= 0;
2693 }
2694 else
2695 {
2696 /* Not nullable column. */
2697 spec[i].offset= offset;
2698 spec[i].nullbit_byte_offset= 0;
2699 spec[i].nullbit_bit_in_byte= 0;
2700 }
2701 offset+= kp->store_length;
2702 }
2703
2704 if (m_index[index_no].index)
2705 {
2706 /*
2707 Enable MysqldShrinkVarchar flag so that the two-byte length used by
2708 mysqld for short varchar keys is correctly converted into a one-byte
2709 length used by Ndb kernel.
2710 */
2711 rec= dict->createRecord(m_index[index_no].index, m_table,
2712 spec, key_info->user_defined_key_parts, sizeof(spec[0]),
2713 ( NdbDictionary::RecMysqldShrinkVarchar |
2714 NdbDictionary::RecMysqldBitfield ));
2715 if (! rec)
2716 ERR_RETURN(dict->getNdbError());
2717 m_index[index_no].ndb_record_key= rec;
2718 }
2719 else
2720 m_index[index_no].ndb_record_key= NULL;
2721
2722 if (m_index[index_no].unique_index)
2723 {
2724 rec= dict->createRecord(m_index[index_no].unique_index, m_table,
2725 spec, key_info->user_defined_key_parts, sizeof(spec[0]),
2726 ( NdbDictionary::RecMysqldShrinkVarchar |
2727 NdbDictionary::RecMysqldBitfield ));
2728 if (! rec)
2729 ERR_RETURN(dict->getNdbError());
2730 m_index[index_no].ndb_unique_record_key= rec;
2731 }
2732 else if (index_no == table_share->primary_key)
2733 {
2734 /* The primary key is special, there is no explicit NDB index associated. */
2735 rec= dict->createRecord(m_table,
2736 spec, key_info->user_defined_key_parts, sizeof(spec[0]),
2737 ( NdbDictionary::RecMysqldShrinkVarchar |
2738 NdbDictionary::RecMysqldBitfield ));
2739 if (! rec)
2740 ERR_RETURN(dict->getNdbError());
2741 m_index[index_no].ndb_unique_record_key= rec;
2742 }
2743 else
2744 m_index[index_no].ndb_unique_record_key= NULL;
2745
2746 /* Now do the same, but this time with offsets from Field, for row access. */
2747 for (uint i= 0; i < key_info->user_defined_key_parts; i++)
2748 {
2749 const KEY_PART_INFO *kp= &key_info->key_part[i];
2750
2751 spec[i].offset= kp->offset;
2752 if (kp->null_bit)
2753 {
2754 /* Nullable column. */
2755 spec[i].nullbit_byte_offset= kp->null_offset;
2756 spec[i].nullbit_bit_in_byte= null_bit_mask_to_bit_number(kp->null_bit);
2757 }
2758 else
2759 {
2760 /* Not nullable column. */
2761 spec[i].nullbit_byte_offset= 0;
2762 spec[i].nullbit_bit_in_byte= 0;
2763 }
2764 }
2765
2766 if (m_index[index_no].unique_index)
2767 {
2768 rec= dict->createRecord(m_index[index_no].unique_index, m_table,
2769 spec, key_info->user_defined_key_parts, sizeof(spec[0]),
2770 NdbDictionary::RecMysqldBitfield);
2771 if (! rec)
2772 ERR_RETURN(dict->getNdbError());
2773 m_index[index_no].ndb_unique_record_row= rec;
2774 }
2775 else if (index_no == table_share->primary_key)
2776 {
2777 rec= dict->createRecord(m_table,
2778 spec, key_info->user_defined_key_parts, sizeof(spec[0]),
2779 NdbDictionary::RecMysqldBitfield);
2780 if (! rec)
2781 ERR_RETURN(dict->getNdbError());
2782 m_index[index_no].ndb_unique_record_row= rec;
2783 }
2784 else
2785 m_index[index_no].ndb_unique_record_row= NULL;
2786
2787 DBUG_RETURN(0);
2788 }
2789
2790 /*
2791 Associate index handles for each index of a table
2792 */
open_indexes(THD * thd,Ndb * ndb,TABLE * tab,bool ignore_error)2793 int ha_ndbcluster::open_indexes(THD *thd, Ndb *ndb, TABLE *tab,
2794 bool ignore_error)
2795 {
2796 uint i;
2797 int error= 0;
2798 NDBDICT *dict= ndb->getDictionary();
2799 KEY* key_info= tab->key_info;
2800 const char **key_name= tab->s->keynames.type_names;
2801 DBUG_ENTER("ha_ndbcluster::open_indexes");
2802 m_has_unique_index= FALSE;
2803 btree_keys.clear_all();
2804 for (i= 0; i < tab->s->keys; i++, key_info++, key_name++)
2805 {
2806 if ((error= add_index_handle(thd, dict, key_info, *key_name, i)))
2807 {
2808 if (ignore_error)
2809 m_index[i].index= m_index[i].unique_index= NULL;
2810 else
2811 break;
2812 }
2813 m_index[i].null_in_unique_index= FALSE;
2814 if (check_index_fields_not_null(key_info))
2815 m_index[i].null_in_unique_index= TRUE;
2816
2817 if (error == 0 && MY_TEST(index_flags(i, 0, 0) & HA_READ_RANGE))
2818 btree_keys.set_bit(i);
2819 }
2820
2821 if (error && !ignore_error)
2822 {
2823 while (i > 0)
2824 {
2825 i--;
2826 if (m_index[i].index)
2827 {
2828 dict->removeIndexGlobal(*m_index[i].index, 1);
2829 m_index[i].index= NULL;
2830 }
2831 if (m_index[i].unique_index)
2832 {
2833 dict->removeIndexGlobal(*m_index[i].unique_index, 1);
2834 m_index[i].unique_index= NULL;
2835 }
2836 }
2837 }
2838
2839 DBUG_ASSERT(error == 0 || error == 4243);
2840
2841 DBUG_RETURN(error);
2842 }
2843
2844 /*
2845 Renumber indexes in index list by shifting out
2846 indexes that are to be dropped
2847 */
renumber_indexes(Ndb * ndb,TABLE * tab)2848 void ha_ndbcluster::renumber_indexes(Ndb *ndb, TABLE *tab)
2849 {
2850 uint i;
2851 const char *index_name;
2852 KEY* key_info= tab->key_info;
2853 const char **key_name= tab->s->keynames.type_names;
2854 DBUG_ENTER("ha_ndbcluster::renumber_indexes");
2855
2856 for (i= 0; i < tab->s->keys; i++, key_info++, key_name++)
2857 {
2858 index_name= *key_name;
2859 NDB_INDEX_TYPE idx_type= get_index_type_from_table(i);
2860 m_index[i].type= idx_type;
2861 if (m_index[i].status == TO_BE_DROPPED)
2862 {
2863 DBUG_PRINT("info", ("Shifting index %s(%i) out of the list",
2864 index_name, i));
2865 NDB_INDEX_DATA tmp;
2866 uint j= i + 1;
2867 // Shift index out of list
2868 while(j != MAX_KEY && m_index[j].status != UNDEFINED)
2869 {
2870 tmp= m_index[j - 1];
2871 m_index[j - 1]= m_index[j];
2872 m_index[j]= tmp;
2873 j++;
2874 }
2875 }
2876 }
2877
2878 DBUG_VOID_RETURN;
2879 }
2880
2881 /*
2882 Drop all indexes that are marked for deletion
2883 */
drop_indexes(Ndb * ndb,TABLE * tab)2884 int ha_ndbcluster::drop_indexes(Ndb *ndb, TABLE *tab)
2885 {
2886 uint i;
2887 int error= 0;
2888 const char *index_name;
2889 KEY* key_info= tab->key_info;
2890 NDBDICT *dict= ndb->getDictionary();
2891 DBUG_ENTER("ha_ndbcluster::drop_indexes");
2892
2893 for (i= 0; i < tab->s->keys; i++, key_info++)
2894 {
2895 NDB_INDEX_TYPE idx_type= get_index_type_from_table(i);
2896 m_index[i].type= idx_type;
2897 if (m_index[i].status == TO_BE_DROPPED)
2898 {
2899 const NdbDictionary::Index *index= m_index[i].index;
2900 const NdbDictionary::Index *unique_index= m_index[i].unique_index;
2901
2902 if (index)
2903 {
2904 index_name= index->getName();
2905 DBUG_PRINT("info", ("Dropping index %u: %s", i, index_name));
2906 // Drop ordered index from ndb
2907 error= dict->dropIndexGlobal(*index);
2908 if (!error)
2909 {
2910 dict->removeIndexGlobal(*index, 1);
2911 m_index[i].index= NULL;
2912 }
2913 }
2914 if (!error && unique_index)
2915 {
2916 index_name= unique_index->getName();
2917 DBUG_PRINT("info", ("Dropping unique index %u: %s", i, index_name));
2918 // Drop unique index from ndb
2919 error= dict->dropIndexGlobal(*unique_index);
2920 if (!error)
2921 {
2922 dict->removeIndexGlobal(*unique_index, 1);
2923 m_index[i].unique_index= NULL;
2924 }
2925 }
2926 if (error)
2927 DBUG_RETURN(error);
2928 ndb_clear_index(dict, m_index[i]);
2929 continue;
2930 }
2931 }
2932
2933 DBUG_RETURN(error);
2934 }
2935
2936 /**
2937 Decode the type of an index from information
2938 provided in table object.
2939 */
get_index_type_from_table(uint inx) const2940 NDB_INDEX_TYPE ha_ndbcluster::get_index_type_from_table(uint inx) const
2941 {
2942 return get_index_type_from_key(inx, table_share->key_info,
2943 inx == table_share->primary_key);
2944 }
2945
get_index_type_from_key(uint inx,KEY * key_info,bool primary) const2946 NDB_INDEX_TYPE ha_ndbcluster::get_index_type_from_key(uint inx,
2947 KEY *key_info,
2948 bool primary) const
2949 {
2950 bool is_hash_index= (key_info[inx].algorithm ==
2951 HA_KEY_ALG_HASH);
2952 if (primary)
2953 return is_hash_index ? PRIMARY_KEY_INDEX : PRIMARY_KEY_ORDERED_INDEX;
2954
2955 return ((key_info[inx].flags & HA_NOSAME) ?
2956 (is_hash_index ? UNIQUE_INDEX : UNIQUE_ORDERED_INDEX) :
2957 ORDERED_INDEX);
2958 }
2959
check_index_fields_not_null(KEY * key_info)2960 bool ha_ndbcluster::check_index_fields_not_null(KEY* key_info)
2961 {
2962 KEY_PART_INFO* key_part= key_info->key_part;
2963 KEY_PART_INFO* end= key_part+key_info->user_defined_key_parts;
2964 DBUG_ENTER("ha_ndbcluster::check_index_fields_not_null");
2965
2966 for (; key_part != end; key_part++)
2967 {
2968 Field* field= key_part->field;
2969 if (field->maybe_null())
2970 DBUG_RETURN(TRUE);
2971 }
2972
2973 DBUG_RETURN(FALSE);
2974 }
2975
release_metadata(THD * thd,Ndb * ndb)2976 void ha_ndbcluster::release_metadata(THD *thd, Ndb *ndb)
2977 {
2978 uint i;
2979
2980 DBUG_ENTER("release_metadata");
2981 DBUG_PRINT("enter", ("m_tabname: %s", m_tabname));
2982
2983 NDBDICT *dict= ndb->getDictionary();
2984 int invalidate_indexes= 0;
2985 if (thd && thd->lex && thd->lex->sql_command == SQLCOM_FLUSH)
2986 {
2987 invalidate_indexes = 1;
2988 }
2989 if (m_table != NULL)
2990 {
2991 if (m_ndb_record != NULL)
2992 {
2993 dict->releaseRecord(m_ndb_record);
2994 m_ndb_record= NULL;
2995 }
2996 if (m_ndb_hidden_key_record != NULL)
2997 {
2998 dict->releaseRecord(m_ndb_hidden_key_record);
2999 m_ndb_hidden_key_record= NULL;
3000 }
3001 if (m_table->getObjectStatus() == NdbDictionary::Object::Invalid)
3002 invalidate_indexes= 1;
3003 dict->removeTableGlobal(*m_table, invalidate_indexes);
3004 }
3005 // TODO investigate
3006 DBUG_ASSERT(m_table_info == NULL);
3007 m_table_info= NULL;
3008
3009 // Release index list
3010 for (i= 0; i < MAX_KEY; i++)
3011 {
3012 if (m_index[i].unique_index)
3013 {
3014 DBUG_ASSERT(m_table != NULL);
3015 dict->removeIndexGlobal(*m_index[i].unique_index, invalidate_indexes);
3016 }
3017 if (m_index[i].index)
3018 {
3019 DBUG_ASSERT(m_table != NULL);
3020 dict->removeIndexGlobal(*m_index[i].index, invalidate_indexes);
3021 }
3022 ndb_clear_index(dict, m_index[i]);
3023 }
3024
3025 m_table= NULL;
3026 DBUG_VOID_RETURN;
3027 }
3028
3029
3030 /*
3031 Map from thr_lock_type to NdbOperation::LockMode
3032 */
3033 static inline
get_ndb_lock_mode(enum thr_lock_type type)3034 NdbOperation::LockMode get_ndb_lock_mode(enum thr_lock_type type)
3035 {
3036 if (type >= TL_WRITE_ALLOW_WRITE)
3037 return NdbOperation::LM_Exclusive;
3038 if (type == TL_READ_WITH_SHARED_LOCKS)
3039 return NdbOperation::LM_Read;
3040 return NdbOperation::LM_CommittedRead;
3041 }
3042
3043
3044 static const ulong index_type_flags[]=
3045 {
3046 /* UNDEFINED_INDEX */
3047 0,
3048
3049 /* PRIMARY_KEY_INDEX */
3050 HA_ONLY_WHOLE_INDEX,
3051
3052 /* PRIMARY_KEY_ORDERED_INDEX */
3053 /*
3054 Enable HA_KEYREAD_ONLY when "sorted" indexes are supported,
3055 thus ORDER BY clauses can be optimized by reading directly
3056 through the index.
3057 */
3058 // HA_KEYREAD_ONLY |
3059 HA_READ_NEXT |
3060 HA_READ_PREV |
3061 HA_READ_RANGE |
3062 HA_READ_ORDER,
3063
3064 /* UNIQUE_INDEX */
3065 HA_ONLY_WHOLE_INDEX,
3066
3067 /* UNIQUE_ORDERED_INDEX */
3068 HA_READ_NEXT |
3069 HA_READ_PREV |
3070 HA_READ_RANGE |
3071 HA_READ_ORDER,
3072
3073 /* ORDERED_INDEX */
3074 HA_READ_NEXT |
3075 HA_READ_PREV |
3076 HA_READ_RANGE |
3077 HA_READ_ORDER
3078 };
3079
3080 static const int index_flags_size= sizeof(index_type_flags)/sizeof(ulong);
3081
get_index_type(uint idx_no) const3082 inline NDB_INDEX_TYPE ha_ndbcluster::get_index_type(uint idx_no) const
3083 {
3084 DBUG_ASSERT(idx_no < MAX_KEY);
3085 return m_index[idx_no].type;
3086 }
3087
has_null_in_unique_index(uint idx_no) const3088 inline bool ha_ndbcluster::has_null_in_unique_index(uint idx_no) const
3089 {
3090 DBUG_ASSERT(idx_no < MAX_KEY);
3091 return m_index[idx_no].null_in_unique_index;
3092 }
3093
3094
3095 /**
3096 Get the flags for an index.
3097
3098 @return
3099 flags depending on the type of the index.
3100 */
3101
index_flags(uint idx_no,uint part,bool all_parts) const3102 inline ulong ha_ndbcluster::index_flags(uint idx_no, uint part,
3103 bool all_parts) const
3104 {
3105 DBUG_ENTER("ha_ndbcluster::index_flags");
3106 DBUG_PRINT("enter", ("idx_no: %u", idx_no));
3107 DBUG_ASSERT(get_index_type_from_table(idx_no) < index_flags_size);
3108 DBUG_RETURN(index_type_flags[get_index_type_from_table(idx_no)] |
3109 HA_KEY_SCAN_NOT_ROR);
3110 }
3111
3112 bool
primary_key_is_clustered()3113 ha_ndbcluster::primary_key_is_clustered()
3114 {
3115
3116 if (table->s->primary_key == MAX_KEY)
3117 return false;
3118
3119 /*
3120 NOTE 1: our ordered indexes are not really clustered
3121 but since accesing data when scanning index is free
3122 it's a good approximation
3123
3124 NOTE 2: We really should consider DD attributes here too
3125 (for which there is IO to read data when scanning index)
3126 but that will need to be handled later...
3127 */
3128 const ndb_index_type idx_type =
3129 get_index_type_from_table(table->s->primary_key);
3130 return (idx_type == PRIMARY_KEY_ORDERED_INDEX ||
3131 idx_type == UNIQUE_ORDERED_INDEX ||
3132 idx_type == ORDERED_INDEX);
3133 }
3134
check_index_fields_in_write_set(uint keyno)3135 bool ha_ndbcluster::check_index_fields_in_write_set(uint keyno)
3136 {
3137 KEY* key_info= table->key_info + keyno;
3138 KEY_PART_INFO* key_part= key_info->key_part;
3139 KEY_PART_INFO* end= key_part+key_info->user_defined_key_parts;
3140 uint i;
3141 DBUG_ENTER("check_index_fields_in_write_set");
3142
3143 for (i= 0; key_part != end; key_part++, i++)
3144 {
3145 Field* field= key_part->field;
3146 if (!bitmap_is_set(table->write_set, field->field_index))
3147 {
3148 DBUG_RETURN(false);
3149 }
3150 }
3151
3152 DBUG_RETURN(true);
3153 }
3154
3155
3156 /**
3157 Read one record from NDB using primary key.
3158 */
3159
pk_read(const uchar * key,uint key_len,uchar * buf,uint32 * part_id)3160 int ha_ndbcluster::pk_read(const uchar *key, uint key_len, uchar *buf,
3161 uint32 *part_id)
3162 {
3163 NdbConnection *trans= m_thd_ndb->trans;
3164 int res;
3165 DBUG_ENTER("pk_read");
3166 DBUG_PRINT("enter", ("key_len: %u read_set=%x",
3167 key_len, table->read_set->bitmap[0]));
3168 DBUG_DUMP("key", key, key_len);
3169 DBUG_ASSERT(trans);
3170
3171 NdbOperation::LockMode lm= get_ndb_lock_mode(m_lock.type);
3172
3173 #ifndef NDB_WITHOUT_JOIN_PUSHDOWN
3174 if (check_if_pushable(NdbQueryOperationDef::PrimaryKeyAccess, table->s->primary_key))
3175 {
3176 // Is parent of pushed join
3177 DBUG_ASSERT(lm == NdbOperation::LM_CommittedRead);
3178 const int error= pk_unique_index_read_key_pushed(table->s->primary_key, key,
3179 (m_user_defined_partitioning ?
3180 part_id : NULL));
3181 if (unlikely(error))
3182 DBUG_RETURN(error);
3183
3184 DBUG_ASSERT(m_active_query!=NULL);
3185 if ((res = execute_no_commit_ie(m_thd_ndb, trans)) != 0 ||
3186 m_active_query->getNdbError().code)
3187 {
3188 table->status= STATUS_NOT_FOUND;
3189 DBUG_RETURN(ndb_err(trans));
3190 }
3191
3192 int result= fetch_next_pushed();
3193 if (result == NdbQuery::NextResult_gotRow)
3194 {
3195 DBUG_RETURN(0);
3196 }
3197 else if (result == NdbQuery::NextResult_scanComplete)
3198 {
3199 DBUG_RETURN(HA_ERR_KEY_NOT_FOUND);
3200 }
3201 else
3202 {
3203 DBUG_RETURN(ndb_err(trans));
3204 }
3205 }
3206 else
3207 #endif
3208 {
3209 if (m_pushed_join_operation == PUSHED_ROOT)
3210 {
3211 m_thd_ndb->m_pushed_queries_dropped++;
3212 }
3213
3214 const NdbOperation *op;
3215 if (!(op= pk_unique_index_read_key(table->s->primary_key, key, buf, lm,
3216 (m_user_defined_partitioning ?
3217 part_id :
3218 NULL))))
3219 ERR_RETURN(trans->getNdbError());
3220
3221 if ((res = execute_no_commit_ie(m_thd_ndb, trans)) != 0 ||
3222 op->getNdbError().code)
3223 {
3224 table->status= STATUS_NOT_FOUND;
3225 DBUG_RETURN(ndb_err(trans));
3226 }
3227 table->status= 0;
3228 DBUG_RETURN(0);
3229 }
3230 }
3231
/**
  Update primary key or part id by doing delete insert.

  Steps, as performed below:
    1. If read_set does not cover all columns, read the columns that are
       in neither read_set nor write_set into new_data, so that the
       complete row can be re-inserted.
    2. Delete the old row with ndb_delete_row().
    3. Insert the new row with ndb_write_row(), with write_set
       temporarily set to all columns.
  If the insert fails while the transaction is still in state Started,
  the transaction is rolled back.

  @param thd          thread handle; passed to set_auto_inc() and checked
                      for slave_thread on the failure path
  @param old_data     row before the update; used to set up the key
                      reference and handed to ndb_delete_row()
  @param new_data     row after the update; columns read in step 1 are
                      stored into this buffer
  @param old_part_id  partition id of the old row, only used when
                      m_user_defined_partitioning is set

  @return 0 on success, otherwise an error code
*/

int ha_ndbcluster::ndb_pk_update_row(THD *thd,
                                     const uchar *old_data, uchar *new_data,
                                     uint32 old_part_id)
{
  NdbTransaction *trans= m_thd_ndb->trans;
  int error;
  const NdbOperation *op;
  DBUG_ENTER("ndb_pk_update_row");
  DBUG_ASSERT(trans);

  /* poptions stays NULL unless a partition id has to be supplied */
  NdbOperation::OperationOptions *poptions = NULL;
  NdbOperation::OperationOptions options;
  options.optionsPresent=0;

  DBUG_PRINT("info", ("primary key update or partition change, "
                      "doing read+delete+insert"));
  // Get all old fields, since we optimize away fields not in query

  const NdbRecord *key_rec;
  const uchar *key_row;

  if (m_user_defined_partitioning)
  {
    /* Direct the read below to the partition holding the old row */
    options.optionsPresent |= NdbOperation::OperationOptions::OO_PARTITION_ID;
    options.partitionId=old_part_id;
    poptions=&options;
  }

  /* Fills in key_rec and key_row from old_data */
  setup_key_ref_for_ndb_record(&key_rec, &key_row, old_data, FALSE);

  if (!bitmap_is_set_all(table->read_set))
  {
    /*
      Need to read rest of columns for later re-insert.

      Use mask only with columns that are not in write_set, not in
      read_set, and not part of the primary key.
    */

    /* m_bitmap= ~(read_set | write_set) */
    bitmap_copy(&m_bitmap, table->read_set);
    bitmap_union(&m_bitmap, table->write_set);
    bitmap_invert(&m_bitmap);
    if (!(op= trans->readTuple(key_rec, (const char *)key_row,
                               m_ndb_record, (char *)new_data,
                               get_ndb_lock_mode(m_lock.type),
                               (const unsigned char *)(m_bitmap.bitmap),
                               poptions,
                               sizeof(NdbOperation::OperationOptions))))
      ERR_RETURN(trans->getNdbError());

    if (table_share->blob_fields > 0)
    {
      /* Blob columns selected by m_bitmap are fetched separately */
      my_bitmap_map *old_map= dbug_tmp_use_all_columns(table, table->read_set);
      error= get_blob_values(op, new_data, &m_bitmap);
      dbug_tmp_restore_column_map(table->read_set, old_map);
      if (error != 0)
        ERR_RETURN(op->getNdbError());
    }
    /* Execute the read now, before the delete below is defined */
    if (execute_no_commit(m_thd_ndb, trans, m_ignore_no_key) != 0)
    {
      table->status= STATUS_NOT_FOUND;
      DBUG_RETURN(ndb_err(trans));
    }
  }

  // Delete old row
  error= ndb_delete_row(old_data, TRUE);
  if (error)
  {
    DBUG_PRINT("info", ("delete failed"));
    DBUG_RETURN(error);
  }

  // Insert new row
  DBUG_PRINT("info", ("delete succeded"));
  /* An active scan cursor means this update is part of a batch */
  bool batched_update= (m_active_cursor != 0);
  /*
    If we are updating a primary key with auto_increment
    then we need to update the auto_increment counter
  */
  if (table->found_next_number_field &&
      bitmap_is_set(table->write_set,
                    table->found_next_number_field->field_index) &&
      (error= set_auto_inc(thd, table->found_next_number_field)))
  {
    DBUG_RETURN(error);
  }

  /*
    We are mapping a MySQLD PK changing update to an NdbApi delete
    and insert.
    The original PK changing update may not have written new values
    to all columns, so the write set may be partial.
    We set the write set to be all columns so that all values are
    copied from the old row to the new row.
  */
  my_bitmap_map *old_map=
    tmp_use_all_columns(table, table->write_set);
  error= ndb_write_row(new_data, TRUE, batched_update);
  tmp_restore_column_map(table->write_set, old_map);

  if (error)
  {
    DBUG_PRINT("info", ("insert failed"));
    /* Roll back only if the transaction is still in state Started */
    if (trans->commitStatus() == NdbConnection::Started)
    {
      if (thd->slave_thread)
        g_ndb_slave_state.atTransactionAbort();
      /* Reset the unsent byte count and count the rollback as an execute */
      m_thd_ndb->m_unsent_bytes= 0;
      m_thd_ndb->m_execute_count++;
      DBUG_PRINT("info", ("execute_count: %u", m_thd_ndb->m_execute_count));
      /* NOTE(review): the result of the rollback is not checked */
      trans->execute(NdbTransaction::Rollback);
#ifdef FIXED_OLD_DATA_TO_ACTUALLY_CONTAIN_GOOD_DATA
      /* Only compiled when the macro above is defined: re-insert old row */
      int undo_res;
      // Undo delete_row(old_data)
      undo_res= ndb_write_row((uchar *)old_data, TRUE, batched_update);
      if (undo_res)
        push_warning(table->in_use,
                     Sql_condition::WARN_LEVEL_WARN,
                     undo_res,
                     "NDB failed undoing delete at primary key update");
#endif
    }
    DBUG_RETURN(error);
  }
  DBUG_PRINT("info", ("delete+insert succeeded"));

  DBUG_RETURN(0);
}
3365
3366 /**
3367 Check that all operations between first and last all
3368 have gotten the errcode
3369 If checking for HA_ERR_KEY_NOT_FOUND then update m_dupkey
3370 for all succeeding operations
3371 */
check_all_operations_for_error(NdbTransaction * trans,const NdbOperation * first,const NdbOperation * last,uint errcode)3372 bool ha_ndbcluster::check_all_operations_for_error(NdbTransaction *trans,
3373 const NdbOperation *first,
3374 const NdbOperation *last,
3375 uint errcode)
3376 {
3377 const NdbOperation *op= first;
3378 DBUG_ENTER("ha_ndbcluster::check_all_operations_for_error");
3379
3380 while(op)
3381 {
3382 NdbError err= op->getNdbError();
3383 if (err.status != NdbError::Success)
3384 {
3385 if (ndb_to_mysql_error(&err) != (int) errcode)
3386 DBUG_RETURN(FALSE);
3387 if (op == last) break;
3388 op= trans->getNextCompletedOperation(op);
3389 }
3390 else
3391 {
3392 // We found a duplicate
3393 if (op->getType() == NdbOperation::UniqueIndexAccess)
3394 {
3395 if (errcode == HA_ERR_KEY_NOT_FOUND)
3396 {
3397 NdbIndexOperation *iop= (NdbIndexOperation *) op;
3398 const NDBINDEX *index= iop->getIndex();
3399 // Find the key_no of the index
3400 for(uint i= 0; i<table->s->keys; i++)
3401 {
3402 if (m_index[i].unique_index == index)
3403 {
3404 m_dupkey= i;
3405 break;
3406 }
3407 }
3408 }
3409 }
3410 else
3411 {
3412 // Must have been primary key access
3413 DBUG_ASSERT(op->getType() == NdbOperation::PrimaryKeyAccess);
3414 if (errcode == HA_ERR_KEY_NOT_FOUND)
3415 m_dupkey= table->s->primary_key;
3416 }
3417 DBUG_RETURN(FALSE);
3418 }
3419 }
3420 DBUG_RETURN(TRUE);
3421 }
3422
3423
3424 /**
3425 * Check if record contains any null valued columns that are part of a key
3426 */
3427 static
3428 int
check_null_in_record(const KEY * key_info,const uchar * record)3429 check_null_in_record(const KEY* key_info, const uchar *record)
3430 {
3431 KEY_PART_INFO *curr_part, *end_part;
3432 curr_part= key_info->key_part;
3433 end_part= curr_part + key_info->user_defined_key_parts;
3434
3435 while (curr_part != end_part)
3436 {
3437 if (curr_part->null_bit &&
3438 (record[curr_part->null_offset] & curr_part->null_bit))
3439 return 1;
3440 curr_part++;
3441 }
3442 return 0;
3443 /*
3444 We could instead pre-compute a bitmask in table_share with one bit for
3445 every null-bit in the key, and so check this just by OR'ing the bitmask
3446 with the null bitmap in the record.
3447 But not sure it's worth it.
3448 */
3449 }
3450
3451 /* Empty mask and dummy row, for reading no attributes using NdbRecord. */
3452 /* Mask will be initialized to all zeros by linker. */
3453 static unsigned char empty_mask[(NDB_MAX_ATTRIBUTES_IN_TABLE+7)/8];
3454 static char dummy_row[1];
3455
/**
  Peek to check if any rows already exist with conflicting
  primary key or unique index values

  Read operations that fetch no columns (empty_mask / dummy_row) are
  defined for the primary key (not for NDB_UPDATE and not for tables
  without primary key) and for every unique index that overlaps
  write_set. They are then executed together and examined with
  check_all_operations_for_error().

  @param record    row whose key values are looked up
  @param write_op  kind of write being checked; compared against
                   NDB_UPDATE and NDB_INSERT below

  @return 0 when check_all_operations_for_error() returns FALSE, i.e.
          not every operation failed with HA_ERR_KEY_NOT_FOUND (m_dupkey
          may have been set by that call);
          HA_ERR_KEY_NOT_FOUND when no read operation was defined;
          otherwise the error from get_transaction(), from the partition
          function, from defining a read, or the value of ndb_err()
          (presumably HA_ERR_KEY_NOT_FOUND when every operation reported
          "not found" - confirm against ndb_err()).
*/

int ha_ndbcluster::peek_indexed_rows(const uchar *record,
                                     NDB_WRITE_OP write_op)
{
  NdbTransaction *trans;
  const NdbOperation *op;
  /* First and last of the read operations defined by this call */
  const NdbOperation *first, *last;
  NdbOperation::OperationOptions options;
  /* poptions stays NULL unless a partition id has to be supplied */
  NdbOperation::OperationOptions *poptions=NULL;
  options.optionsPresent = 0;
  uint i;
  int res, error;
  DBUG_ENTER("peek_indexed_rows");
  if (unlikely(!(trans= get_transaction(error))))
  {
    DBUG_RETURN(error);
  }
  const NdbOperation::LockMode lm = get_ndb_lock_mode(m_lock.type);
  first= NULL;
  if (write_op != NDB_UPDATE && table->s->primary_key != MAX_KEY)
  {
    /*
     * Fetch any row with colliding primary key
     */
    const NdbRecord *key_rec=
      m_index[table->s->primary_key].ndb_unique_record_row;

    if (m_user_defined_partitioning)
    {
      uint32 part_id;
      /* NOTE: this 'error' shadows the function level variable */
      int error;
      longlong func_value;
      my_bitmap_map *old_map= dbug_tmp_use_all_columns(table, table->read_set);
      error= m_part_info->get_partition_id(m_part_info, &part_id, &func_value);
      dbug_tmp_restore_column_map(table->read_set, old_map);
      if (error)
      {
        m_part_info->err_value= func_value;
        DBUG_RETURN(error);
      }
      /* Direct the primary key read to the computed partition */
      options.optionsPresent |= NdbOperation::OperationOptions::OO_PARTITION_ID;
      options.partitionId=part_id;
      poptions=&options;
    }

    if (!(op= trans->readTuple(key_rec, (const char *)record,
                               m_ndb_record, dummy_row, lm, empty_mask,
                               poptions,
                               sizeof(NdbOperation::OperationOptions))))
      ERR_RETURN(trans->getNdbError());

    first= op;
  }
  /*
   * Fetch any rows with colliding unique indexes
   */
  KEY* key_info;
  for (i= 0, key_info= table->key_info; i < table->s->keys; i++, key_info++)
  {
    /* Only unique, non primary keys with a column in write_set */
    if (i != table_share->primary_key &&
        key_info->flags & HA_NOSAME &&
        bitmap_is_overlapping(table->write_set, m_key_fields[i]))
    {
      /*
        A unique index is defined on table and it's being updated
        We cannot look up a NULL field value in a unique index. But since
        keys with NULLs are not indexed, such rows cannot conflict anyway, so
        we just skip the index in this case.
      */
      if (check_null_in_record(key_info, record))
      {
        DBUG_PRINT("info", ("skipping check for key with NULL"));
        continue;
      }
      /* Except for inserts, all columns of the index must be written */
      if (write_op != NDB_INSERT && !check_index_fields_in_write_set(i))
      {
        DBUG_PRINT("info", ("skipping check for key %u not in write_set", i));
        continue;
      }

      const NdbOperation *iop;
      const NdbRecord *key_rec= m_index[i].ndb_unique_record_row;
      /* No operation options are passed for the unique index reads */
      if (!(iop= trans->readTuple(key_rec, (const char *)record,
                                  m_ndb_record, dummy_row,
                                  lm, empty_mask)))
        ERR_RETURN(trans->getNdbError());

      if (!first)
        first= iop;
    }
  }
  last= trans->getLastDefinedOperation();
  /*
    NOTE(review): res is assigned but not examined; the outcome is
    derived from the per operation errors checked further down.
  */
  if (first)
    res= execute_no_commit_ie(m_thd_ndb, trans);
  else
  {
    // Table has no keys
    table->status= STATUS_NOT_FOUND;
    DBUG_RETURN(HA_ERR_KEY_NOT_FOUND);
  }
  /* TRUE means every operation failed with HA_ERR_KEY_NOT_FOUND */
  if (check_all_operations_for_error(trans, first, last,
                                     HA_ERR_KEY_NOT_FOUND))
  {
    table->status= STATUS_NOT_FOUND;
    DBUG_RETURN(ndb_err(trans));
  }
  else
  {
    DBUG_PRINT("info", ("m_dupkey %d", m_dupkey));
  }
  DBUG_RETURN(0);
}
3572
3573
3574 /**
3575 Read one record from NDB using unique secondary index.
3576 */
3577
unique_index_read(const uchar * key,uint key_len,uchar * buf)3578 int ha_ndbcluster::unique_index_read(const uchar *key,
3579 uint key_len, uchar *buf)
3580 {
3581 NdbTransaction *trans= m_thd_ndb->trans;
3582 DBUG_ENTER("ha_ndbcluster::unique_index_read");
3583 DBUG_PRINT("enter", ("key_len: %u, index: %u", key_len, active_index));
3584 DBUG_DUMP("key", key, key_len);
3585 DBUG_ASSERT(trans);
3586
3587 NdbOperation::LockMode lm= get_ndb_lock_mode(m_lock.type);
3588
3589 #ifndef NDB_WITHOUT_JOIN_PUSHDOWN
3590 if (check_if_pushable(NdbQueryOperationDef::UniqueIndexAccess, active_index))
3591 {
3592 DBUG_ASSERT(lm == NdbOperation::LM_CommittedRead);
3593 const int error= pk_unique_index_read_key_pushed(active_index, key, NULL);
3594 if (unlikely(error))
3595 DBUG_RETURN(error);
3596
3597 DBUG_ASSERT(m_active_query!=NULL);
3598 if (execute_no_commit_ie(m_thd_ndb, trans) != 0 ||
3599 m_active_query->getNdbError().code)
3600 {
3601 table->status= STATUS_GARBAGE;
3602 DBUG_RETURN(ndb_err(trans));
3603 }
3604
3605 int result= fetch_next_pushed();
3606 if (result == NdbQuery::NextResult_gotRow)
3607 {
3608 DBUG_RETURN(0);
3609 }
3610 else if (result == NdbQuery::NextResult_scanComplete)
3611 {
3612 DBUG_RETURN(HA_ERR_KEY_NOT_FOUND);
3613 }
3614 else
3615 {
3616 DBUG_RETURN(ndb_err(trans));
3617 }
3618 }
3619 else
3620 #endif
3621 {
3622 if (m_pushed_join_operation == PUSHED_ROOT)
3623 {
3624 m_thd_ndb->m_pushed_queries_dropped++;
3625 }
3626
3627 const NdbOperation *op;
3628
3629 if (!(op= pk_unique_index_read_key(active_index, key, buf, lm, NULL)))
3630 ERR_RETURN(trans->getNdbError());
3631
3632 if (execute_no_commit_ie(m_thd_ndb, trans) != 0 ||
3633 op->getNdbError().code)
3634 {
3635 int err= ndb_err(trans);
3636 if(err==HA_ERR_KEY_NOT_FOUND)
3637 table->status= STATUS_NOT_FOUND;
3638 else
3639 table->status= STATUS_GARBAGE;
3640
3641 DBUG_RETURN(err);
3642 }
3643
3644 table->status= 0;
3645 DBUG_RETURN(0);
3646 }
3647 }
3648
3649 int
scan_handle_lock_tuple(NdbScanOperation * scanOp,NdbTransaction * trans)3650 ha_ndbcluster::scan_handle_lock_tuple(NdbScanOperation *scanOp,
3651 NdbTransaction *trans)
3652 {
3653 DBUG_ENTER("ha_ndbcluster::scan_handle_lock_tuple");
3654 if (m_lock_tuple)
3655 {
3656 /*
3657 Lock level m_lock.type either TL_WRITE_ALLOW_WRITE
3658 (SELECT FOR UPDATE) or TL_READ_WITH_SHARED_LOCKS (SELECT
3659 LOCK WITH SHARE MODE) and row was not explictly unlocked
3660 with unlock_row() call
3661 */
3662 const NdbOperation *op;
3663 // Lock row
3664 DBUG_PRINT("info", ("Keeping lock on scanned row"));
3665
3666 if (!(op= scanOp->lockCurrentTuple(trans, m_ndb_record,
3667 dummy_row, empty_mask)))
3668 {
3669 /* purecov: begin inspected */
3670 m_lock_tuple= FALSE;
3671 ERR_RETURN(trans->getNdbError());
3672 /* purecov: end */
3673 }
3674 m_thd_ndb->m_unsent_bytes+=12;
3675 }
3676 m_lock_tuple= FALSE;
3677 DBUG_RETURN(0);
3678 }
3679
fetch_next(NdbScanOperation * cursor)3680 inline int ha_ndbcluster::fetch_next(NdbScanOperation* cursor)
3681 {
3682 DBUG_ENTER("fetch_next");
3683 int local_check;
3684 int error;
3685 NdbTransaction *trans= m_thd_ndb->trans;
3686
3687 DBUG_ASSERT(trans);
3688 if ((error= scan_handle_lock_tuple(cursor, trans)) != 0)
3689 DBUG_RETURN(error);
3690
3691 bool contact_ndb= m_lock.type < TL_WRITE_ALLOW_WRITE &&
3692 m_lock.type != TL_READ_WITH_SHARED_LOCKS;
3693 do {
3694 DBUG_PRINT("info", ("Call nextResult, contact_ndb: %d", contact_ndb));
3695 /*
3696 We can only handle one tuple with blobs at a time.
3697 */
3698 if (m_thd_ndb->m_unsent_bytes && m_blobs_pending)
3699 {
3700 if (execute_no_commit(m_thd_ndb, trans, m_ignore_no_key) != 0)
3701 DBUG_RETURN(ndb_err(trans));
3702 }
3703
3704 /* Should be no unexamined completed operations
3705 nextResult() on Blobs generates Blob part read ops,
3706 so we will free them here
3707 */
3708 release_completed_operations(trans);
3709
3710 if ((local_check= cursor->nextResult(&_m_next_row,
3711 contact_ndb,
3712 m_thd_ndb->m_force_send)) == 0)
3713 {
3714 /*
3715 Explicitly lock tuple if "select for update" or
3716 "select lock in share mode"
3717 */
3718 m_lock_tuple= (m_lock.type == TL_WRITE_ALLOW_WRITE
3719 ||
3720 m_lock.type == TL_READ_WITH_SHARED_LOCKS);
3721 DBUG_RETURN(0);
3722 }
3723 else if (local_check == 1 || local_check == 2)
3724 {
3725 // 1: No more records
3726 // 2: No more cached records
3727
3728 /*
3729 Before fetching more rows and releasing lock(s),
3730 all pending update or delete operations should
3731 be sent to NDB
3732 */
3733 DBUG_PRINT("info", ("thd_ndb->m_unsent_bytes: %ld",
3734 (long) m_thd_ndb->m_unsent_bytes));
3735 if (m_thd_ndb->m_unsent_bytes)
3736 {
3737 if ((error = flush_bulk_insert()) != 0)
3738 DBUG_RETURN(error);
3739 }
3740 contact_ndb= (local_check == 2);
3741 }
3742 else
3743 {
3744 DBUG_RETURN(ndb_err(trans));
3745 }
3746 } while (local_check == 2);
3747
3748 DBUG_RETURN(1);
3749 }
3750
fetch_next_pushed()3751 int ha_ndbcluster::fetch_next_pushed()
3752 {
3753 DBUG_ENTER("fetch_next_pushed (from pushed operation)");
3754
3755 DBUG_ASSERT(m_pushed_operation);
3756 NdbQuery::NextResultOutcome result= m_pushed_operation->nextResult(true, m_thd_ndb->m_force_send);
3757
3758 /**
3759 * Only prepare result & status from this operation in pushed join.
3760 * Consecutive rows are prepared through ::index_read_pushed() and
3761 * ::index_next_pushed() which unpack and set correct status for each row.
3762 */
3763 if (result == NdbQuery::NextResult_gotRow)
3764 {
3765 DBUG_ASSERT(m_next_row!=NULL);
3766 DBUG_PRINT("info", ("One more record found"));
3767 table->status= 0;
3768 unpack_record(table->record[0], m_next_row);
3769 // m_thd_ndb->m_pushed_reads++;
3770 // DBUG_RETURN(0)
3771 }
3772 else if (result == NdbQuery::NextResult_scanComplete)
3773 {
3774 DBUG_ASSERT(m_next_row==NULL);
3775 DBUG_PRINT("info", ("No more records"));
3776 table->status= STATUS_NOT_FOUND;
3777 // m_thd_ndb->m_pushed_reads++;
3778 // DBUG_RETURN(HA_ERR_END_OF_FILE);
3779 }
3780 else
3781 {
3782 DBUG_PRINT("info", ("Error from 'nextResult()'"));
3783 table->status= STATUS_GARBAGE;
3784 // DBUG_ASSERT(false);
3785 // DBUG_RETURN(ndb_err(m_thd_ndb->trans));
3786 }
3787 DBUG_RETURN(result);
3788 }
3789
3790 /**
3791 Get the first record from an indexed table access being a child
3792 operation in a pushed join. Fetch will be from prefetched
3793 cached records which are materialized into the bound buffer
3794 areas as result of this call.
3795 */
3796
3797 int
index_read_pushed(uchar * buf,const uchar * key,key_part_map keypart_map)3798 ha_ndbcluster::index_read_pushed(uchar *buf, const uchar *key,
3799 key_part_map keypart_map)
3800 {
3801 DBUG_ENTER("index_read_pushed");
3802
3803 // Handler might have decided to not execute the pushed joins which has been prepared
3804 // In this case we do an unpushed index_read based on 'Plain old' NdbOperations
3805 if (unlikely(!check_is_pushed()))
3806 {
3807 DBUG_RETURN(index_read_map(buf, key, keypart_map, HA_READ_KEY_EXACT));
3808 }
3809
3810 // Might need to re-establish first result row (wrt. its parents which may have been navigated)
3811 NdbQuery::NextResultOutcome result= m_pushed_operation->firstResult();
3812
3813 // Result from pushed operation will be referred by 'm_next_row' if non-NULL
3814 if (result == NdbQuery::NextResult_gotRow)
3815 {
3816 DBUG_ASSERT(m_next_row!=NULL);
3817 unpack_record(buf, m_next_row);
3818 table->status= 0;
3819 m_thd_ndb->m_pushed_reads++;
3820 }
3821 else
3822 {
3823 DBUG_ASSERT(result!=NdbQuery::NextResult_gotRow);
3824 table->status= STATUS_NOT_FOUND;
3825 DBUG_PRINT("info", ("No record found"));
3826 // m_thd_ndb->m_pushed_reads++;
3827 // DBUG_RETURN(HA_ERR_END_OF_FILE);
3828 }
3829 DBUG_RETURN(0);
3830 }
3831
3832
3833 /**
3834 Get the next record from an indexes table access being a child
3835 operation in a pushed join. Fetch will be from prefetched
3836 cached records which are materialized into the bound buffer
3837 areas as result of this call.
3838 */
index_next_pushed(uchar * buf)3839 int ha_ndbcluster::index_next_pushed(uchar *buf)
3840 {
3841 DBUG_ENTER("index_next_pushed");
3842
3843 // Handler might have decided to not execute the pushed joins which has been prepared
3844 // In this case we do an unpushed index_read based on 'Plain old' NdbOperations
3845 if (unlikely(!check_is_pushed()))
3846 {
3847 DBUG_RETURN(index_next(buf));
3848 }
3849
3850 DBUG_ASSERT(m_pushed_join_operation>PUSHED_ROOT); // Child of a pushed join
3851 DBUG_ASSERT(m_active_query==NULL);
3852
3853 int res = fetch_next_pushed();
3854 if (res == NdbQuery::NextResult_gotRow)
3855 {
3856 DBUG_RETURN(0);
3857 }
3858 else if (res == NdbQuery::NextResult_scanComplete)
3859 {
3860 DBUG_RETURN(HA_ERR_END_OF_FILE);
3861 }
3862 else
3863 {
3864 DBUG_RETURN(ndb_err(m_thd_ndb->trans));
3865 }
3866 }
3867
3868
3869 /**
3870 Get the next record of a started scan. Try to fetch
3871 it locally from NdbApi cached records if possible,
3872 otherwise ask NDB for more.
3873
3874 @note
3875 If this is a update/delete make sure to not contact
3876 NDB before any pending ops have been sent to NDB.
3877 */
3878
next_result(uchar * buf)3879 inline int ha_ndbcluster::next_result(uchar *buf)
3880 {
3881 int res;
3882 DBUG_ENTER("next_result");
3883
3884 if (m_active_cursor)
3885 {
3886 if ((res= fetch_next(m_active_cursor)) == 0)
3887 {
3888 DBUG_PRINT("info", ("One more record found"));
3889
3890 unpack_record(buf, m_next_row);
3891 table->status= 0;
3892 DBUG_RETURN(0);
3893 }
3894 else if (res == 1)
3895 {
3896 // No more records
3897 table->status= STATUS_NOT_FOUND;
3898
3899 DBUG_PRINT("info", ("No more records"));
3900 DBUG_RETURN(HA_ERR_END_OF_FILE);
3901 }
3902 else
3903 {
3904 DBUG_RETURN(ndb_err(m_thd_ndb->trans));
3905 }
3906 }
3907 else if (m_active_query)
3908 {
3909 res= fetch_next_pushed();
3910 if (res == NdbQuery::NextResult_gotRow)
3911 {
3912 DBUG_RETURN(0);
3913 }
3914 else if (res == NdbQuery::NextResult_scanComplete)
3915 {
3916 DBUG_RETURN(HA_ERR_END_OF_FILE);
3917 }
3918 else
3919 {
3920 DBUG_RETURN(ndb_err(m_thd_ndb->trans));
3921 }
3922 }
3923 else
3924 DBUG_RETURN(HA_ERR_END_OF_FILE);
3925 }
3926
3927 /**
3928 Do a primary key or unique key index read operation.
3929 The key value is taken from a buffer in mysqld key format.
3930 */
3931 const NdbOperation *
pk_unique_index_read_key(uint idx,const uchar * key,uchar * buf,NdbOperation::LockMode lm,Uint32 * ppartition_id)3932 ha_ndbcluster::pk_unique_index_read_key(uint idx, const uchar *key, uchar *buf,
3933 NdbOperation::LockMode lm,
3934 Uint32 *ppartition_id)
3935 {
3936 const NdbOperation *op;
3937 const NdbRecord *key_rec;
3938 NdbOperation::OperationOptions options;
3939 NdbOperation::OperationOptions *poptions = NULL;
3940 options.optionsPresent= 0;
3941 NdbOperation::GetValueSpec gets[2];
3942
3943 DBUG_ASSERT(m_thd_ndb->trans);
3944
3945 if (idx != MAX_KEY)
3946 key_rec= m_index[idx].ndb_unique_record_key;
3947 else
3948 key_rec= m_ndb_hidden_key_record;
3949
3950 /* Initialize the null bitmap, setting unused null bits to 1. */
3951 memset(buf, 0xff, table->s->null_bytes);
3952
3953 if (table_share->primary_key == MAX_KEY)
3954 {
3955 get_hidden_fields_keyop(&options, gets);
3956 poptions= &options;
3957 }
3958
3959 if (ppartition_id != NULL)
3960 {
3961 assert(m_user_defined_partitioning);
3962 options.optionsPresent|= NdbOperation::OperationOptions::OO_PARTITION_ID;
3963 options.partitionId= *ppartition_id;
3964 poptions= &options;
3965 }
3966
3967 op= m_thd_ndb->trans->readTuple(key_rec, (const char *)key, m_ndb_record,
3968 (char *)buf, lm,
3969 (uchar *)(table->read_set->bitmap), poptions,
3970 sizeof(NdbOperation::OperationOptions));
3971
3972 if (uses_blob_value(table->read_set) &&
3973 get_blob_values(op, buf, table->read_set) != 0)
3974 return NULL;
3975
3976 return op;
3977 }
3978
3979 extern void sql_print_information(const char *format, ...);
3980
3981 #ifndef NDB_WITHOUT_JOIN_PUSHDOWN
3982 static
3983 bool
is_shrinked_varchar(const Field * field)3984 is_shrinked_varchar(const Field *field)
3985 {
3986 if (field->real_type() == MYSQL_TYPE_VARCHAR)
3987 {
3988 if (((Field_varstring*)field)->length_bytes == 1)
3989 return true;
3990 }
3991
3992 return false;
3993 }
3994
3995 int
pk_unique_index_read_key_pushed(uint idx,const uchar * key,Uint32 * ppartition_id)3996 ha_ndbcluster::pk_unique_index_read_key_pushed(uint idx,
3997 const uchar *key,
3998 Uint32 *ppartition_id)
3999 {
4000 DBUG_ENTER("pk_unique_index_read_key_pushed");
4001 NdbOperation::OperationOptions options;
4002 NdbOperation::OperationOptions *poptions = NULL;
4003 options.optionsPresent= 0;
4004 NdbOperation::GetValueSpec gets[2];
4005
4006 DBUG_ASSERT(m_thd_ndb->trans);
4007 DBUG_ASSERT(idx < MAX_KEY);
4008
4009 if (m_active_query)
4010 {
4011 m_active_query->close(FALSE);
4012 m_active_query= NULL;
4013 }
4014
4015 if (table_share->primary_key == MAX_KEY)
4016 {
4017 get_hidden_fields_keyop(&options, gets);
4018 poptions= &options;
4019 }
4020
4021 if (ppartition_id != NULL)
4022 {
4023 assert(m_user_defined_partitioning);
4024 options.optionsPresent|= NdbOperation::OperationOptions::OO_PARTITION_ID;
4025 options.partitionId= *ppartition_id;
4026 poptions= &options;
4027 }
4028
4029 KEY *key_def= &table->key_info[idx];
4030 KEY_PART_INFO *key_part;
4031
4032 uint i;
4033 Uint32 offset= 0;
4034 NdbQueryParamValue paramValues[ndb_pushed_join::MAX_KEY_PART];
4035 DBUG_ASSERT(key_def->user_defined_key_parts <= ndb_pushed_join::MAX_KEY_PART);
4036
4037 uint map[ndb_pushed_join::MAX_KEY_PART];
4038 ndbcluster_build_key_map(m_table, m_index[idx], &table->key_info[idx], map);
4039
4040 // Bind key values defining root of pushed join
4041 for (i = 0, key_part= key_def->key_part; i < key_def->user_defined_key_parts; i++, key_part++)
4042 {
4043 bool shrinkVarChar= is_shrinked_varchar(key_part->field);
4044
4045 if (key_part->null_bit) // Column is nullable
4046 {
4047 DBUG_ASSERT(idx != table_share->primary_key); // PK can't be nullable
4048 DBUG_ASSERT(*(key+offset)==0); // Null values not allowed in key
4049 // Value is imm. after NULL indicator
4050 paramValues[map[i]]= NdbQueryParamValue(key+offset+1,shrinkVarChar);
4051 }
4052 else // Non-nullable column
4053 {
4054 paramValues[map[i]]= NdbQueryParamValue(key+offset,shrinkVarChar);
4055 }
4056 offset+= key_part->store_length;
4057 }
4058
4059 const int ret= create_pushed_join(paramValues, key_def->user_defined_key_parts);
4060 DBUG_RETURN(ret);
4061 }
4062
4063 #endif
4064
4065 /** Count number of columns in key part. */
4066 static uint
count_key_columns(const KEY * key_info,const key_range * key)4067 count_key_columns(const KEY *key_info, const key_range *key)
4068 {
4069 KEY_PART_INFO *first_key_part= key_info->key_part;
4070 KEY_PART_INFO *key_part_end= first_key_part + key_info->user_defined_key_parts;
4071 KEY_PART_INFO *key_part;
4072 uint length= 0;
4073 for(key_part= first_key_part; key_part < key_part_end; key_part++)
4074 {
4075 if (length >= key->length)
4076 break;
4077 length+= key_part->store_length;
4078 }
4079 return key_part - first_key_part;
4080 }
4081
4082 /* Helper method to compute NDB index bounds. Note: does not set range_no. */
4083 /* Stats queries may differ so add "from" 0:normal 1:RIR 2:RPK. */
4084 void
compute_index_bounds(NdbIndexScanOperation::IndexBound & bound,const KEY * key_info,const key_range * start_key,const key_range * end_key,int from)4085 compute_index_bounds(NdbIndexScanOperation::IndexBound & bound,
4086 const KEY *key_info,
4087 const key_range *start_key, const key_range *end_key,
4088 int from)
4089 {
4090 DBUG_ENTER("ha_ndbcluster::compute_index_bounds");
4091 DBUG_PRINT("info", ("from: %d", from));
4092
4093 #ifndef DBUG_OFF
4094 DBUG_PRINT("info", ("key parts: %u length: %u",
4095 key_info->user_defined_key_parts, key_info->key_length));
4096 {
4097 for (uint j= 0; j <= 1; j++)
4098 {
4099 const key_range* kr= (j == 0 ? start_key : end_key);
4100 if (kr)
4101 {
4102 DBUG_PRINT("info", ("key range %u: length: %u map: %lx flag: %d",
4103 j, kr->length, kr->keypart_map, kr->flag));
4104 DBUG_DUMP("key", kr->key, kr->length);
4105 }
4106 else
4107 {
4108 DBUG_PRINT("info", ("key range %u: none", j));
4109 }
4110 }
4111 }
4112 #endif
4113
4114 if (start_key)
4115 {
4116 bound.low_key= (const char*)start_key->key;
4117 bound.low_key_count= count_key_columns(key_info, start_key);
4118 bound.low_inclusive=
4119 start_key->flag != HA_READ_AFTER_KEY &&
4120 start_key->flag != HA_READ_BEFORE_KEY;
4121 }
4122 else
4123 {
4124 bound.low_key= NULL;
4125 bound.low_key_count= 0;
4126 }
4127
4128 /* RIR query for x >= 1 inexplicably passes HA_READ_KEY_EXACT. */
4129 if (start_key &&
4130 (start_key->flag == HA_READ_KEY_EXACT ||
4131 start_key->flag == HA_READ_PREFIX_LAST) &&
4132 from != 1)
4133 {
4134 bound.high_key= bound.low_key;
4135 bound.high_key_count= bound.low_key_count;
4136 bound.high_inclusive= TRUE;
4137 }
4138 else if (end_key)
4139 {
4140 bound.high_key= (const char*)end_key->key;
4141 bound.high_key_count= count_key_columns(key_info, end_key);
4142 /*
4143 For some reason, 'where b >= 1 and b <= 3' uses HA_READ_AFTER_KEY for
4144 the end_key.
4145 So HA_READ_AFTER_KEY in end_key sets high_inclusive, even though in
4146 start_key it does not set low_inclusive.
4147 */
4148 bound.high_inclusive= end_key->flag != HA_READ_BEFORE_KEY;
4149 if (end_key->flag == HA_READ_KEY_EXACT ||
4150 end_key->flag == HA_READ_PREFIX_LAST)
4151 {
4152 bound.low_key= bound.high_key;
4153 bound.low_key_count= bound.high_key_count;
4154 bound.low_inclusive= TRUE;
4155 }
4156 }
4157 else
4158 {
4159 bound.high_key= NULL;
4160 bound.high_key_count= 0;
4161 }
4162 DBUG_PRINT("info", ("start_flag=%d end_flag=%d"
4163 " lo_keys=%d lo_incl=%d hi_keys=%d hi_incl=%d",
4164 start_key?start_key->flag:0, end_key?end_key->flag:0,
4165 bound.low_key_count,
4166 bound.low_key_count?bound.low_inclusive:0,
4167 bound.high_key_count,
4168 bound.high_key_count?bound.high_inclusive:0));
4169 DBUG_VOID_RETURN;
4170 }
4171
4172 /**
4173 Start ordered index scan in NDB
4174 */
4175
ordered_index_scan(const key_range * start_key,const key_range * end_key,bool sorted,bool descending,uchar * buf,part_id_range * part_spec)4176 int ha_ndbcluster::ordered_index_scan(const key_range *start_key,
4177 const key_range *end_key,
4178 bool sorted, bool descending,
4179 uchar* buf, part_id_range *part_spec)
4180 {
4181 NdbTransaction *trans;
4182 NdbIndexScanOperation *op;
4183 int error;
4184
4185 DBUG_ENTER("ha_ndbcluster::ordered_index_scan");
4186 DBUG_PRINT("enter", ("index: %u, sorted: %d, descending: %d read_set=0x%x",
4187 active_index, sorted, descending, table->read_set->bitmap[0]));
4188 DBUG_PRINT("enter", ("Starting new ordered scan on %s", m_tabname));
4189
4190 // Check that sorted seems to be initialised
4191 DBUG_ASSERT(sorted == 0 || sorted == 1);
4192
4193 if (unlikely(!(trans= get_transaction(error))))
4194 {
4195 DBUG_RETURN(error);
4196 }
4197
4198 if ((error= close_scan()))
4199 DBUG_RETURN(error);
4200
4201 const NdbOperation::LockMode lm = get_ndb_lock_mode(m_lock.type);
4202
4203 const NdbRecord *key_rec= m_index[active_index].ndb_record_key;
4204 const NdbRecord *row_rec= m_ndb_record;
4205
4206 NdbIndexScanOperation::IndexBound bound;
4207 NdbIndexScanOperation::IndexBound *pbound = NULL;
4208 if (start_key != NULL || end_key != NULL)
4209 {
4210 /*
4211 Compute bounds info, reversing range boundaries
4212 if descending
4213 */
4214 compute_index_bounds(bound,
4215 table->key_info + active_index,
4216 (descending?
4217 end_key : start_key),
4218 (descending?
4219 start_key : end_key),
4220 0);
4221 bound.range_no = 0;
4222 pbound = &bound;
4223 }
4224
4225 #ifndef NDB_WITHOUT_JOIN_PUSHDOWN
4226 if (check_if_pushable(NdbQueryOperationDef::OrderedIndexScan, active_index,
4227 sorted))
4228 {
4229 const int error= create_pushed_join();
4230 if (unlikely(error))
4231 DBUG_RETURN(error);
4232
4233 NdbQuery* const query= m_active_query;
4234 if (sorted && query->getQueryOperation((uint)PUSHED_ROOT)
4235 ->setOrdering(descending ? NdbQueryOptions::ScanOrdering_descending
4236 : NdbQueryOptions::ScanOrdering_ascending))
4237 {
4238 ERR_RETURN(query->getNdbError());
4239 }
4240
4241 if (pbound && query->setBound(key_rec, pbound)!=0)
4242 ERR_RETURN(query->getNdbError());
4243
4244 m_thd_ndb->m_scan_count++;
4245
4246 bool prunable = false;
4247 if (unlikely(query->isPrunable(prunable) != 0))
4248 ERR_RETURN(query->getNdbError());
4249 if (prunable)
4250 m_thd_ndb->m_pruned_scan_count++;
4251
4252 DBUG_ASSERT(!uses_blob_value(table->read_set)); // Can't have BLOB in pushed joins (yet)
4253 }
4254 else
4255 #endif
4256 {
4257 if (m_pushed_join_operation == PUSHED_ROOT)
4258 {
4259 m_thd_ndb->m_pushed_queries_dropped++;
4260 }
4261
4262 NdbScanOperation::ScanOptions options;
4263 options.optionsPresent=NdbScanOperation::ScanOptions::SO_SCANFLAGS;
4264 options.scan_flags=0;
4265
4266 NdbOperation::GetValueSpec gets[2];
4267 if (table_share->primary_key == MAX_KEY)
4268 get_hidden_fields_scan(&options, gets);
4269
4270 if (lm == NdbOperation::LM_Read)
4271 options.scan_flags|= NdbScanOperation::SF_KeyInfo;
4272 if (sorted)
4273 options.scan_flags|= NdbScanOperation::SF_OrderByFull;
4274 if (descending)
4275 options.scan_flags|= NdbScanOperation::SF_Descending;
4276
4277 /* Partition pruning */
4278 if (m_use_partition_pruning &&
4279 m_user_defined_partitioning && part_spec != NULL &&
4280 part_spec->start_part == part_spec->end_part)
4281 {
4282 /* Explicitly set partition id when pruning User-defined partitioned scan */
4283 options.partitionId = part_spec->start_part;
4284 options.optionsPresent |= NdbScanOperation::ScanOptions::SO_PARTITION_ID;
4285 }
4286
4287 NdbInterpretedCode code(m_table);
4288 if (m_cond && m_cond->generate_scan_filter(&code, &options))
4289 ERR_RETURN(code.getNdbError());
4290
4291 if (!(op= trans->scanIndex(key_rec, row_rec, lm,
4292 (uchar *)(table->read_set->bitmap),
4293 pbound,
4294 &options,
4295 sizeof(NdbScanOperation::ScanOptions))))
4296 ERR_RETURN(trans->getNdbError());
4297
4298 DBUG_PRINT("info", ("Is scan pruned to 1 partition? : %u", op->getPruned()));
4299 m_thd_ndb->m_scan_count++;
4300 m_thd_ndb->m_pruned_scan_count += (op->getPruned()? 1 : 0);
4301
4302 if (uses_blob_value(table->read_set) &&
4303 get_blob_values(op, NULL, table->read_set) != 0)
4304 ERR_RETURN(op->getNdbError());
4305
4306 m_active_cursor= op;
4307 }
4308
4309 if (sorted)
4310 {
4311 m_thd_ndb->m_sorted_scan_count++;
4312 }
4313
4314 if (execute_no_commit(m_thd_ndb, trans, m_ignore_no_key) != 0)
4315 DBUG_RETURN(ndb_err(trans));
4316
4317 DBUG_RETURN(next_result(buf));
4318 }
4319
4320 static
4321 int
guess_scan_flags(NdbOperation::LockMode lm,const NDBTAB * tab,const MY_BITMAP * readset)4322 guess_scan_flags(NdbOperation::LockMode lm,
4323 const NDBTAB* tab, const MY_BITMAP* readset)
4324 {
4325 int flags= 0;
4326 flags|= (lm == NdbOperation::LM_Read) ? NdbScanOperation::SF_KeyInfo : 0;
4327 if (tab->checkColumns(0, 0) & 2)
4328 {
4329 int ret = tab->checkColumns(readset->bitmap, no_bytes_in_map(readset));
4330
4331 if (ret & 2)
4332 { // If disk columns...use disk scan
4333 flags |= NdbScanOperation::SF_DiskScan;
4334 }
4335 else if ((ret & 4) == 0 && (lm == NdbOperation::LM_Exclusive))
4336 {
4337 // If no mem column is set and exclusive...guess disk scan
4338 flags |= NdbScanOperation::SF_DiskScan;
4339 }
4340 }
4341 return flags;
4342 }
4343
4344 /*
4345 Start full table scan in NDB or unique index scan
4346 */
4347
full_table_scan(const KEY * key_info,const key_range * start_key,const key_range * end_key,uchar * buf)4348 int ha_ndbcluster::full_table_scan(const KEY* key_info,
4349 const key_range *start_key,
4350 const key_range *end_key,
4351 uchar *buf)
4352 {
4353 int error;
4354 NdbTransaction *trans= m_thd_ndb->trans;
4355 part_id_range part_spec;
4356 bool use_set_part_id= FALSE;
4357 NdbOperation::GetValueSpec gets[2];
4358
4359 DBUG_ENTER("full_table_scan");
4360 DBUG_PRINT("enter", ("Starting new scan on %s", m_tabname));
4361
4362 if (m_use_partition_pruning && m_user_defined_partitioning)
4363 {
4364 DBUG_ASSERT(m_pushed_join_operation != PUSHED_ROOT);
4365 part_spec.start_part= 0;
4366 part_spec.end_part= m_part_info->get_tot_partitions() - 1;
4367 prune_partition_set(table, &part_spec);
4368 DBUG_PRINT("info", ("part_spec.start_part: %u part_spec.end_part: %u",
4369 part_spec.start_part, part_spec.end_part));
4370 /*
4371 If partition pruning has found no partition in set
4372 we can return HA_ERR_END_OF_FILE
4373 */
4374 if (part_spec.start_part > part_spec.end_part)
4375 {
4376 DBUG_RETURN(HA_ERR_END_OF_FILE);
4377 }
4378
4379 if (part_spec.start_part == part_spec.end_part)
4380 {
4381 /*
4382 * Only one partition is required to scan, if sorted is required
4383 * don't need it anymore since output from one ordered partitioned
4384 * index is always sorted.
4385 *
4386 * Note : This table scan pruning currently only occurs for
4387 * UserDefined partitioned tables.
4388 * It could be extended to occur for natively partitioned tables if
4389 * the Partitioning layer can make a key (e.g. start or end key)
4390 * available so that we can determine the correct pruning in the
4391 * NDBAPI layer.
4392 */
4393 use_set_part_id= TRUE;
4394 if (!trans)
4395 if (unlikely(!(trans= get_transaction_part_id(part_spec.start_part,
4396 error))))
4397 DBUG_RETURN(error);
4398 }
4399 }
4400 if (!trans)
4401 if (unlikely(!(trans= start_transaction(error))))
4402 DBUG_RETURN(error);
4403
4404 const NdbOperation::LockMode lm = get_ndb_lock_mode(m_lock.type);
4405 NdbScanOperation::ScanOptions options;
4406 options.optionsPresent = (NdbScanOperation::ScanOptions::SO_SCANFLAGS |
4407 NdbScanOperation::ScanOptions::SO_PARALLEL);
4408 options.scan_flags = guess_scan_flags(lm, m_table, table->read_set);
4409 options.parallel= DEFAULT_PARALLELISM;
4410
4411 if (use_set_part_id) {
4412 assert(m_user_defined_partitioning);
4413 options.optionsPresent|= NdbScanOperation::ScanOptions::SO_PARTITION_ID;
4414 options.partitionId = part_spec.start_part;
4415 };
4416
4417 if (table_share->primary_key == MAX_KEY)
4418 get_hidden_fields_scan(&options, gets);
4419
4420 #ifndef NDB_WITHOUT_JOIN_PUSHDOWN
4421 if (check_if_pushable(NdbQueryOperationDef::TableScan))
4422 {
4423 const int error= create_pushed_join();
4424 if (unlikely(error))
4425 DBUG_RETURN(error);
4426
4427 m_thd_ndb->m_scan_count++;
4428 DBUG_ASSERT(!uses_blob_value(table->read_set)); // Can't have BLOB in pushed joins (yet)
4429 }
4430 else
4431 #endif
4432 {
4433 if (m_pushed_join_operation == PUSHED_ROOT)
4434 {
4435 m_thd_ndb->m_pushed_queries_dropped++;
4436 }
4437
4438 NdbScanOperation *op;
4439 NdbInterpretedCode code(m_table);
4440
4441 if (!key_info)
4442 {
4443 if (m_cond && m_cond->generate_scan_filter(&code, &options))
4444 ERR_RETURN(code.getNdbError());
4445 }
4446 else
4447 {
4448 /* Unique index scan in NDB (full table scan with scan filter) */
4449 DBUG_PRINT("info", ("Starting unique index scan"));
4450 if (!m_cond)
4451 m_cond= new ha_ndbcluster_cond;
4452
4453 if (!m_cond)
4454 {
4455 my_errno= HA_ERR_OUT_OF_MEM;
4456 DBUG_RETURN(my_errno);
4457 }
4458 if (m_cond->generate_scan_filter_from_key(&code, &options, key_info, start_key, end_key, buf))
4459 ERR_RETURN(code.getNdbError());
4460 }
4461
4462 if (!(op= trans->scanTable(m_ndb_record, lm,
4463 (uchar *)(table->read_set->bitmap),
4464 &options, sizeof(NdbScanOperation::ScanOptions))))
4465 ERR_RETURN(trans->getNdbError());
4466
4467 m_thd_ndb->m_scan_count++;
4468 m_thd_ndb->m_pruned_scan_count += (op->getPruned()? 1 : 0);
4469
4470 DBUG_ASSERT(m_active_cursor==NULL);
4471 m_active_cursor= op;
4472
4473 if (uses_blob_value(table->read_set) &&
4474 get_blob_values(op, NULL, table->read_set) != 0)
4475 ERR_RETURN(op->getNdbError());
4476 } // if (check_if_pushable(NdbQueryOperationDef::TableScan))
4477
4478 if (execute_no_commit(m_thd_ndb, trans, m_ignore_no_key) != 0)
4479 DBUG_RETURN(ndb_err(trans));
4480 DBUG_PRINT("exit", ("Scan started successfully"));
4481 DBUG_RETURN(next_result(buf));
4482 } // ha_ndbcluster::full_table_scan()
4483
4484 int
set_auto_inc(THD * thd,Field * field)4485 ha_ndbcluster::set_auto_inc(THD *thd, Field *field)
4486 {
4487 DBUG_ENTER("ha_ndbcluster::set_auto_inc");
4488 bool read_bit= bitmap_is_set(table->read_set, field->field_index);
4489 bitmap_set_bit(table->read_set, field->field_index);
4490 Uint64 next_val= (Uint64) field->val_int() + 1;
4491 if (!read_bit)
4492 bitmap_clear_bit(table->read_set, field->field_index);
4493 DBUG_RETURN(set_auto_inc_val(thd, next_val));
4494 }
4495
4496 inline
4497 int
set_auto_inc_val(THD * thd,Uint64 value)4498 ha_ndbcluster::set_auto_inc_val(THD *thd, Uint64 value)
4499 {
4500 Ndb *ndb= get_ndb(thd);
4501 DBUG_ENTER("ha_ndbcluster::set_auto_inc_val");
4502 #ifndef DBUG_OFF
4503 char buff[22];
4504 DBUG_PRINT("info",
4505 ("Trying to set next auto increment value to %s",
4506 llstr(value, buff)));
4507 #endif
4508 if (ndb->checkUpdateAutoIncrementValue(m_share->tuple_id_range, value))
4509 {
4510 Ndb_tuple_id_range_guard g(m_share);
4511 if (ndb->setAutoIncrementValue(m_table, g.range, value, TRUE)
4512 == -1)
4513 ERR_RETURN(ndb->getNdbError());
4514 }
4515 DBUG_RETURN(0);
4516 }
4517
4518 Uint32
setup_get_hidden_fields(NdbOperation::GetValueSpec gets[2])4519 ha_ndbcluster::setup_get_hidden_fields(NdbOperation::GetValueSpec gets[2])
4520 {
4521 Uint32 num_gets= 0;
4522 /*
4523 We need to read the hidden primary key, and possibly the FRAGMENT
4524 pseudo-column.
4525 */
4526 gets[num_gets].column= get_hidden_key_column();
4527 gets[num_gets].appStorage= &m_ref;
4528 num_gets++;
4529 if (m_user_defined_partitioning)
4530 {
4531 /* Need to read partition id to support ORDER BY columns. */
4532 gets[num_gets].column= NdbDictionary::Column::FRAGMENT;
4533 gets[num_gets].appStorage= &m_part_id;
4534 num_gets++;
4535 }
4536 return num_gets;
4537 }
4538
4539 void
get_hidden_fields_keyop(NdbOperation::OperationOptions * options,NdbOperation::GetValueSpec gets[2])4540 ha_ndbcluster::get_hidden_fields_keyop(NdbOperation::OperationOptions *options,
4541 NdbOperation::GetValueSpec gets[2])
4542 {
4543 Uint32 num_gets= setup_get_hidden_fields(gets);
4544 options->optionsPresent|= NdbOperation::OperationOptions::OO_GETVALUE;
4545 options->extraGetValues= gets;
4546 options->numExtraGetValues= num_gets;
4547 }
4548
4549 void
get_hidden_fields_scan(NdbScanOperation::ScanOptions * options,NdbOperation::GetValueSpec gets[2])4550 ha_ndbcluster::get_hidden_fields_scan(NdbScanOperation::ScanOptions *options,
4551 NdbOperation::GetValueSpec gets[2])
4552 {
4553 Uint32 num_gets= setup_get_hidden_fields(gets);
4554 options->optionsPresent|= NdbScanOperation::ScanOptions::SO_GETVALUE;
4555 options->extraGetValues= gets;
4556 options->numExtraGetValues= num_gets;
4557 }
4558
4559 inline void
eventSetAnyValue(THD * thd,NdbOperation::OperationOptions * options) const4560 ha_ndbcluster::eventSetAnyValue(THD *thd,
4561 NdbOperation::OperationOptions *options) const
4562 {
4563 options->anyValue= 0;
4564 if (unlikely(m_slow_path))
4565 {
4566 /*
4567 Ignore TNTO_NO_LOGGING for slave thd. It is used to indicate
4568 log-slave-updates option. This is instead handled in the
4569 injector thread, by looking explicitly at the
4570 opt_log_slave_updates flag.
4571 */
4572 Thd_ndb *thd_ndb= get_thd_ndb(thd);
4573 if (thd->slave_thread)
4574 {
4575 /*
4576 Slave-thread, we are applying a replicated event.
4577 We set the server_id to the value received from the log which
4578 may be a composite of server_id and other data according
4579 to the server_id_bits option.
4580 In future it may be useful to support *not* mapping composite
4581 AnyValues to/from Binlogged server-ids
4582 */
4583 options->optionsPresent |= NdbOperation::OperationOptions::OO_ANYVALUE;
4584 options->anyValue = thd_unmasked_server_id(thd);
4585 }
4586 else if (thd_ndb->trans_options & TNTO_NO_LOGGING)
4587 {
4588 options->optionsPresent |= NdbOperation::OperationOptions::OO_ANYVALUE;
4589 ndbcluster_anyvalue_set_nologging(options->anyValue);
4590 }
4591 }
4592 #ifndef DBUG_OFF
4593 /*
4594 MySQLD will set the user-portion of AnyValue (if any) to all 1s
4595 This tests code filtering ServerIds on the value of server-id-bits.
4596 */
4597 const char* p = getenv("NDB_TEST_ANYVALUE_USERDATA");
4598 if (p != 0 && *p != 0 && *p != '0' && *p != 'n' && *p != 'N')
4599 {
4600 options->optionsPresent |= NdbOperation::OperationOptions::OO_ANYVALUE;
4601 dbug_ndbcluster_anyvalue_set_userbits(options->anyValue);
4602 }
4603 #endif
4604 }
4605
4606 #ifdef HAVE_NDB_BINLOG
4607 /**
4608 prepare_conflict_detection
4609
4610 This method is called during operation definition by the slave,
4611 when writing to a table with conflict detection defined.
4612
4613 It is responsible for defining and adding any operation filtering
4614 required, and for saving any operation definition state required
4615 for post-execute analysis
4616 */
4617 int
prepare_conflict_detection(enum_conflicting_op_type op_type,const NdbRecord * key_rec,const uchar * old_data,const uchar * new_data,NdbInterpretedCode * code,NdbOperation::OperationOptions * options)4618 ha_ndbcluster::prepare_conflict_detection(enum_conflicting_op_type op_type,
4619 const NdbRecord* key_rec,
4620 const uchar* old_data,
4621 const uchar* new_data,
4622 NdbInterpretedCode* code,
4623 NdbOperation::OperationOptions* options)
4624 {
4625 DBUG_ENTER("prepare_conflict_detection");
4626
4627 int res = 0;
4628 const st_conflict_fn_def* conflict_fn = m_share->m_cfn_share->m_conflict_fn;
4629 assert( conflict_fn != NULL );
4630
4631
4632 /*
4633 Prepare interpreted code for operation (update + delete only) according
4634 to algorithm used
4635 */
4636 if (op_type != WRITE_ROW)
4637 {
4638 res = conflict_fn->prep_func(m_share->m_cfn_share,
4639 op_type,
4640 old_data,
4641 new_data,
4642 table->write_set,
4643 code);
4644
4645 if (!res)
4646 {
4647 /* Attach conflict detecting filter program to operation */
4648 options->optionsPresent|=NdbOperation::OperationOptions::OO_INTERPRETED;
4649 options->interpretedCode= code;
4650 }
4651 } // if (op_type != WRITE_ROW)
4652
4653 g_ndb_slave_state.current_conflict_defined_op_count++;
4654
4655 /* Now save data for potential insert to exceptions table... */
4656 const uchar* row_to_save = (op_type == DELETE_ROW)? old_data : new_data;
4657 Ndb_exceptions_data ex_data;
4658 ex_data.share= m_share;
4659 ex_data.key_rec= key_rec;
4660 ex_data.op_type= op_type;
4661 /*
4662 We need to save the row data for possible conflict resolution after
4663 execute().
4664 */
4665 ex_data.row= copy_row_to_buffer(m_thd_ndb, row_to_save);
4666 uchar* ex_data_buffer= get_buffer(m_thd_ndb, sizeof(ex_data));
4667 if (ex_data.row == NULL || ex_data_buffer == NULL)
4668 {
4669 DBUG_RETURN(HA_ERR_OUT_OF_MEM);
4670 }
4671 memcpy(ex_data_buffer, &ex_data, sizeof(ex_data));
4672
4673 /* Store ptr to exceptions data in operation 'customdata' ptr */
4674 options->optionsPresent|= NdbOperation::OperationOptions::OO_CUSTOMDATA;
4675 options->customData= (void*)ex_data_buffer;
4676
4677 DBUG_RETURN(0);
4678 }
4679
4680 /**
4681 handle_conflict_op_error
4682
4683 This method is called when an error is detected after executing an
4684 operation with conflict detection active.
4685
4686 If the operation error is related to conflict detection, handling
4687 starts.
4688
4689 Handling involves incrementing the relevant counter, and optionally
4690 refreshing the row and inserting an entry into the exceptions table
4691 */
4692
4693 int
handle_conflict_op_error(Thd_ndb * thd_ndb,NdbTransaction * trans,const NdbError & err,const NdbOperation * op)4694 handle_conflict_op_error(Thd_ndb* thd_ndb,
4695 NdbTransaction* trans,
4696 const NdbError& err,
4697 const NdbOperation* op)
4698 {
4699 DBUG_ENTER("handle_conflict_op_error");
4700 DBUG_PRINT("info", ("ndb error: %d", err.code));
4701
4702 if ((err.code == (int) error_conflict_fn_violation) ||
4703 (err.code == (int) error_op_after_refresh_op) ||
4704 (err.classification == NdbError::ConstraintViolation) ||
4705 (err.classification == NdbError::NoDataFound))
4706 {
4707 DBUG_PRINT("info",
4708 ("err.code %s (int) error_conflict_fn_violation, "
4709 "err.classification %s",
4710 err.code == (int) error_conflict_fn_violation ? "==" : "!=",
4711 err.classification
4712 == NdbError::ConstraintViolation
4713 ? "== NdbError::ConstraintViolation"
4714 : (err.classification == NdbError::NoDataFound
4715 ? "== NdbError::NoDataFound" : "!=")));
4716
4717 enum_conflict_cause conflict_cause;
4718
4719 if ((err.code == (int) error_conflict_fn_violation) ||
4720 (err.code == (int) error_op_after_refresh_op))
4721 {
4722 conflict_cause= ROW_IN_CONFLICT;
4723 }
4724 else if (err.classification == NdbError::ConstraintViolation)
4725 {
4726 conflict_cause= ROW_ALREADY_EXISTS;
4727 }
4728 else
4729 {
4730 assert(err.classification == NdbError::NoDataFound);
4731 conflict_cause= ROW_DOES_NOT_EXIST;
4732 }
4733
4734 const void* buffer=op->getCustomData();
4735 assert(buffer);
4736 Ndb_exceptions_data ex_data;
4737 memcpy(&ex_data, buffer, sizeof(ex_data));
4738 NDB_SHARE *share= ex_data.share;
4739 const NdbRecord* key_rec= ex_data.key_rec;
4740 const uchar* row= ex_data.row;
4741 enum_conflicting_op_type op_type = ex_data.op_type;
4742 DBUG_ASSERT(share != NULL && row != NULL);
4743
4744 NDB_CONFLICT_FN_SHARE* cfn_share= share->m_cfn_share;
4745 if (cfn_share)
4746 {
4747 enum_conflict_fn_type cft = cfn_share->m_conflict_fn->type;
4748 bool haveExTable = cfn_share->m_ex_tab != NULL;
4749
4750 g_ndb_slave_state.current_violation_count[cft]++;
4751
4752 {
4753 NdbError handle_error;
4754 if (handle_row_conflict(cfn_share,
4755 share->table_name,
4756 key_rec,
4757 row,
4758 op_type,
4759 conflict_cause,
4760 err,
4761 trans,
4762 handle_error))
4763 {
4764 /* Error with handling of row conflict */
4765 char msg[FN_REFLEN];
4766 my_snprintf(msg, sizeof(msg), "Row conflict handling "
4767 "on table %s hit Ndb error %d '%s'",
4768 share->table_name,
4769 handle_error.code,
4770 handle_error.message);
4771
4772 if (handle_error.status == NdbError::TemporaryError)
4773 {
4774 /* Slave will roll back and retry entire transaction. */
4775 ERR_RETURN(handle_error);
4776 }
4777 else
4778 {
4779 push_warning_printf(current_thd, Sql_condition::WARN_LEVEL_WARN,
4780 ER_EXCEPTIONS_WRITE_ERROR,
4781 ER(ER_EXCEPTIONS_WRITE_ERROR), msg);
4782 /* Slave will stop replication. */
4783 DBUG_RETURN(ER_EXCEPTIONS_WRITE_ERROR);
4784 }
4785 }
4786 }
4787
4788
4789 if (haveExTable)
4790 {
4791 NdbError ex_err;
4792 if (write_conflict_row(share, trans, row, ex_err))
4793 {
4794 char msg[FN_REFLEN];
4795 my_snprintf(msg, sizeof(msg), "table %s NDB error %d '%s'",
4796 cfn_share->m_ex_tab->getName(),
4797 ex_err.code, ex_err.message);
4798
4799 NdbDictionary::Dictionary* dict= thd_ndb->ndb->getDictionary();
4800
4801 if (ex_err.classification == NdbError::SchemaError)
4802 {
4803 dict->removeTableGlobal(*(cfn_share->m_ex_tab), false);
4804 cfn_share->m_ex_tab= NULL;
4805 }
4806 else if (ex_err.status == NdbError::TemporaryError)
4807 {
4808 /* Slave will roll back and retry entire transaction. */
4809 ERR_RETURN(ex_err);
4810 }
4811 else
4812 {
4813 push_warning_printf(current_thd, Sql_condition::WARN_LEVEL_WARN,
4814 ER_EXCEPTIONS_WRITE_ERROR,
4815 ER(ER_EXCEPTIONS_WRITE_ERROR), msg);
4816 /* Slave will stop replication. */
4817 DBUG_RETURN(ER_EXCEPTIONS_WRITE_ERROR);
4818 }
4819 }
4820 } // if (haveExTable)
4821
4822 DBUG_RETURN(0);
4823 }
4824 else
4825 {
4826 DBUG_PRINT("info", ("missing cfn_share"));
4827 DBUG_RETURN(0); // TODO : Correct?
4828 }
4829 }
4830 else
4831 {
4832 /* Non conflict related error */
4833 DBUG_PRINT("info", ("err.code == %u", err.code));
4834 DBUG_RETURN(err.code);
4835 }
4836
4837 DBUG_RETURN(0); // Reachable?
4838 }
4839 #endif /* HAVE_NDB_BINLOG */
4840
4841
4842 #ifdef HAVE_NDB_BINLOG
4843 /*
4844 is_serverid_local
4845 */
is_serverid_local(Uint32 serverid)4846 static bool is_serverid_local(Uint32 serverid)
4847 {
4848 /*
4849 If it's not our serverid, check the
4850 IGNORE_SERVER_IDS setting to check if
4851 it's local.
4852 */
4853 return ((serverid == ::server_id) ||
4854 ndb_mi_get_ignore_server_id(serverid));
4855 }
4856 #endif
4857
write_row(uchar * record)4858 int ha_ndbcluster::write_row(uchar *record)
4859 {
4860 DBUG_ENTER("ha_ndbcluster::write_row");
4861 #ifdef HAVE_NDB_BINLOG
4862 if (m_share == ndb_apply_status_share && table->in_use->slave_thread)
4863 {
4864 uint32 row_server_id, master_server_id= ndb_mi_get_master_server_id();
4865 uint64 row_epoch;
4866 memcpy(&row_server_id, table->field[0]->ptr + (record - table->record[0]),
4867 sizeof(row_server_id));
4868 memcpy(&row_epoch, table->field[1]->ptr + (record - table->record[0]),
4869 sizeof(row_epoch));
4870 g_ndb_slave_state.atApplyStatusWrite(master_server_id,
4871 row_server_id,
4872 row_epoch,
4873 is_serverid_local(row_server_id));
4874 }
4875 #endif /* HAVE_NDB_BINLOG */
4876 DBUG_RETURN(ndb_write_row(record, FALSE, FALSE));
4877 }
4878
4879 /**
4880 Insert one record into NDB
4881 */
ndb_write_row(uchar * record,bool primary_key_update,bool batched_update)4882 int ha_ndbcluster::ndb_write_row(uchar *record,
4883 bool primary_key_update,
4884 bool batched_update)
4885 {
4886 bool has_auto_increment;
4887 const NdbOperation *op;
4888 THD *thd= table->in_use;
4889 Thd_ndb *thd_ndb= m_thd_ndb;
4890 NdbTransaction *trans;
4891 uint32 part_id;
4892 int error;
4893 NdbOperation::SetValueSpec sets[3];
4894 Uint32 num_sets= 0;
4895 DBUG_ENTER("ha_ndbcluster::ndb_write_row");
4896
4897 error = check_slave_state(thd);
4898 if (unlikely(error))
4899 DBUG_RETURN(error);
4900
4901 has_auto_increment= (table->next_number_field && record == table->record[0]);
4902
4903 if (has_auto_increment && table_share->primary_key != MAX_KEY)
4904 {
4905 /*
4906 * Increase any auto_incremented primary key
4907 */
4908 m_skip_auto_increment= FALSE;
4909 if ((error= update_auto_increment()))
4910 DBUG_RETURN(error);
4911 m_skip_auto_increment= (insert_id_for_cur_row == 0);
4912 }
4913
4914 /*
4915 * If IGNORE the ignore constraint violations on primary and unique keys
4916 */
4917 if (!m_use_write && m_ignore_dup_key)
4918 {
4919 /*
4920 compare if expression with that in start_bulk_insert()
4921 start_bulk_insert will set parameters to ensure that each
4922 write_row is committed individually
4923 */
4924 int peek_res= peek_indexed_rows(record, NDB_INSERT);
4925
4926 if (!peek_res)
4927 {
4928 DBUG_RETURN(HA_ERR_FOUND_DUPP_KEY);
4929 }
4930 if (peek_res != HA_ERR_KEY_NOT_FOUND)
4931 DBUG_RETURN(peek_res);
4932 }
4933
4934 bool uses_blobs= uses_blob_value(table->write_set);
4935
4936 Uint64 auto_value;
4937 const NdbRecord *key_rec;
4938 const uchar *key_row;
4939 if (table_share->primary_key == MAX_KEY)
4940 {
4941 /* Table has hidden primary key. */
4942 Ndb *ndb= get_ndb(thd);
4943 uint retries= NDB_AUTO_INCREMENT_RETRIES;
4944 int retry_sleep= 30; /* 30 milliseconds, transaction */
4945 for (;;)
4946 {
4947 Ndb_tuple_id_range_guard g(m_share);
4948 if (ndb->getAutoIncrementValue(m_table, g.range, auto_value, 1000) == -1)
4949 {
4950 if (--retries && !thd->killed &&
4951 ndb->getNdbError().status == NdbError::TemporaryError)
4952 {
4953 do_retry_sleep(retry_sleep);
4954 continue;
4955 }
4956 ERR_RETURN(ndb->getNdbError());
4957 }
4958 break;
4959 }
4960 sets[num_sets].column= get_hidden_key_column();
4961 sets[num_sets].value= &auto_value;
4962 num_sets++;
4963 key_rec= m_ndb_hidden_key_record;
4964 key_row= (const uchar *)&auto_value;
4965 }
4966 else
4967 {
4968 key_rec= m_index[table_share->primary_key].ndb_unique_record_row;
4969 key_row= record;
4970 }
4971
4972 trans= thd_ndb->trans;
4973 if (m_user_defined_partitioning)
4974 {
4975 DBUG_ASSERT(m_use_partition_pruning);
4976 longlong func_value= 0;
4977 my_bitmap_map *old_map= dbug_tmp_use_all_columns(table, table->read_set);
4978 error= m_part_info->get_partition_id(m_part_info, &part_id, &func_value);
4979 dbug_tmp_restore_column_map(table->read_set, old_map);
4980 if (unlikely(error))
4981 {
4982 m_part_info->err_value= func_value;
4983 DBUG_RETURN(error);
4984 }
4985 {
4986 /*
4987 We need to set the value of the partition function value in
4988 NDB since the NDB kernel doesn't have easy access to the function
4989 to calculate the value.
4990 */
4991 if (func_value >= INT_MAX32)
4992 func_value= INT_MAX32;
4993 sets[num_sets].column= get_partition_id_column();
4994 sets[num_sets].value= &func_value;
4995 num_sets++;
4996 }
4997 if (!trans)
4998 if (unlikely(!(trans= start_transaction_part_id(part_id, error))))
4999 DBUG_RETURN(error);
5000 }
5001 else if (!trans)
5002 {
5003 if (unlikely(!(trans= start_transaction_row(key_rec, key_row, error))))
5004 DBUG_RETURN(error);
5005 }
5006 DBUG_ASSERT(trans);
5007
5008 ha_statistic_increment(&SSV::ha_write_count);
5009
5010 /*
5011 Setup OperationOptions
5012 */
5013 NdbOperation::OperationOptions options;
5014 NdbOperation::OperationOptions *poptions = NULL;
5015 options.optionsPresent=0;
5016
5017 eventSetAnyValue(thd, &options);
5018 bool need_flush= add_row_check_if_batch_full(thd_ndb);
5019
5020 const Uint32 authorValue = 1;
5021 if ((thd->slave_thread) &&
5022 (m_table->getExtraRowAuthorBits()))
5023 {
5024 /* Set author to indicate slave updated last */
5025 sets[num_sets].column= NdbDictionary::Column::ROW_AUTHOR;
5026 sets[num_sets].value= &authorValue;
5027 num_sets++;
5028 }
5029
5030 if (m_user_defined_partitioning)
5031 {
5032 options.optionsPresent |= NdbOperation::OperationOptions::OO_PARTITION_ID;
5033 options.partitionId= part_id;
5034 }
5035 if (num_sets)
5036 {
5037 options.optionsPresent |= NdbOperation::OperationOptions::OO_SETVALUE;
5038 options.extraSetValues= sets;
5039 options.numExtraSetValues= num_sets;
5040 }
5041 if (thd->slave_thread || THDVAR(thd, deferred_constraints))
5042 {
5043 options.optionsPresent |=
5044 NdbOperation::OperationOptions::OO_DEFERRED_CONSTAINTS;
5045 }
5046
5047 if (options.optionsPresent != 0)
5048 poptions=&options;
5049
5050 const Uint32 bitmapSz= (NDB_MAX_ATTRIBUTES_IN_TABLE + 31)/32;
5051 uint32 tmpBitmapSpace[bitmapSz];
5052 MY_BITMAP tmpBitmap;
5053 MY_BITMAP *user_cols_written_bitmap;
5054 #ifdef HAVE_NDB_BINLOG
5055 bool haveConflictFunction =
5056 (thd->slave_thread &&
5057 m_share->m_cfn_share &&
5058 m_share->m_cfn_share->m_conflict_fn);
5059 #endif
5060
5061 if (m_use_write
5062 #ifdef HAVE_NDB_BINLOG
5063 /* Conflict detection must use normal Insert */
5064 && !haveConflictFunction
5065 #endif
5066 )
5067 {
5068 /* Should we use the supplied table writeset or not?
5069 * For a REPLACE command, we should ignore it, and write
5070 * all columns to get correct REPLACE behaviour.
5071 * For applying Binlog events, we need to use the writeset
5072 * to avoid trampling unchanged columns when an update is
5073 * logged as a WRITE
5074 */
5075 bool useWriteSet= isManualBinlogExec(thd);
5076
5077 #ifdef HAVE_NDB_BINLOG
5078 /* Slave always uses writeset
5079 * TODO : What about SBR replicating a
5080 * REPLACE command?
5081 */
5082 useWriteSet |= thd->slave_thread;
5083 #endif
5084 uchar* mask;
5085
5086 if (useWriteSet)
5087 {
5088 user_cols_written_bitmap= table->write_set;
5089 mask= (uchar *)(user_cols_written_bitmap->bitmap);
5090 }
5091 else
5092 {
5093 user_cols_written_bitmap= NULL;
5094 mask= NULL;
5095 }
5096 /* TODO : Add conflict detection etc when interpreted write supported */
5097 op= trans->writeTuple(key_rec, (const char *)key_row, m_ndb_record,
5098 (char *)record, mask,
5099 poptions, sizeof(NdbOperation::OperationOptions));
5100 }
5101 else
5102 {
5103 #ifdef HAVE_NDB_BINLOG
5104 if (haveConflictFunction)
5105 {
5106 /* Conflict detection in slave thread */
5107 if (unlikely((error = prepare_conflict_detection(WRITE_ROW,
5108 key_rec,
5109 NULL, /* old_data */
5110 record, /* new_data */
5111 NULL, /* code */
5112 &options))))
5113 DBUG_RETURN(error);
5114 }
5115 #endif
5116 uchar *mask;
5117
5118 /* Check whether Ndb table definition includes any default values. */
5119 if (m_table->hasDefaultValues())
5120 {
5121 DBUG_PRINT("info", ("Not sending values for native defaulted columns"));
5122
5123 /*
5124 If Ndb is unaware of the table's defaults, we must provide all column values to the insert.
5125 This is done using a NULL column mask.
5126 If Ndb is aware of the table's defaults, we only need to provide
5127 the columns explicitly mentioned in the write set,
5128 plus any extra columns required due to bug#41616.
5129 plus the primary key columns required due to bug#42238.
5130 */
5131 /*
5132 The following code for setting user_cols_written_bitmap
5133 should be removed after BUG#41616 and Bug#42238 are fixed
5134 */
5135 /* Copy table write set so that we can add to it */
5136 user_cols_written_bitmap= &tmpBitmap;
5137 bitmap_init(user_cols_written_bitmap, tmpBitmapSpace,
5138 table->write_set->n_bits, false);
5139 bitmap_copy(user_cols_written_bitmap, table->write_set);
5140
5141 for (uint i= 0; i < table->s->fields; i++)
5142 {
5143 Field *field= table->field[i];
5144 DBUG_PRINT("info", ("Field#%u, (%u), Type : %u "
5145 "NO_DEFAULT_VALUE_FLAG : %u PRI_KEY_FLAG : %u",
5146 i,
5147 field->field_index,
5148 field->real_type(),
5149 field->flags & NO_DEFAULT_VALUE_FLAG,
5150 field->flags & PRI_KEY_FLAG));
5151 if ((field->flags & (NO_DEFAULT_VALUE_FLAG | // bug 41616
5152 PRI_KEY_FLAG)) || // bug 42238
5153 ! type_supports_default_value(field->real_type()))
5154 {
5155 bitmap_set_bit(user_cols_written_bitmap, field->field_index);
5156 }
5157 }
5158
5159 mask= (uchar *)(user_cols_written_bitmap->bitmap);
5160 }
5161 else
5162 {
5163 /* No defaults in kernel, provide all columns ourselves */
5164 DBUG_PRINT("info", ("No native defaults, sending all values"));
5165 user_cols_written_bitmap= NULL;
5166 mask = NULL;
5167 }
5168
5169 /* Using insert, we write all non default columns */
5170 op= trans->insertTuple(key_rec, (const char *)key_row, m_ndb_record,
5171 (char *)record, mask, // Default value should be masked
5172 poptions, sizeof(NdbOperation::OperationOptions));
5173 }
5174 if (!(op))
5175 ERR_RETURN(trans->getNdbError());
5176
5177 bool do_batch= !need_flush &&
5178 (batched_update || thd_allow_batch(thd));
5179 uint blob_count= 0;
5180 if (table_share->blob_fields > 0)
5181 {
5182 my_bitmap_map *old_map= dbug_tmp_use_all_columns(table, table->read_set);
5183 /* Set Blob values for all columns updated by the operation */
5184 int res= set_blob_values(op, record - table->record[0],
5185 user_cols_written_bitmap, &blob_count, do_batch);
5186 dbug_tmp_restore_column_map(table->read_set, old_map);
5187 if (res != 0)
5188 DBUG_RETURN(res);
5189 }
5190
5191 m_rows_changed++;
5192
5193 /*
5194 Execute write operation
5195 NOTE When doing inserts with many values in
5196 each INSERT statement it should not be necessary
5197 to NoCommit the transaction between each row.
5198 Find out how this is detected!
5199 */
5200 m_rows_inserted++;
5201 no_uncommitted_rows_update(1);
5202 if (( (m_rows_to_insert == 1 || uses_blobs) && !do_batch ) ||
5203 primary_key_update ||
5204 need_flush)
5205 {
5206 int res= flush_bulk_insert();
5207 if (res != 0)
5208 {
5209 m_skip_auto_increment= TRUE;
5210 DBUG_RETURN(res);
5211 }
5212 }
5213 if ((has_auto_increment) && (m_skip_auto_increment))
5214 {
5215 int ret_val;
5216 if ((ret_val= set_auto_inc(thd, table->next_number_field)))
5217 {
5218 DBUG_RETURN(ret_val);
5219 }
5220 }
5221 m_skip_auto_increment= TRUE;
5222
5223 DBUG_PRINT("exit",("ok"));
5224 DBUG_RETURN(0);
5225 }
5226
5227
5228 /* Compare if an update changes the primary key in a row. */
primary_key_cmp(const uchar * old_row,const uchar * new_row)5229 int ha_ndbcluster::primary_key_cmp(const uchar * old_row, const uchar * new_row)
5230 {
5231 uint keynr= table_share->primary_key;
5232 KEY_PART_INFO *key_part=table->key_info[keynr].key_part;
5233 KEY_PART_INFO *end=key_part+table->key_info[keynr].user_defined_key_parts;
5234
5235 for (; key_part != end ; key_part++)
5236 {
5237 if (!bitmap_is_set(table->write_set, key_part->fieldnr - 1))
5238 continue;
5239
5240 /* The primary key does not allow NULLs. */
5241 DBUG_ASSERT(!key_part->null_bit);
5242
5243 if (key_part->key_part_flag & (HA_BLOB_PART | HA_VAR_LENGTH_PART))
5244 {
5245
5246 if (key_part->field->cmp_binary((old_row + key_part->offset),
5247 (new_row + key_part->offset),
5248 (ulong) key_part->length))
5249 return 1;
5250 }
5251 else
5252 {
5253 if (memcmp(old_row+key_part->offset, new_row+key_part->offset,
5254 key_part->length))
5255 return 1;
5256 }
5257 }
5258 return 0;
5259 }
5260
#ifdef HAVE_NDB_BINLOG
/**
  React to a conflict detected while applying a replicated row operation.

  When the table's conflict function share has CFF_REFRESH_ROWS set, a
  refresh operation is defined on the conflicting row so that a refresh
  Binlog event is generated and other clusters are re-aligned.
  Without that flag nothing is done.

  @param cfn_share        Conflict function share of the table
  @param table_name       Table name, used for tracing only
  @param key_rec          NdbRecord describing the key in pk_row
  @param pk_row           Row buffer holding the key of the row
  @param op_type          Type of the operation that hit the conflict
  @param conflict_cause   Why the operation is in conflict
  @param conflict_error   NDB error reported for the operation
  @param conflict_trans   Transaction in which the refresh is defined
  @param[out] err         Set to the transaction's NDB error when the
                          refresh operation could not be defined

  @return 0 on success (including the cases where no refresh is needed),
          1 when defining the refresh operation failed.
*/
int
handle_row_conflict(NDB_CONFLICT_FN_SHARE* cfn_share,
                    const char* table_name,
                    const NdbRecord* key_rec,
                    const uchar* pk_row,
                    enum_conflicting_op_type op_type,
                    enum_conflict_cause conflict_cause,
                    const NdbError& conflict_error,
                    NdbTransaction* conflict_trans,
                    NdbError& err)
{
  DBUG_ENTER("handle_row_conflict");

  if (!(cfn_share->m_flags & CFF_REFRESH_ROWS))
  {
    /* Rows are not refreshed for this table, nothing to do. */
    DBUG_RETURN(0);
  }

  /* A conflict has been detected between an applied replicated operation
   * and the data in the DB.
   * The attempt to change the local DB will have been rejected.
   * We now take steps to generate a refresh Binlog event so that
   * other clusters will be re-aligned.
   */
  DBUG_PRINT("info", ("Conflict on table %s.  Operation type : %s, "
                      "conflict cause :%s, conflict error : %u : %s",
                      table_name,
                      ((op_type == WRITE_ROW)? "WRITE_ROW":
                       (op_type == UPDATE_ROW)? "UPDATE_ROW":
                       "DELETE_ROW"),
                      ((conflict_cause == ROW_ALREADY_EXISTS)?"ROW_ALREADY_EXISTS":
                       (conflict_cause == ROW_DOES_NOT_EXIST)?"ROW_DOES_NOT_EXIST":
                       "ROW_IN_CONFLICT"),
                      conflict_error.code,
                      conflict_error.message));

  assert(key_rec != NULL);
  assert(pk_row != NULL);

  /* When the slave splits an epoch into batches, a conflict row detected
   * and refreshed in an early batch can be written to by operations in
   * a later batch.  As the operations will not have applied, and the
   * row has already been refreshed, we need not attempt to refresh
   * it again
   */
  if ((conflict_cause == ROW_IN_CONFLICT) &&
      (conflict_error.code == (int) error_op_after_refresh_op))
  {
    /* Attempt to apply an operation after the row was refreshed
     * Ignore the error
     */
    DBUG_PRINT("info", ("Operation after refresh error - ignoring"));
    DBUG_RETURN(0);
  }

  /* When a delete operation finds that the row does not exist, it indicates
   * a DELETE vs DELETE conflict.  If we refresh the row then we can get
   * non deterministic behaviour depending on slave batching as follows :
   *   Row is deleted
   *
   *     Case 1
   *       Slave applied DELETE, INSERT in 1 batch
   *
   *         After first batch, the row is present (due to INSERT), it is
   *         refreshed.
   *
   *     Case 2
   *       Slave applied DELETE in 1 batch, INSERT in 2nd batch
   *
   *         After first batch, the row is not present, it is refreshed
   *         INSERT is then rejected.
   *
   * The problem of not being able to 'record' a DELETE vs DELETE conflict
   * is known.  We attempt at least to give consistent behaviour for
   * DELETE vs DELETE conflicts by :
   *   NOT refreshing a row when a DELETE vs DELETE conflict is detected
   * This should map all batching scenarios onto Case1.
   */
  if ((op_type == DELETE_ROW) &&
      (conflict_cause == ROW_DOES_NOT_EXIST))
  {
    DBUG_PRINT("info", ("Delete vs Delete detected, NOT refreshing"));
    DBUG_RETURN(0);
  }

  /* Create a refresh operation to realign other clusters */
  // TODO AnyValue
  // TODO Do we ever get non-PK key?
  //      Keyless table?
  //      Unique index
  const NdbOperation* refresh_op= conflict_trans->refreshTuple(key_rec,
                                                               (const char*) pk_row);

  if (!refresh_op)
  {
    /* Hand the reason for the failure back to the caller. */
    err= conflict_trans->getNdbError();
    DBUG_RETURN(1);
  }

  DBUG_RETURN(0);
}
#endif /* HAVE_NDB_BINLOG */
5362
5363 /**
5364 Update one record in NDB using primary key.
5365 */
5366
start_bulk_update()5367 bool ha_ndbcluster::start_bulk_update()
5368 {
5369 DBUG_ENTER("ha_ndbcluster::start_bulk_update");
5370 if (!m_use_write && m_ignore_dup_key)
5371 {
5372 DBUG_PRINT("info", ("Batching turned off as duplicate key is "
5373 "ignored by using peek_row"));
5374 DBUG_RETURN(TRUE);
5375 }
5376 DBUG_RETURN(FALSE);
5377 }
5378
bulk_update_row(const uchar * old_data,uchar * new_data,uint * dup_key_found)5379 int ha_ndbcluster::bulk_update_row(const uchar *old_data, uchar *new_data,
5380 uint *dup_key_found)
5381 {
5382 DBUG_ENTER("ha_ndbcluster::bulk_update_row");
5383 *dup_key_found= 0;
5384 DBUG_RETURN(ndb_update_row(old_data, new_data, 1));
5385 }
5386
exec_bulk_update(uint * dup_key_found)5387 int ha_ndbcluster::exec_bulk_update(uint *dup_key_found)
5388 {
5389 NdbTransaction* trans= m_thd_ndb->trans;
5390 DBUG_ENTER("ha_ndbcluster::exec_bulk_update");
5391 *dup_key_found= 0;
5392
5393 // m_handler must be NULL or point to _this_ handler instance
5394 assert(m_thd_ndb->m_handler == NULL || m_thd_ndb->m_handler == this);
5395
5396 if (m_thd_ndb->m_handler &&
5397 m_read_before_write_removal_possible)
5398 {
5399 /*
5400 This is an autocommit involving only one table and rbwr is on
5401
5402 Commit the autocommit transaction early(before the usual place
5403 in ndbcluster_commit) in order to:
5404 1) save one round trip, "no-commit+commit" converted to "commit"
5405 2) return the correct number of updated and affected rows
5406 to the update loop(which will ask handler in rbwr mode)
5407 */
5408 DBUG_PRINT("info", ("committing auto-commit+rbwr early"));
5409 uint ignore_count= 0;
5410 const int ignore_error= 1;
5411 if (execute_commit(table->in_use, m_thd_ndb, trans,
5412 m_thd_ndb->m_force_send, ignore_error,
5413 &ignore_count) != 0)
5414 {
5415 no_uncommitted_rows_execute_failure();
5416 DBUG_RETURN(ndb_err(trans));
5417 }
5418 DBUG_PRINT("info", ("ignore_count: %u", ignore_count));
5419 assert(m_rows_changed >= ignore_count);
5420 assert(m_rows_updated >= ignore_count);
5421 m_rows_changed-= ignore_count;
5422 m_rows_updated-= ignore_count;
5423 DBUG_RETURN(0);
5424 }
5425
5426 if (m_thd_ndb->m_unsent_bytes == 0)
5427 {
5428 DBUG_PRINT("exit", ("skip execute - no unsent bytes"));
5429 DBUG_RETURN(0);
5430 }
5431
5432 if (thd_allow_batch(table->in_use))
5433 {
5434 /*
5435 Turned on by @@transaction_allow_batching=ON
5436 or implicitly by slave exec thread
5437 */
5438 DBUG_PRINT("exit", ("skip execute - transaction_allow_batching is ON"));
5439 DBUG_RETURN(0);
5440 }
5441
5442 if (m_thd_ndb->m_handler &&
5443 !m_blobs_pending)
5444 {
5445 // Execute at commit time(in 'ndbcluster_commit') to save a round trip
5446 DBUG_PRINT("exit", ("skip execute - simple autocommit"));
5447 DBUG_RETURN(0);
5448 }
5449
5450 uint ignore_count= 0;
5451 if (execute_no_commit(m_thd_ndb, trans,
5452 m_ignore_no_key || m_read_before_write_removal_used,
5453 &ignore_count) != 0)
5454 {
5455 no_uncommitted_rows_execute_failure();
5456 DBUG_RETURN(ndb_err(trans));
5457 }
5458 assert(m_rows_changed >= ignore_count);
5459 assert(m_rows_updated >= ignore_count);
5460 m_rows_changed-= ignore_count;
5461 m_rows_updated-= ignore_count;
5462 DBUG_RETURN(0);
5463 }
5464
end_bulk_update()5465 void ha_ndbcluster::end_bulk_update()
5466 {
5467 DBUG_ENTER("ha_ndbcluster::end_bulk_update");
5468 DBUG_VOID_RETURN;
5469 }
5470
update_row(const uchar * old_data,uchar * new_data)5471 int ha_ndbcluster::update_row(const uchar *old_data, uchar *new_data)
5472 {
5473 return ndb_update_row(old_data, new_data, 0);
5474 }
5475
5476 void
setup_key_ref_for_ndb_record(const NdbRecord ** key_rec,const uchar ** key_row,const uchar * record,bool use_active_index)5477 ha_ndbcluster::setup_key_ref_for_ndb_record(const NdbRecord **key_rec,
5478 const uchar **key_row,
5479 const uchar *record,
5480 bool use_active_index)
5481 {
5482 DBUG_ENTER("setup_key_ref_for_ndb_record");
5483 if (use_active_index)
5484 {
5485 /* Use unique key to access table */
5486 DBUG_PRINT("info", ("Using unique index (%u)", active_index));
5487 *key_rec= m_index[active_index].ndb_unique_record_row;
5488 *key_row= record;
5489 }
5490 else if (table_share->primary_key != MAX_KEY)
5491 {
5492 /* Use primary key to access table */
5493 DBUG_PRINT("info", ("Using primary key"));
5494 *key_rec= m_index[table_share->primary_key].ndb_unique_record_row;
5495 *key_row= record;
5496 }
5497 else
5498 {
5499 /* Use hidden primary key previously read into m_ref. */
5500 DBUG_PRINT("info", ("Using hidden primary key (%llu)", m_ref));
5501 /* Can't use hidden pk if we didn't read it first */
5502 DBUG_ASSERT(m_read_before_write_removal_used == false);
5503 *key_rec= m_ndb_hidden_key_record;
5504 *key_row= (const uchar *)(&m_ref);
5505 }
5506 DBUG_VOID_RETURN;
5507 }
5508
5509
5510 /*
5511 Update one record in NDB using primary key
5512 */
5513
ndb_update_row(const uchar * old_data,uchar * new_data,int is_bulk_update)5514 int ha_ndbcluster::ndb_update_row(const uchar *old_data, uchar *new_data,
5515 int is_bulk_update)
5516 {
5517 THD *thd= table->in_use;
5518 Thd_ndb *thd_ndb= m_thd_ndb;
5519 NdbTransaction *trans= thd_ndb->trans;
5520 NdbScanOperation* cursor= m_active_cursor;
5521 const NdbOperation *op;
5522 uint32 old_part_id= ~uint32(0), new_part_id= ~uint32(0);
5523 int error;
5524 longlong func_value;
5525 Uint32 func_value_uint32;
5526 bool have_pk= (table_share->primary_key != MAX_KEY);
5527 bool pk_update= (!m_read_before_write_removal_possible &&
5528 have_pk &&
5529 bitmap_is_overlapping(table->write_set, m_pk_bitmap_p) &&
5530 primary_key_cmp(old_data, new_data));
5531 bool batch_allowed= !m_update_cannot_batch &&
5532 (is_bulk_update || thd_allow_batch(thd));
5533 NdbOperation::SetValueSpec sets[2];
5534 Uint32 num_sets= 0;
5535
5536 DBUG_ENTER("ndb_update_row");
5537 DBUG_ASSERT(trans);
5538
5539 error = check_slave_state(thd);
5540 if (unlikely(error))
5541 DBUG_RETURN(error);
5542
5543 /*
5544 * If IGNORE the ignore constraint violations on primary and unique keys,
5545 * but check that it is not part of INSERT ... ON DUPLICATE KEY UPDATE
5546 */
5547 if (m_ignore_dup_key && (thd->lex->sql_command == SQLCOM_UPDATE ||
5548 thd->lex->sql_command == SQLCOM_UPDATE_MULTI))
5549 {
5550 NDB_WRITE_OP write_op= (pk_update) ? NDB_PK_UPDATE : NDB_UPDATE;
5551 int peek_res= peek_indexed_rows(new_data, write_op);
5552
5553 if (!peek_res)
5554 {
5555 DBUG_RETURN(HA_ERR_FOUND_DUPP_KEY);
5556 }
5557 if (peek_res != HA_ERR_KEY_NOT_FOUND)
5558 DBUG_RETURN(peek_res);
5559 }
5560
5561 ha_statistic_increment(&SSV::ha_update_count);
5562
5563 bool skip_partition_for_unique_index= FALSE;
5564 if (m_use_partition_pruning)
5565 {
5566 if (!cursor && m_read_before_write_removal_used)
5567 {
5568 ndb_index_type type= get_index_type(active_index);
5569 /*
5570 Ndb unique indexes are global so when
5571 m_read_before_write_removal_used is active
5572 the unique index can be used directly for update
5573 without finding the partitions
5574 */
5575 if (type == UNIQUE_INDEX ||
5576 type == UNIQUE_ORDERED_INDEX)
5577 {
5578 skip_partition_for_unique_index= TRUE;
5579 goto skip_partition_pruning;
5580 }
5581 }
5582 if ((error= get_parts_for_update(old_data, new_data, table->record[0],
5583 m_part_info, &old_part_id, &new_part_id,
5584 &func_value)))
5585 {
5586 m_part_info->err_value= func_value;
5587 DBUG_RETURN(error);
5588 }
5589 DBUG_PRINT("info", ("old_part_id: %u new_part_id: %u", old_part_id, new_part_id));
5590 skip_partition_pruning:
5591 (void)0;
5592 }
5593
5594 /*
5595 * Check for update of primary key or partition change
5596 * for special handling
5597 */
5598 if (pk_update || old_part_id != new_part_id)
5599 {
5600 DBUG_RETURN(ndb_pk_update_row(thd, old_data, new_data, old_part_id));
5601 }
5602 /*
5603 If we are updating a unique key with auto_increment
5604 then we need to update the auto_increment counter
5605 */
5606 if (table->found_next_number_field &&
5607 bitmap_is_set(table->write_set,
5608 table->found_next_number_field->field_index) &&
5609 (error= set_auto_inc(thd, table->found_next_number_field)))
5610 {
5611 DBUG_RETURN(error);
5612 }
5613 /*
5614 Set only non-primary-key attributes.
5615 We already checked that any primary key attribute in write_set has no
5616 real changes.
5617 */
5618 bitmap_copy(&m_bitmap, table->write_set);
5619 bitmap_subtract(&m_bitmap, m_pk_bitmap_p);
5620 uchar *mask= (uchar *)(m_bitmap.bitmap);
5621 DBUG_ASSERT(!pk_update);
5622
5623 NdbOperation::OperationOptions *poptions = NULL;
5624 NdbOperation::OperationOptions options;
5625 options.optionsPresent=0;
5626
5627 /* Need to set the value of any user-defined partitioning function.
5628 (excecpt for when using unique index)
5629 */
5630 if (m_user_defined_partitioning && !skip_partition_for_unique_index)
5631 {
5632 if (func_value >= INT_MAX32)
5633 func_value_uint32= INT_MAX32;
5634 else
5635 func_value_uint32= (uint32)func_value;
5636 sets[num_sets].column= get_partition_id_column();
5637 sets[num_sets].value= &func_value_uint32;
5638 num_sets++;
5639
5640 if (!cursor)
5641 {
5642 options.optionsPresent|= NdbOperation::OperationOptions::OO_PARTITION_ID;
5643 options.partitionId= new_part_id;
5644 }
5645 }
5646
5647 eventSetAnyValue(thd, &options);
5648
5649 bool need_flush= add_row_check_if_batch_full(thd_ndb);
5650
5651 const Uint32 authorValue = 1;
5652 if ((thd->slave_thread) &&
5653 (m_table->getExtraRowAuthorBits()))
5654 {
5655 /* Set author to indicate slave updated last */
5656 sets[num_sets].column= NdbDictionary::Column::ROW_AUTHOR;
5657 sets[num_sets].value= &authorValue;
5658 num_sets++;
5659 }
5660
5661 if (num_sets)
5662 {
5663 options.optionsPresent|= NdbOperation::OperationOptions::OO_SETVALUE;
5664 options.extraSetValues= sets;
5665 options.numExtraSetValues= num_sets;
5666 }
5667
5668 if (thd->slave_thread || THDVAR(thd, deferred_constraints))
5669 {
5670 options.optionsPresent |=
5671 NdbOperation::OperationOptions::OO_DEFERRED_CONSTAINTS;
5672 }
5673
5674 if (cursor)
5675 {
5676 /*
5677 We are scanning records and want to update the record
5678 that was just found, call updateCurrentTuple on the cursor
5679 to take over the lock to a new update operation
5680 And thus setting the primary key of the record from
5681 the active record in cursor
5682 */
5683 DBUG_PRINT("info", ("Calling updateTuple on cursor, write_set=0x%x",
5684 table->write_set->bitmap[0]));
5685
5686 if (options.optionsPresent != 0)
5687 poptions = &options;
5688
5689 if (!(op= cursor->updateCurrentTuple(trans, m_ndb_record,
5690 (const char*)new_data, mask,
5691 poptions,
5692 sizeof(NdbOperation::OperationOptions))))
5693 ERR_RETURN(trans->getNdbError());
5694
5695 m_lock_tuple= FALSE;
5696 thd_ndb->m_unsent_bytes+= 12;
5697 }
5698 else
5699 {
5700 const NdbRecord *key_rec;
5701 const uchar *key_row;
5702 setup_key_ref_for_ndb_record(&key_rec, &key_row, new_data,
5703 m_read_before_write_removal_used);
5704
5705 #ifdef HAVE_NDB_BINLOG
5706 Uint32 buffer[ MAX_CONFLICT_INTERPRETED_PROG_SIZE ];
5707 NdbInterpretedCode code(m_table, buffer,
5708 sizeof(buffer)/sizeof(buffer[0]));
5709
5710 if (thd->slave_thread && m_share->m_cfn_share &&
5711 m_share->m_cfn_share->m_conflict_fn)
5712 {
5713 /* Conflict resolution in slave thread. */
5714 if (unlikely((error = prepare_conflict_detection(UPDATE_ROW,
5715 key_rec,
5716 old_data,
5717 new_data,
5718 &code,
5719 &options))))
5720 DBUG_RETURN(error);
5721 }
5722 #endif /* HAVE_NDB_BINLOG */
5723 if (options.optionsPresent !=0)
5724 poptions= &options;
5725
5726 if (!(op= trans->updateTuple(key_rec, (const char *)key_row,
5727 m_ndb_record, (const char*)new_data, mask,
5728 poptions,
5729 sizeof(NdbOperation::OperationOptions))))
5730 ERR_RETURN(trans->getNdbError());
5731 }
5732
5733 uint blob_count= 0;
5734 if (uses_blob_value(table->write_set))
5735 {
5736 int row_offset= new_data - table->record[0];
5737 int res= set_blob_values(op, row_offset, table->write_set, &blob_count,
5738 (batch_allowed && !need_flush));
5739 if (res != 0)
5740 DBUG_RETURN(res);
5741 }
5742 uint ignore_count= 0;
5743 /*
5744 Batch update operation if we are doing a scan for update, unless
5745 there exist UPDATE AFTER triggers
5746 */
5747 if (m_update_cannot_batch ||
5748 !(cursor || (batch_allowed && have_pk)) ||
5749 need_flush)
5750 {
5751 if (execute_no_commit(m_thd_ndb, trans,
5752 m_ignore_no_key || m_read_before_write_removal_used,
5753 &ignore_count) != 0)
5754 {
5755 no_uncommitted_rows_execute_failure();
5756 DBUG_RETURN(ndb_err(trans));
5757 }
5758 }
5759 else if (blob_count > 0)
5760 m_blobs_pending= TRUE;
5761
5762 m_rows_changed++;
5763 m_rows_updated++;
5764
5765 assert(m_rows_changed >= ignore_count);
5766 assert(m_rows_updated >= ignore_count);
5767 m_rows_changed-= ignore_count;
5768 m_rows_updated-= ignore_count;
5769
5770 DBUG_RETURN(0);
5771 }
5772
5773
5774 /*
5775 handler delete interface
5776 */
5777
delete_row(const uchar * record)5778 int ha_ndbcluster::delete_row(const uchar *record)
5779 {
5780 return ndb_delete_row(record, FALSE);
5781 }
5782
start_bulk_delete()5783 bool ha_ndbcluster::start_bulk_delete()
5784 {
5785 DBUG_ENTER("start_bulk_delete");
5786 m_is_bulk_delete = true;
5787 DBUG_RETURN(0); // Bulk delete used by handler
5788 }
5789
end_bulk_delete()5790 int ha_ndbcluster::end_bulk_delete()
5791 {
5792 NdbTransaction* trans= m_thd_ndb->trans;
5793 DBUG_ENTER("end_bulk_delete");
5794 assert(m_is_bulk_delete); // Don't allow end() without start()
5795 m_is_bulk_delete = false;
5796
5797 // m_handler must be NULL or point to _this_ handler instance
5798 assert(m_thd_ndb->m_handler == NULL || m_thd_ndb->m_handler == this);
5799
5800 if (m_thd_ndb->m_handler &&
5801 m_read_before_write_removal_possible)
5802 {
5803 /*
5804 This is an autocommit involving only one table and rbwr is on
5805
5806 Commit the autocommit transaction early(before the usual place
5807 in ndbcluster_commit) in order to:
5808 1) save one round trip, "no-commit+commit" converted to "commit"
5809 2) return the correct number of updated and affected rows
5810 to the delete loop(which will ask handler in rbwr mode)
5811 */
5812 DBUG_PRINT("info", ("committing auto-commit+rbwr early"));
5813 uint ignore_count= 0;
5814 const int ignore_error= 1;
5815 if (execute_commit(table->in_use, m_thd_ndb, trans,
5816 m_thd_ndb->m_force_send, ignore_error,
5817 &ignore_count) != 0)
5818 {
5819 no_uncommitted_rows_execute_failure();
5820 DBUG_RETURN(ndb_err(trans));
5821 }
5822 DBUG_PRINT("info", ("ignore_count: %u", ignore_count));
5823 assert(m_rows_deleted >= ignore_count);
5824 m_rows_deleted-= ignore_count;
5825 DBUG_RETURN(0);
5826 }
5827
5828 if (m_thd_ndb->m_unsent_bytes == 0)
5829 {
5830 DBUG_PRINT("exit", ("skip execute - no unsent bytes"));
5831 DBUG_RETURN(0);
5832 }
5833
5834 if (thd_allow_batch(table->in_use))
5835 {
5836 /*
5837 Turned on by @@transaction_allow_batching=ON
5838 or implicitly by slave exec thread
5839 */
5840 DBUG_PRINT("exit", ("skip execute - transaction_allow_batching is ON"));
5841 DBUG_RETURN(0);
5842 }
5843
5844 if (m_thd_ndb->m_handler)
5845 {
5846 // Execute at commit time(in 'ndbcluster_commit') to save a round trip
5847 DBUG_PRINT("exit", ("skip execute - simple autocommit"));
5848 DBUG_RETURN(0);
5849 }
5850
5851 uint ignore_count= 0;
5852 if (execute_no_commit(m_thd_ndb, trans,
5853 m_ignore_no_key || m_read_before_write_removal_used,
5854 &ignore_count) != 0)
5855 {
5856 no_uncommitted_rows_execute_failure();
5857 DBUG_RETURN(ndb_err(trans));
5858 }
5859
5860 assert(m_rows_deleted >= ignore_count);
5861 m_rows_deleted-= ignore_count;
5862 no_uncommitted_rows_update(ignore_count);
5863 DBUG_RETURN(0);
5864 }
5865
5866
5867 /**
5868 Delete one record from NDB, using primary key .
5869 */
5870
ndb_delete_row(const uchar * record,bool primary_key_update)5871 int ha_ndbcluster::ndb_delete_row(const uchar *record,
5872 bool primary_key_update)
5873 {
5874 THD *thd= table->in_use;
5875 Thd_ndb *thd_ndb= get_thd_ndb(thd);
5876 NdbTransaction *trans= m_thd_ndb->trans;
5877 NdbScanOperation* cursor= m_active_cursor;
5878 const NdbOperation *op;
5879 uint32 part_id= ~uint32(0);
5880 int error;
5881 bool allow_batch= !m_delete_cannot_batch &&
5882 (m_is_bulk_delete || thd_allow_batch(thd));
5883
5884 DBUG_ENTER("ndb_delete_row");
5885 DBUG_ASSERT(trans);
5886
5887 error = check_slave_state(thd);
5888 if (unlikely(error))
5889 DBUG_RETURN(error);
5890
5891 ha_statistic_increment(&SSV::ha_delete_count);
5892 m_rows_changed++;
5893
5894 bool skip_partition_for_unique_index= FALSE;
5895 if (m_use_partition_pruning)
5896 {
5897 if (!cursor && m_read_before_write_removal_used)
5898 {
5899 ndb_index_type type= get_index_type(active_index);
5900 /*
5901 Ndb unique indexes are global so when
5902 m_read_before_write_removal_used is active
5903 the unique index can be used directly for deleting
5904 without finding the partitions
5905 */
5906 if (type == UNIQUE_INDEX ||
5907 type == UNIQUE_ORDERED_INDEX)
5908 {
5909 skip_partition_for_unique_index= TRUE;
5910 goto skip_partition_pruning;
5911 }
5912 }
5913 if ((error= get_part_for_delete(record, table->record[0], m_part_info,
5914 &part_id)))
5915 {
5916 DBUG_RETURN(error);
5917 }
5918 skip_partition_pruning:
5919 (void)0;
5920 }
5921
5922 NdbOperation::OperationOptions options;
5923 NdbOperation::OperationOptions *poptions = NULL;
5924 options.optionsPresent=0;
5925
5926 eventSetAnyValue(thd, &options);
5927
5928 /*
5929 Poor approx. let delete ~ tabsize / 4
5930 */
5931 uint delete_size= 12 + (m_bytes_per_write >> 2);
5932 bool need_flush= add_row_check_if_batch_full_size(thd_ndb, delete_size);
5933
5934 if (thd->slave_thread || THDVAR(thd, deferred_constraints))
5935 {
5936 options.optionsPresent |=
5937 NdbOperation::OperationOptions::OO_DEFERRED_CONSTAINTS;
5938 }
5939
5940 if (cursor)
5941 {
5942 if (options.optionsPresent != 0)
5943 poptions = &options;
5944
5945 /*
5946 We are scanning records and want to delete the record
5947 that was just found, call deleteTuple on the cursor
5948 to take over the lock to a new delete operation
5949 And thus setting the primary key of the record from
5950 the active record in cursor
5951 */
5952 DBUG_PRINT("info", ("Calling deleteTuple on cursor"));
5953 if ((op = cursor->deleteCurrentTuple(trans, m_ndb_record,
5954 NULL, // result_row
5955 NULL, // result_mask
5956 poptions,
5957 sizeof(NdbOperation::OperationOptions))) == 0)
5958 ERR_RETURN(trans->getNdbError());
5959 m_lock_tuple= FALSE;
5960 thd_ndb->m_unsent_bytes+= 12;
5961
5962 no_uncommitted_rows_update(-1);
5963 m_rows_deleted++;
5964
5965 if (!(primary_key_update || m_delete_cannot_batch))
5966 {
5967 // If deleting from cursor, NoCommit will be handled in next_result
5968 DBUG_RETURN(0);
5969 }
5970 }
5971 else
5972 {
5973 const NdbRecord *key_rec;
5974 const uchar *key_row;
5975
5976 if (m_user_defined_partitioning && !skip_partition_for_unique_index)
5977 {
5978 options.optionsPresent|= NdbOperation::OperationOptions::OO_PARTITION_ID;
5979 options.partitionId= part_id;
5980 }
5981
5982 setup_key_ref_for_ndb_record(&key_rec, &key_row, record,
5983 m_read_before_write_removal_used);
5984
5985 #ifdef HAVE_NDB_BINLOG
5986 Uint32 buffer[ MAX_CONFLICT_INTERPRETED_PROG_SIZE ];
5987 NdbInterpretedCode code(m_table, buffer,
5988 sizeof(buffer)/sizeof(buffer[0]));
5989 if (thd->slave_thread && m_share->m_cfn_share &&
5990 m_share->m_cfn_share->m_conflict_fn)
5991 {
5992 /* Conflict resolution in slave thread. */
5993 if (unlikely((error = prepare_conflict_detection(DELETE_ROW,
5994 key_rec,
5995 key_row, /* old_data */
5996 NULL, /* new_data */
5997 &code,
5998 &options))))
5999 DBUG_RETURN(error);
6000 }
6001 #endif /* HAVE_NDB_BINLOG */
6002 if (options.optionsPresent != 0)
6003 poptions= &options;
6004
6005 if (!(op=trans->deleteTuple(key_rec, (const char *)key_row,
6006 m_ndb_record,
6007 NULL, // row
6008 NULL, // mask
6009 poptions,
6010 sizeof(NdbOperation::OperationOptions))))
6011 ERR_RETURN(trans->getNdbError());
6012
6013 no_uncommitted_rows_update(-1);
6014 m_rows_deleted++;
6015
6016 /*
6017 Check if we can batch the delete.
6018
6019 We don't batch deletes as part of primary key updates.
6020 We do not batch deletes on tables with no primary key. For such tables,
6021 replication uses full table scan to locate the row to delete. The
6022 problem is the following scenario when deleting 2 (or more) rows:
6023
6024 1. Table scan to locate the first row.
6025 2. Delete the row, batched so no execute.
6026 3. Table scan to locate the second row is executed, along with the
6027 batched delete operation from step 2.
6028 4. The first row is returned from nextResult() (not deleted yet).
6029 5. The kernel deletes the row (operation from step 2).
6030 6. lockCurrentTuple() is called on the row returned in step 4. However,
6031 as that row is now deleted, the operation fails and the transaction
6032 is aborted.
6033 7. The delete of the second tuple now fails, as the transaction has
6034 been aborted.
6035 */
6036
6037 if ( allow_batch &&
6038 table_share->primary_key != MAX_KEY &&
6039 !primary_key_update &&
6040 !need_flush)
6041 {
6042 DBUG_RETURN(0);
6043 }
6044 }
6045
6046 // Execute delete operation
6047 uint ignore_count= 0;
6048 if (execute_no_commit(m_thd_ndb, trans,
6049 m_ignore_no_key || m_read_before_write_removal_used,
6050 &ignore_count) != 0)
6051 {
6052 no_uncommitted_rows_execute_failure();
6053 DBUG_RETURN(ndb_err(trans));
6054 }
6055 if (!primary_key_update)
6056 {
6057 assert(m_rows_deleted >= ignore_count);
6058 m_rows_deleted-= ignore_count;
6059 no_uncommitted_rows_update(ignore_count);
6060 }
6061 DBUG_RETURN(0);
6062 }
6063
6064 /**
6065 Unpack a record returned from a scan.
6066 We copy field-for-field to
6067 1. Avoid unnecessary copying for sparse rows.
6068 2. Properly initialize not used null bits.
6069 Note that we do not unpack all returned rows; some primary/unique key
6070 operations can read directly into the destination row.
6071 */
unpack_record(uchar * dst_row,const uchar * src_row)6072 void ha_ndbcluster::unpack_record(uchar *dst_row, const uchar *src_row)
6073 {
6074 int res;
6075 DBUG_ASSERT(src_row != NULL);
6076
6077 my_ptrdiff_t dst_offset= dst_row - table->record[0];
6078 my_ptrdiff_t src_offset= src_row - table->record[0];
6079
6080 /* Initialize the NULL bitmap. */
6081 memset(dst_row, 0xff, table->s->null_bytes);
6082
6083 uchar *blob_ptr= m_blobs_buffer;
6084
6085 for (uint i= 0; i < table_share->fields; i++)
6086 {
6087 Field *field= table->field[i];
6088 if (bitmap_is_set(table->read_set, i))
6089 {
6090 if (field->type() == MYSQL_TYPE_BIT)
6091 {
6092 Field_bit *field_bit= static_cast<Field_bit*>(field);
6093 if (!field->is_real_null(src_offset))
6094 {
6095 field->move_field_offset(src_offset);
6096 longlong value= field_bit->val_int();
6097 field->move_field_offset(dst_offset-src_offset);
6098 field_bit->set_notnull();
6099 /* Field_bit in DBUG requires the bit set in write_set for store(). */
6100 my_bitmap_map *old_map=
6101 dbug_tmp_use_all_columns(table, table->write_set);
6102 int res = field_bit->store(value, true);
6103 assert(res == 0); NDB_IGNORE_VALUE(res);
6104 dbug_tmp_restore_column_map(table->write_set, old_map);
6105 field->move_field_offset(-dst_offset);
6106 }
6107 }
6108 else if (field->flags & BLOB_FLAG)
6109 {
6110 Field_blob *field_blob= (Field_blob *)field;
6111 NdbBlob *ndb_blob= m_value[i].blob;
6112 /* unpack_record *only* called for scan result processing
6113 * *while* the scan is open and the Blob is active.
6114 * Verify Blob state to be certain.
6115 * Accessing PK/UK op Blobs after execute() is unsafe
6116 */
6117 DBUG_ASSERT(ndb_blob != 0);
6118 DBUG_ASSERT(ndb_blob->getState() == NdbBlob::Active);
6119 int isNull;
6120 res= ndb_blob->getNull(isNull);
6121 DBUG_ASSERT(res == 0); // Already succeeded once
6122 Uint64 len64= 0;
6123 field_blob->move_field_offset(dst_offset);
6124 if (!isNull)
6125 {
6126 res= ndb_blob->getLength(len64);
6127 DBUG_ASSERT(res == 0 && len64 <= (Uint64)0xffffffff);
6128 field->set_notnull();
6129 }
6130 /* Need not set_null(), as we initialized null bits to 1 above. */
6131 field_blob->set_ptr((uint32)len64, blob_ptr);
6132 field_blob->move_field_offset(-dst_offset);
6133 blob_ptr+= (len64 + 7) & ~((Uint64)7);
6134 }
6135 else
6136 {
6137 field->move_field_offset(src_offset);
6138 /* Normal field (not blob or bit type). */
6139 if (!field->is_null())
6140 {
6141 /* Only copy actually used bytes of varstrings. */
6142 uint32 actual_length= field_used_length(field);
6143 uchar *src_ptr= field->ptr;
6144 field->move_field_offset(dst_offset - src_offset);
6145 field->set_notnull();
6146 memcpy(field->ptr, src_ptr, actual_length);
6147 #ifdef HAVE_purify
6148 /*
6149 We get Valgrind warnings on uninitialised padding bytes in
6150 varstrings, for example when writing rows to temporary tables.
6151 So for valgrind builds we pad with zeros, not needed for
6152 production code.
6153 */
6154 if (actual_length < field->pack_length())
6155 memset(field->ptr + actual_length, 0,
6156 field->pack_length() - actual_length);
6157 #endif
6158 field->move_field_offset(-dst_offset);
6159 }
6160 else
6161 field->move_field_offset(-src_offset);
6162 /* No action needed for a NULL field. */
6163 }
6164 }
6165 }
6166 }
6167
6168
6169 /**
6170 Get the default value of the field from default_values of the table.
6171 */
get_default_value(void * def_val,Field * field)6172 static void get_default_value(void *def_val, Field *field)
6173 {
6174 DBUG_ASSERT(field != NULL);
6175
6176 my_ptrdiff_t src_offset= field->table->s->default_values - field->table->record[0];
6177
6178 {
6179 if (bitmap_is_set(field->table->read_set, field->field_index))
6180 {
6181 if (field->type() == MYSQL_TYPE_BIT)
6182 {
6183 Field_bit *field_bit= static_cast<Field_bit*>(field);
6184 if (!field->is_real_null(src_offset))
6185 {
6186 field->move_field_offset(src_offset);
6187 longlong value= field_bit->val_int();
6188 /* Map to NdbApi format - two Uint32s */
6189 Uint32 out[2];
6190 out[0] = 0;
6191 out[1] = 0;
6192 for (int b=0; b < 64; b++)
6193 {
6194 out[b >> 5] |= (value & 1) << (b & 31);
6195
6196 value= value >> 1;
6197 }
6198 memcpy(def_val, out, sizeof(longlong));
6199 field->move_field_offset(-src_offset);
6200 }
6201 }
6202 else if (field->flags & BLOB_FLAG)
6203 {
6204 assert(false);
6205 }
6206 else
6207 {
6208 field->move_field_offset(src_offset);
6209 /* Normal field (not blob or bit type). */
6210 if (!field->is_null())
6211 {
6212 /* Only copy actually used bytes of varstrings. */
6213 uint32 actual_length= field_used_length(field);
6214 uchar *src_ptr= field->ptr;
6215 field->set_notnull();
6216 memcpy(def_val, src_ptr, actual_length);
6217 #ifdef HAVE_purify
6218 if (actual_length < field->pack_length())
6219 memset(((char*)def_val) + actual_length, 0,
6220 field->pack_length() - actual_length);
6221 #endif
6222 }
6223 field->move_field_offset(-src_offset);
6224 /* No action needed for a NULL field. */
6225 }
6226 }
6227 }
6228 }
6229
6230 /*
6231 DBUG_EXECUTE("value", print_results(););
6232 */
6233
print_results()6234 void ha_ndbcluster::print_results()
6235 {
6236 DBUG_ENTER("print_results");
6237
6238 #ifndef DBUG_OFF
6239
6240 char buf_type[MAX_FIELD_WIDTH], buf_val[MAX_FIELD_WIDTH];
6241 String type(buf_type, sizeof(buf_type), &my_charset_bin);
6242 String val(buf_val, sizeof(buf_val), &my_charset_bin);
6243 for (uint f= 0; f < table_share->fields; f++)
6244 {
6245 /* Use DBUG_PRINT since DBUG_FILE cannot be filtered out */
6246 char buf[2000];
6247 Field *field;
6248 void* ptr;
6249 NdbValue value;
6250
6251 buf[0]= 0;
6252 field= table->field[f];
6253 if (!(value= m_value[f]).ptr)
6254 {
6255 strmov(buf, "not read");
6256 goto print_value;
6257 }
6258
6259 ptr= field->ptr;
6260
6261 if (! (field->flags & BLOB_FLAG))
6262 {
6263 if (value.rec->isNULL())
6264 {
6265 strmov(buf, "NULL");
6266 goto print_value;
6267 }
6268 type.length(0);
6269 val.length(0);
6270 field->sql_type(type);
6271 field->val_str(&val);
6272 my_snprintf(buf, sizeof(buf), "%s %s", type.c_ptr(), val.c_ptr());
6273 }
6274 else
6275 {
6276 NdbBlob *ndb_blob= value.blob;
6277 bool isNull= TRUE;
6278 assert(ndb_blob->getState() == NdbBlob::Active);
6279 ndb_blob->getNull(isNull);
6280 if (isNull)
6281 strmov(buf, "NULL");
6282 }
6283
6284 print_value:
6285 DBUG_PRINT("value", ("%u,%s: %s", f, field->field_name, buf));
6286 }
6287 #endif
6288 DBUG_VOID_RETURN;
6289 }
6290
6291
6292 /*
6293 Set fields in partition functions in read set for underlying handlers
6294
6295 SYNOPSIS
6296 include_partition_fields_in_used_fields()
6297
6298 RETURN VALUE
6299 NONE
6300
6301 DESCRIPTION
6302 Some handlers only read fields as specified by the bitmap for the
6303 read set. For partitioned handlers we always require that the
6304 fields of the partition functions are read such that we can
6305 calculate the partition id to place updated and deleted records.
6306 */
6307
6308 static void
include_partition_fields_in_used_fields(Field ** ptr,MY_BITMAP * read_set)6309 include_partition_fields_in_used_fields(Field **ptr, MY_BITMAP *read_set)
6310 {
6311 DBUG_ENTER("include_partition_fields_in_used_fields");
6312 do
6313 {
6314 bitmap_set_bit(read_set, (*ptr)->field_index);
6315 } while (*(++ptr));
6316 DBUG_VOID_RETURN;
6317 }
6318
6319
index_init(uint index,bool sorted)6320 int ha_ndbcluster::index_init(uint index, bool sorted)
6321 {
6322 DBUG_ENTER("ha_ndbcluster::index_init");
6323 DBUG_PRINT("enter", ("index: %u sorted: %d", index, sorted));
6324 active_index= index;
6325 m_sorted= sorted;
6326 /*
6327 Locks are are explicitly released in scan
6328 unless m_lock.type == TL_READ_HIGH_PRIORITY
6329 and no sub-sequent call to unlock_row()
6330 */
6331 m_lock_tuple= FALSE;
6332 if (table_share->primary_key == MAX_KEY &&
6333 m_use_partition_pruning)
6334 include_partition_fields_in_used_fields(
6335 m_part_info->full_part_field_array,
6336 table->read_set);
6337 DBUG_RETURN(0);
6338 }
6339
6340
index_end()6341 int ha_ndbcluster::index_end()
6342 {
6343 DBUG_ENTER("ha_ndbcluster::index_end");
6344 DBUG_RETURN(close_scan());
6345 }
6346
6347 /**
6348 Check if key contains null.
6349 */
6350 static
6351 int
check_null_in_key(const KEY * key_info,const uchar * key,uint key_len)6352 check_null_in_key(const KEY* key_info, const uchar *key, uint key_len)
6353 {
6354 KEY_PART_INFO *curr_part, *end_part;
6355 const uchar* end_ptr= key + key_len;
6356 curr_part= key_info->key_part;
6357 end_part= curr_part + key_info->user_defined_key_parts;
6358
6359 for (; curr_part != end_part && key < end_ptr; curr_part++)
6360 {
6361 if (curr_part->null_bit && *key)
6362 return 1;
6363
6364 key += curr_part->store_length;
6365 }
6366 return 0;
6367 }
6368
6369
index_read_idx_map(uchar * buf,uint index,const uchar * key,key_part_map keypart_map,enum ha_rkey_function find_flag)6370 int ha_ndbcluster::index_read_idx_map(uchar* buf, uint index,
6371 const uchar* key,
6372 key_part_map keypart_map,
6373 enum ha_rkey_function find_flag)
6374 {
6375 DBUG_ENTER("ha_ndbcluster::index_read_idx_map");
6376 int error= index_init(index, 0);
6377 if (unlikely(error))
6378 DBUG_RETURN(error);
6379
6380 DBUG_RETURN(index_read_map(buf, key, keypart_map, find_flag));
6381 }
6382
6383
index_read(uchar * buf,const uchar * key,uint key_len,enum ha_rkey_function find_flag)6384 int ha_ndbcluster::index_read(uchar *buf,
6385 const uchar *key, uint key_len,
6386 enum ha_rkey_function find_flag)
6387 {
6388 key_range start_key;
6389 bool descending= FALSE;
6390 DBUG_ENTER("ha_ndbcluster::index_read");
6391 DBUG_PRINT("enter", ("active_index: %u, key_len: %u, find_flag: %d",
6392 active_index, key_len, find_flag));
6393
6394 start_key.key= key;
6395 start_key.length= key_len;
6396 start_key.flag= find_flag;
6397 descending= FALSE;
6398 switch (find_flag) {
6399 case HA_READ_KEY_OR_PREV:
6400 case HA_READ_BEFORE_KEY:
6401 case HA_READ_PREFIX_LAST:
6402 case HA_READ_PREFIX_LAST_OR_PREV:
6403 descending= TRUE;
6404 break;
6405 default:
6406 break;
6407 }
6408 const int error= read_range_first_to_buf(&start_key, 0, descending,
6409 m_sorted, buf);
6410 table->status=error ? STATUS_NOT_FOUND: 0;
6411 DBUG_RETURN(error);
6412 }
6413
6414
index_next(uchar * buf)6415 int ha_ndbcluster::index_next(uchar *buf)
6416 {
6417 DBUG_ENTER("ha_ndbcluster::index_next");
6418 ha_statistic_increment(&SSV::ha_read_next_count);
6419 const int error= next_result(buf);
6420 table->status=error ? STATUS_NOT_FOUND: 0;
6421 DBUG_RETURN(error);
6422 }
6423
6424
index_prev(uchar * buf)6425 int ha_ndbcluster::index_prev(uchar *buf)
6426 {
6427 DBUG_ENTER("ha_ndbcluster::index_prev");
6428 ha_statistic_increment(&SSV::ha_read_prev_count);
6429 const int error= next_result(buf);
6430 table->status=error ? STATUS_NOT_FOUND: 0;
6431 DBUG_RETURN(error);
6432 }
6433
6434
index_first(uchar * buf)6435 int ha_ndbcluster::index_first(uchar *buf)
6436 {
6437 DBUG_ENTER("ha_ndbcluster::index_first");
6438 ha_statistic_increment(&SSV::ha_read_first_count);
6439 // Start the ordered index scan and fetch the first row
6440
6441 // Only HA_READ_ORDER indexes get called by index_first
6442 const int error= ordered_index_scan(0, 0, m_sorted, FALSE, buf, NULL);
6443 table->status=error ? STATUS_NOT_FOUND: 0;
6444 DBUG_RETURN(error);
6445 }
6446
6447
index_last(uchar * buf)6448 int ha_ndbcluster::index_last(uchar *buf)
6449 {
6450 DBUG_ENTER("ha_ndbcluster::index_last");
6451 ha_statistic_increment(&SSV::ha_read_last_count);
6452 const int error= ordered_index_scan(0, 0, m_sorted, TRUE, buf, NULL);
6453 table->status=error ? STATUS_NOT_FOUND: 0;
6454 DBUG_RETURN(error);
6455 }
6456
index_read_last(uchar * buf,const uchar * key,uint key_len)6457 int ha_ndbcluster::index_read_last(uchar * buf, const uchar * key, uint key_len)
6458 {
6459 DBUG_ENTER("ha_ndbcluster::index_read_last");
6460 DBUG_RETURN(index_read(buf, key, key_len, HA_READ_PREFIX_LAST));
6461 }
6462
6463
6464 /**
6465 Read first row (only) from a table.
6466
6467 This is actually (yet) never called for ndbcluster tables, as these table types
6468 does not set HA_STATS_RECORDS_IS_EXACT.
6469
6470 UPDATE: Might be called if the predicate contain '<column> IS NULL', and
6471 <column> is defined as 'NOT NULL' (or is part of primary key)
6472
6473 Implemented regardless of this as the default implememtation would break
6474 any pushed joins as it calls ha_rnd_end() / ha_index_end() at end of execution.
6475 */
read_first_row(uchar * buf,uint primary_key)6476 int ha_ndbcluster::read_first_row(uchar * buf, uint primary_key)
6477 {
6478 register int error;
6479 DBUG_ENTER("ha_ndbcluster::read_first_row");
6480
6481 ha_statistic_increment(&SSV::ha_read_first_count);
6482
6483 /*
6484 If there is very few deleted rows in the table, find the first row by
6485 scanning the table.
6486 TODO remove the test for HA_READ_ORDER
6487 */
6488 if (stats.deleted < 10 || primary_key >= MAX_KEY ||
6489 !(index_flags(primary_key, 0, 0) & HA_READ_ORDER))
6490 {
6491 (void) ha_rnd_init(1);
6492 while ((error= rnd_next(buf)) == HA_ERR_RECORD_DELETED) ;
6493 }
6494 else
6495 {
6496 /* Find the first row through the primary key */
6497 (void) ha_index_init(primary_key, 0);
6498 error=index_first(buf);
6499 }
6500 DBUG_RETURN(error);
6501 }
6502
6503
read_range_first_to_buf(const key_range * start_key,const key_range * end_key,bool desc,bool sorted,uchar * buf)6504 int ha_ndbcluster::read_range_first_to_buf(const key_range *start_key,
6505 const key_range *end_key,
6506 bool desc, bool sorted,
6507 uchar* buf)
6508 {
6509 part_id_range part_spec;
6510 ndb_index_type type= get_index_type(active_index);
6511 const KEY* key_info= table->key_info+active_index;
6512 int error;
6513 DBUG_ENTER("ha_ndbcluster::read_range_first_to_buf");
6514 DBUG_PRINT("info", ("desc: %d, sorted: %d", desc, sorted));
6515
6516 if (unlikely((error= close_scan())))
6517 DBUG_RETURN(error);
6518
6519 if (m_use_partition_pruning)
6520 {
6521 DBUG_ASSERT(m_pushed_join_operation != PUSHED_ROOT);
6522 get_partition_set(table, buf, active_index, start_key, &part_spec);
6523 DBUG_PRINT("info", ("part_spec.start_part: %u part_spec.end_part: %u",
6524 part_spec.start_part, part_spec.end_part));
6525 /*
6526 If partition pruning has found no partition in set
6527 we can return HA_ERR_END_OF_FILE
6528 If partition pruning has found exactly one partition in set
6529 we can optimize scan to run towards that partition only.
6530 */
6531 if (part_spec.start_part > part_spec.end_part)
6532 {
6533 DBUG_RETURN(HA_ERR_END_OF_FILE);
6534 }
6535
6536 if (part_spec.start_part == part_spec.end_part)
6537 {
6538 /*
6539 Only one partition is required to scan, if sorted is required we
6540 don't need it any more since output from one ordered partitioned
6541 index is always sorted.
6542 */
6543 sorted= FALSE;
6544 if (unlikely(!get_transaction_part_id(part_spec.start_part, error)))
6545 {
6546 DBUG_RETURN(error);
6547 }
6548 }
6549 }
6550
6551 switch (type){
6552 case PRIMARY_KEY_ORDERED_INDEX:
6553 case PRIMARY_KEY_INDEX:
6554 if (start_key &&
6555 start_key->length == key_info->key_length &&
6556 start_key->flag == HA_READ_KEY_EXACT)
6557 {
6558 if (!m_thd_ndb->trans)
6559 if (unlikely(!start_transaction_key(active_index,
6560 start_key->key, error)))
6561 DBUG_RETURN(error);
6562 error= pk_read(start_key->key, start_key->length, buf,
6563 (m_use_partition_pruning)? &(part_spec.start_part) : NULL);
6564 DBUG_RETURN(error == HA_ERR_KEY_NOT_FOUND ? HA_ERR_END_OF_FILE : error);
6565 }
6566 break;
6567 case UNIQUE_ORDERED_INDEX:
6568 case UNIQUE_INDEX:
6569 if (start_key && start_key->length == key_info->key_length &&
6570 start_key->flag == HA_READ_KEY_EXACT &&
6571 !check_null_in_key(key_info, start_key->key, start_key->length))
6572 {
6573 if (!m_thd_ndb->trans)
6574 if (unlikely(!start_transaction_key(active_index,
6575 start_key->key, error)))
6576 DBUG_RETURN(error);
6577 error= unique_index_read(start_key->key, start_key->length, buf);
6578 DBUG_RETURN(error == HA_ERR_KEY_NOT_FOUND ? HA_ERR_END_OF_FILE : error);
6579 }
6580 else if (type == UNIQUE_INDEX)
6581 DBUG_RETURN(full_table_scan(key_info,
6582 start_key,
6583 end_key,
6584 buf));
6585 break;
6586 default:
6587 break;
6588 }
6589 if (!m_use_partition_pruning && !m_thd_ndb->trans)
6590 {
6591 get_partition_set(table, buf, active_index, start_key, &part_spec);
6592 if (part_spec.start_part == part_spec.end_part)
6593 if (unlikely(!start_transaction_part_id(part_spec.start_part, error)))
6594 DBUG_RETURN(error);
6595 }
6596 // Start the ordered index scan and fetch the first row
6597 DBUG_RETURN(ordered_index_scan(start_key, end_key, sorted, desc, buf,
6598 (m_use_partition_pruning)? &part_spec : NULL));
6599 }
6600
read_range_first(const key_range * start_key,const key_range * end_key,bool eq_r,bool sorted)6601 int ha_ndbcluster::read_range_first(const key_range *start_key,
6602 const key_range *end_key,
6603 bool eq_r, bool sorted)
6604 {
6605 uchar* buf= table->record[0];
6606 DBUG_ENTER("ha_ndbcluster::read_range_first");
6607 DBUG_RETURN(read_range_first_to_buf(start_key, end_key, FALSE,
6608 sorted, buf));
6609 }
6610
read_range_next()6611 int ha_ndbcluster::read_range_next()
6612 {
6613 DBUG_ENTER("ha_ndbcluster::read_range_next");
6614 DBUG_RETURN(next_result(table->record[0]));
6615 }
6616
6617
rnd_init(bool scan)6618 int ha_ndbcluster::rnd_init(bool scan)
6619 {
6620 int error;
6621 DBUG_ENTER("rnd_init");
6622 DBUG_PRINT("enter", ("scan: %d", scan));
6623
6624 if ((error= close_scan()))
6625 DBUG_RETURN(error);
6626 index_init(table_share->primary_key, 0);
6627 DBUG_RETURN(0);
6628 }
6629
close_scan()6630 int ha_ndbcluster::close_scan()
6631 {
6632 /*
6633 workaround for bug #39872 - explain causes segv
6634 - rnd_end/close_scan is called on unlocked table
6635 - should be fixed in server code, but this will
6636 not be done until 6.0 as it is too intrusive
6637 */
6638 if (m_thd_ndb == NULL)
6639 return 0;
6640 NdbTransaction *trans= m_thd_ndb->trans;
6641 int error;
6642 DBUG_ENTER("close_scan");
6643
6644 if (m_active_query)
6645 {
6646 m_active_query->close(m_thd_ndb->m_force_send);
6647 m_active_query= NULL;
6648 }
6649
6650 NdbScanOperation *cursor= m_active_cursor;
6651
6652 if (!cursor)
6653 {
6654 cursor = m_multi_cursor;
6655 if (!cursor)
6656 DBUG_RETURN(0);
6657 }
6658
6659 if ((error= scan_handle_lock_tuple(cursor, trans)) != 0)
6660 DBUG_RETURN(error);
6661
6662 if (m_thd_ndb->m_unsent_bytes)
6663 {
6664 /*
6665 Take over any pending transactions to the
6666 deleteing/updating transaction before closing the scan
6667 */
6668 DBUG_PRINT("info", ("thd_ndb->m_unsent_bytes: %ld",
6669 (long) m_thd_ndb->m_unsent_bytes));
6670 if (execute_no_commit(m_thd_ndb, trans, m_ignore_no_key) != 0)
6671 {
6672 no_uncommitted_rows_execute_failure();
6673 DBUG_RETURN(ndb_err(trans));
6674 }
6675 }
6676
6677 cursor->close(m_thd_ndb->m_force_send, TRUE);
6678 m_active_cursor= NULL;
6679 m_multi_cursor= NULL;
6680 DBUG_RETURN(0);
6681 }
6682
rnd_end()6683 int ha_ndbcluster::rnd_end()
6684 {
6685 DBUG_ENTER("rnd_end");
6686 DBUG_RETURN(close_scan());
6687 }
6688
6689
rnd_next(uchar * buf)6690 int ha_ndbcluster::rnd_next(uchar *buf)
6691 {
6692 DBUG_ENTER("rnd_next");
6693 ha_statistic_increment(&SSV::ha_read_rnd_next_count);
6694
6695 int error;
6696 if (m_active_cursor)
6697 error= next_result(buf);
6698 else if (m_active_query)
6699 error= next_result(buf);
6700 else
6701 error= full_table_scan(NULL, NULL, NULL, buf);
6702
6703 table->status= error ? STATUS_NOT_FOUND: 0;
6704 DBUG_RETURN(error);
6705 }
6706
6707
6708 /**
6709 An "interesting" record has been found and it's pk
6710 retrieved by calling position. Now it's time to read
6711 the record from db once again.
6712 */
6713
rnd_pos(uchar * buf,uchar * pos)6714 int ha_ndbcluster::rnd_pos(uchar *buf, uchar *pos)
6715 {
6716 DBUG_ENTER("rnd_pos");
6717 ha_statistic_increment(&SSV::ha_read_rnd_count);
6718 // The primary key for the record is stored in pos
6719 // Perform a pk_read using primary key "index"
6720 {
6721 part_id_range part_spec;
6722 uint key_length= ref_length;
6723 if (m_user_defined_partitioning)
6724 {
6725 if (table_share->primary_key == MAX_KEY)
6726 {
6727 /*
6728 The partition id has been fetched from ndb
6729 and has been stored directly after the hidden key
6730 */
6731 DBUG_DUMP("key+part", pos, key_length);
6732 key_length= ref_length - sizeof(m_part_id);
6733 part_spec.start_part= part_spec.end_part= *(uint32 *)(pos + key_length);
6734 }
6735 else
6736 {
6737 key_range key_spec;
6738 KEY *key_info= table->key_info + table_share->primary_key;
6739 key_spec.key= pos;
6740 key_spec.length= key_length;
6741 key_spec.flag= HA_READ_KEY_EXACT;
6742 get_full_part_id_from_key(table, buf, key_info,
6743 &key_spec, &part_spec);
6744 DBUG_ASSERT(part_spec.start_part == part_spec.end_part);
6745 }
6746 DBUG_PRINT("info", ("partition id %u", part_spec.start_part));
6747 }
6748 DBUG_DUMP("key", pos, key_length);
6749 int res= pk_read(pos, key_length, buf,
6750 (m_user_defined_partitioning) ?
6751 &(part_spec.start_part)
6752 : NULL);
6753 if (res == HA_ERR_KEY_NOT_FOUND)
6754 {
6755 /**
6756 * When using rnd_pos
6757 * server first retrives a set of records (typically scans them)
6758 * and store a unique identifier (for ndb this is the primary key)
6759 * and later retreives the record again using rnd_pos and the
6760 * saved primary key. For ndb, since we only support committed read
6761 * the record could have been deleted in between the "save" and
6762 * the rnd_pos.
6763 * Therefor we return HA_ERR_RECORD_DELETED in this case rather than
6764 * HA_ERR_KEY_NOT_FOUND (which will cause statment to be aborted)
6765 *
6766 */
6767 res= HA_ERR_RECORD_DELETED;
6768 }
6769 table->status= res ? STATUS_NOT_FOUND: 0;
6770 DBUG_RETURN(res);
6771 }
6772 }
6773
6774
6775 /**
6776 Store the primary key of this record in ref
6777 variable, so that the row can be retrieved again later
6778 using "reference" in rnd_pos.
6779 */
6780
position(const uchar * record)6781 void ha_ndbcluster::position(const uchar *record)
6782 {
6783 KEY *key_info;
6784 KEY_PART_INFO *key_part;
6785 KEY_PART_INFO *end;
6786 uchar *buff;
6787 uint key_length;
6788
6789 DBUG_ENTER("position");
6790
6791 if (table_share->primary_key != MAX_KEY)
6792 {
6793 key_length= ref_length;
6794 key_info= table->key_info + table_share->primary_key;
6795 key_part= key_info->key_part;
6796 end= key_part + key_info->user_defined_key_parts;
6797 buff= ref;
6798
6799 for (; key_part != end; key_part++)
6800 {
6801 if (key_part->null_bit) {
6802 /* Store 0 if the key part is a NULL part */
6803 if (record[key_part->null_offset]
6804 & key_part->null_bit) {
6805 *buff++= 1;
6806 continue;
6807 }
6808 *buff++= 0;
6809 }
6810
6811 size_t len = key_part->length;
6812 const uchar * ptr = record + key_part->offset;
6813 Field *field = key_part->field;
6814 if (field->type() == MYSQL_TYPE_VARCHAR)
6815 {
6816 if (((Field_varstring*)field)->length_bytes == 1)
6817 {
6818 /**
6819 * Keys always use 2 bytes length
6820 */
6821 buff[0] = ptr[0];
6822 buff[1] = 0;
6823 memcpy(buff+2, ptr + 1, len);
6824 }
6825 else
6826 {
6827 memcpy(buff, ptr, len + 2);
6828 }
6829 len += 2;
6830 }
6831 else
6832 {
6833 memcpy(buff, ptr, len);
6834 }
6835 buff += len;
6836 }
6837 }
6838 else
6839 {
6840 // No primary key, get hidden key
6841 DBUG_PRINT("info", ("Getting hidden key"));
6842 // If table has user defined partition save the partition id as well
6843 if (m_user_defined_partitioning)
6844 {
6845 DBUG_PRINT("info", ("Saving partition id %u", m_part_id));
6846 key_length= ref_length - sizeof(m_part_id);
6847 memcpy(ref+key_length, (void *)&m_part_id, sizeof(m_part_id));
6848 }
6849 else
6850 key_length= ref_length;
6851 #ifndef DBUG_OFF
6852 int hidden_no= table->s->fields;
6853 const NDBTAB *tab= m_table;
6854 const NDBCOL *hidden_col= tab->getColumn(hidden_no);
6855 DBUG_ASSERT(hidden_col->getPrimaryKey() &&
6856 hidden_col->getAutoIncrement() &&
6857 key_length == NDB_HIDDEN_PRIMARY_KEY_LENGTH);
6858 #endif
6859 memcpy(ref, &m_ref, key_length);
6860 }
6861 #ifndef DBUG_OFF
6862 if (table_share->primary_key == MAX_KEY && m_user_defined_partitioning)
6863 DBUG_DUMP("key+part", ref, key_length+sizeof(m_part_id));
6864 #endif
6865 DBUG_DUMP("ref", ref, key_length);
6866 DBUG_VOID_RETURN;
6867 }
6868
6869 int
cmp_ref(const uchar * ref1,const uchar * ref2)6870 ha_ndbcluster::cmp_ref(const uchar * ref1, const uchar * ref2)
6871 {
6872 DBUG_ENTER("cmp_ref");
6873
6874 if (table_share->primary_key != MAX_KEY)
6875 {
6876 KEY *key_info= table->key_info + table_share->primary_key;
6877 KEY_PART_INFO *key_part= key_info->key_part;
6878 KEY_PART_INFO *end= key_part + key_info->user_defined_key_parts;
6879
6880 for (; key_part != end; key_part++)
6881 {
6882 // NOTE: No need to check for null since PK is not-null
6883
6884 Field *field= key_part->field;
6885 int result= field->key_cmp(ref1, ref2);
6886 if (result)
6887 {
6888 DBUG_RETURN(result);
6889 }
6890
6891 if (field->type() == MYSQL_TYPE_VARCHAR)
6892 {
6893 ref1+= 2;
6894 ref2+= 2;
6895 }
6896
6897 ref1+= key_part->length;
6898 ref2+= key_part->length;
6899 }
6900 DBUG_RETURN(0);
6901 }
6902 else
6903 {
6904 DBUG_RETURN(memcmp(ref1, ref2, ref_length));
6905 }
6906 }
6907
info(uint flag)6908 int ha_ndbcluster::info(uint flag)
6909 {
6910 THD *thd= table->in_use;
6911 int result= 0;
6912 DBUG_ENTER("info");
6913 DBUG_PRINT("enter", ("flag: %d", flag));
6914
6915 if (flag & HA_STATUS_POS)
6916 DBUG_PRINT("info", ("HA_STATUS_POS"));
6917 if (flag & HA_STATUS_TIME)
6918 DBUG_PRINT("info", ("HA_STATUS_TIME"));
6919 while (flag & HA_STATUS_VARIABLE)
6920 {
6921 if (!thd)
6922 thd= current_thd;
6923 DBUG_PRINT("info", ("HA_STATUS_VARIABLE"));
6924
6925 if (!m_table_info)
6926 {
6927 if ((my_errno= check_ndb_connection(thd)))
6928 DBUG_RETURN(my_errno);
6929 }
6930
6931 /*
6932 May need to update local copy of statistics in
6933 'm_table_info', either directly from datanodes,
6934 or from shared (mutex protected) cached copy, if:
6935 1) 'use_exact_count' has been set (by config or user).
6936 2) HA_STATUS_NO_LOCK -> read from shared cached copy.
6937 3) Local copy is invalid.
6938 */
6939 bool exact_count= THDVAR(thd, use_exact_count);
6940 if (exact_count || // 1)
6941 !(flag & HA_STATUS_NO_LOCK) || // 2)
6942 m_table_info == NULL || // 3)
6943 m_table_info->records == ~(ha_rows)0) // 3)
6944 {
6945 result= update_stats(thd, (exact_count || !(flag & HA_STATUS_NO_LOCK)));
6946 if (result)
6947 DBUG_RETURN(result);
6948 }
6949 /* Read from local statistics, fast and fuzzy, wo/ locks */
6950 else
6951 {
6952 DBUG_ASSERT(m_table_info->records != ~(ha_rows)0);
6953 stats.records= m_table_info->records +
6954 m_table_info->no_uncommitted_rows_count;
6955 }
6956
6957 if (thd->lex->sql_command != SQLCOM_SHOW_TABLE_STATUS &&
6958 thd->lex->sql_command != SQLCOM_SHOW_KEYS)
6959 {
6960 /*
6961 just use whatever stats we have. However,
6962 optimizer interprets the values 0 and 1 as EXACT:
6963 -> < 2 should not be returned.
6964 */
6965 if (stats.records < 2)
6966 stats.records= 2;
6967 }
6968 break;
6969 }
6970 /* RPK moved to variable part */
6971 if (flag & HA_STATUS_VARIABLE)
6972 {
6973 /* No meaningful way to return error */
6974 DBUG_PRINT("info", ("rec_per_key"));
6975 set_rec_per_key();
6976 }
6977 if (flag & HA_STATUS_ERRKEY)
6978 {
6979 DBUG_PRINT("info", ("HA_STATUS_ERRKEY"));
6980 errkey= m_dupkey;
6981 }
6982 if (flag & HA_STATUS_AUTO)
6983 {
6984 DBUG_PRINT("info", ("HA_STATUS_AUTO"));
6985 if (m_table && table->found_next_number_field)
6986 {
6987 if (!thd)
6988 thd= current_thd;
6989 if ((my_errno= check_ndb_connection(thd)))
6990 DBUG_RETURN(my_errno);
6991 Ndb *ndb= get_ndb(thd);
6992 Ndb_tuple_id_range_guard g(m_share);
6993
6994 Uint64 auto_increment_value64;
6995 if (ndb->readAutoIncrementValue(m_table, g.range,
6996 auto_increment_value64) == -1)
6997 {
6998 const NdbError err= ndb->getNdbError();
6999 sql_print_error("Error %lu in readAutoIncrementValue(): %s",
7000 (ulong) err.code, err.message);
7001 stats.auto_increment_value= ~(ulonglong)0;
7002 }
7003 else
7004 stats.auto_increment_value= (ulonglong)auto_increment_value64;
7005 }
7006 }
7007
7008 if(result == -1)
7009 result= HA_ERR_NO_CONNECTION;
7010
7011 DBUG_RETURN(result);
7012 }
7013
7014
get_dynamic_partition_info(PARTITION_STATS * stat_info,uint part_id)7015 void ha_ndbcluster::get_dynamic_partition_info(PARTITION_STATS *stat_info,
7016 uint part_id)
7017 {
7018 DBUG_PRINT("info", ("ha_ndbcluster::get_dynamic_partition_info"));
7019
7020 memset(stat_info, 0, sizeof(PARTITION_STATS));
7021 int error = 0;
7022 THD *thd = table->in_use;
7023
7024 if (!thd)
7025 thd = current_thd;
7026 if (!m_table_info)
7027 {
7028 if ((error = check_ndb_connection(thd)))
7029 goto err;
7030 }
7031 error = update_stats(thd, 1, false, part_id);
7032
7033 if (error == 0)
7034 {
7035 stat_info->records = stats.records;
7036 stat_info->mean_rec_length = stats.mean_rec_length;
7037 stat_info->data_file_length = stats.data_file_length;
7038 stat_info->delete_length = stats.delete_length;
7039 stat_info->max_data_file_length = stats.max_data_file_length;
7040 return;
7041 }
7042
7043 err:
7044
7045 DBUG_PRINT("warning",
7046 ("ha_ndbcluster::get_dynamic_partition_info failed with error code %u",
7047 error));
7048 }
7049
7050
extra(enum ha_extra_function operation)7051 int ha_ndbcluster::extra(enum ha_extra_function operation)
7052 {
7053 DBUG_ENTER("extra");
7054 switch (operation) {
7055 case HA_EXTRA_IGNORE_DUP_KEY: /* Dup keys don't rollback everything*/
7056 DBUG_PRINT("info", ("HA_EXTRA_IGNORE_DUP_KEY"));
7057 DBUG_PRINT("info", ("Ignoring duplicate key"));
7058 m_ignore_dup_key= TRUE;
7059 break;
7060 case HA_EXTRA_NO_IGNORE_DUP_KEY:
7061 DBUG_PRINT("info", ("HA_EXTRA_NO_IGNORE_DUP_KEY"));
7062 m_ignore_dup_key= FALSE;
7063 break;
7064 case HA_EXTRA_IGNORE_NO_KEY:
7065 DBUG_PRINT("info", ("HA_EXTRA_IGNORE_NO_KEY"));
7066 DBUG_PRINT("info", ("Turning on AO_IgnoreError at Commit/NoCommit"));
7067 m_ignore_no_key= TRUE;
7068 break;
7069 case HA_EXTRA_NO_IGNORE_NO_KEY:
7070 DBUG_PRINT("info", ("HA_EXTRA_NO_IGNORE_NO_KEY"));
7071 DBUG_PRINT("info", ("Turning on AO_IgnoreError at Commit/NoCommit"));
7072 m_ignore_no_key= FALSE;
7073 break;
7074 case HA_EXTRA_WRITE_CAN_REPLACE:
7075 DBUG_PRINT("info", ("HA_EXTRA_WRITE_CAN_REPLACE"));
7076 if (!m_has_unique_index ||
7077 current_thd->slave_thread || /* always set if slave, quick fix for bug 27378 */
7078 isManualBinlogExec(current_thd)) /* or if manual binlog application, for bug 46662 */
7079 {
7080 DBUG_PRINT("info", ("Turning ON use of write instead of insert"));
7081 m_use_write= TRUE;
7082 }
7083 break;
7084 case HA_EXTRA_WRITE_CANNOT_REPLACE:
7085 DBUG_PRINT("info", ("HA_EXTRA_WRITE_CANNOT_REPLACE"));
7086 DBUG_PRINT("info", ("Turning OFF use of write instead of insert"));
7087 m_use_write= FALSE;
7088 break;
7089 case HA_EXTRA_DELETE_CANNOT_BATCH:
7090 DBUG_PRINT("info", ("HA_EXTRA_DELETE_CANNOT_BATCH"));
7091 m_delete_cannot_batch= TRUE;
7092 break;
7093 case HA_EXTRA_UPDATE_CANNOT_BATCH:
7094 DBUG_PRINT("info", ("HA_EXTRA_UPDATE_CANNOT_BATCH"));
7095 m_update_cannot_batch= TRUE;
7096 break;
7097 // We don't implement 'KEYREAD'. However, KEYREAD also implies DISABLE_JOINPUSH.
7098 case HA_EXTRA_KEYREAD:
7099 DBUG_PRINT("info", ("HA_EXTRA_KEYREAD"));
7100 m_disable_pushed_join= TRUE;
7101 break;
7102 case HA_EXTRA_NO_KEYREAD:
7103 DBUG_PRINT("info", ("HA_EXTRA_NO_KEYREAD"));
7104 m_disable_pushed_join= FALSE;
7105 break;
7106 default:
7107 break;
7108 }
7109
7110 DBUG_RETURN(0);
7111 }
7112
7113
start_read_removal()7114 bool ha_ndbcluster::start_read_removal()
7115 {
7116 THD *thd= table->in_use;
7117 DBUG_ENTER("start_read_removal");
7118
7119 if (uses_blob_value(table->write_set))
7120 {
7121 DBUG_PRINT("exit", ("No! Blob field in write_set"));
7122 DBUG_RETURN(false);
7123 }
7124
7125 if (thd->lex->sql_command == SQLCOM_DELETE &&
7126 table_share->blob_fields)
7127 {
7128 DBUG_PRINT("exit", ("No! DELETE from table with blob(s)"));
7129 DBUG_RETURN(false);
7130 }
7131
7132 if (table_share->primary_key == MAX_KEY)
7133 {
7134 DBUG_PRINT("exit", ("No! Table with hidden key"));
7135 DBUG_RETURN(false);
7136 }
7137
7138 if (bitmap_is_overlapping(table->write_set, m_pk_bitmap_p))
7139 {
7140 DBUG_PRINT("exit", ("No! Updating primary key"));
7141 DBUG_RETURN(false);
7142 }
7143
7144 if (m_has_unique_index)
7145 {
7146 for (uint i= 0; i < table_share->keys; i++)
7147 {
7148 const KEY* key= table->key_info + i;
7149 if ((key->flags & HA_NOSAME) &&
7150 bitmap_is_overlapping(table->write_set,
7151 m_key_fields[i]))
7152 {
7153 DBUG_PRINT("exit", ("No! Unique key %d is updated", i));
7154 DBUG_RETURN(false);
7155 }
7156 }
7157 }
7158 m_read_before_write_removal_possible= TRUE;
7159 DBUG_PRINT("exit", ("Yes, rbwr is possible!"));
7160 DBUG_RETURN(true);
7161 }
7162
7163
end_read_removal(void)7164 ha_rows ha_ndbcluster::end_read_removal(void)
7165 {
7166 DBUG_ENTER("end_read_removal");
7167 DBUG_ASSERT(m_read_before_write_removal_possible);
7168 DBUG_PRINT("info", ("updated: %llu, deleted: %llu",
7169 m_rows_updated, m_rows_deleted));
7170 DBUG_RETURN(m_rows_updated + m_rows_deleted);
7171 }
7172
7173
reset()7174 int ha_ndbcluster::reset()
7175 {
7176 DBUG_ENTER("ha_ndbcluster::reset");
7177 if (m_cond)
7178 {
7179 m_cond->cond_clear();
7180 }
7181 #ifndef NDB_WITHOUT_JOIN_PUSHDOWN
7182 DBUG_ASSERT(m_active_query == NULL);
7183 if (m_pushed_join_operation==PUSHED_ROOT) // Root of pushed query
7184 {
7185 delete m_pushed_join_member; // Also delete QueryDef
7186 }
7187 m_pushed_join_member= NULL;
7188 m_pushed_join_operation= -1;
7189 m_disable_pushed_join= FALSE;
7190 #endif
7191
7192 #if 0
7193 // Magnus, disble this "hack" until it's possible to test if
7194 // it's still needed
7195 /*
7196 Regular partition pruning will set the bitmap appropriately.
7197 Some queries like ALTER TABLE doesn't use partition pruning and
7198 thus the 'used_partitions' bitmap needs to be initialized
7199 */
7200 if (m_part_info)
7201 bitmap_set_all(&m_part_info->used_partitions);
7202 #endif
7203
7204 /* reset flags set by extra calls */
7205 m_read_before_write_removal_possible= FALSE;
7206 m_read_before_write_removal_used= FALSE;
7207 m_rows_updated= m_rows_deleted= 0;
7208 m_ignore_dup_key= FALSE;
7209 m_use_write= FALSE;
7210 m_ignore_no_key= FALSE;
7211 m_rows_inserted= (ha_rows) 0;
7212 m_rows_to_insert= (ha_rows) 1;
7213 m_delete_cannot_batch= FALSE;
7214 m_update_cannot_batch= FALSE;
7215
7216 assert(m_is_bulk_delete == false);
7217 m_is_bulk_delete = false;
7218 DBUG_RETURN(0);
7219 }
7220
7221
/**
  Documentation for start_bulk_insert(), which is defined further below,
  after flush_bulk_insert():

  Start of an insert, remember number of rows to be inserted, it will
  be used in write_row and get_autoincrement to send an optimal number
  of rows in each roundtrip to the server.

  @param
   rows     number of rows to insert, 0 if unknown
*/
7230
7231 int
flush_bulk_insert(bool allow_batch)7232 ha_ndbcluster::flush_bulk_insert(bool allow_batch)
7233 {
7234 NdbTransaction *trans= m_thd_ndb->trans;
7235 DBUG_ENTER("ha_ndbcluster::flush_bulk_insert");
7236 DBUG_PRINT("info", ("Sending inserts to NDB, rows_inserted: %d",
7237 (int)m_rows_inserted));
7238 DBUG_ASSERT(trans);
7239
7240
7241 if (! (m_thd_ndb->trans_options & TNTO_TRANSACTIONS_OFF))
7242 {
7243 if (!allow_batch &&
7244 execute_no_commit(m_thd_ndb, trans, m_ignore_no_key) != 0)
7245 {
7246 no_uncommitted_rows_execute_failure();
7247 DBUG_RETURN(ndb_err(trans));
7248 }
7249 }
7250 else
7251 {
7252 /*
7253 signal that transaction has been broken up and hence cannot
7254 be rolled back
7255 */
7256 THD *thd= table->in_use;
7257 thd->transaction.all.mark_modified_non_trans_table();
7258 thd->transaction.stmt.mark_modified_non_trans_table();
7259 if (execute_commit(thd, m_thd_ndb, trans, m_thd_ndb->m_force_send,
7260 m_ignore_no_key) != 0)
7261 {
7262 no_uncommitted_rows_execute_failure();
7263 DBUG_RETURN(ndb_err(trans));
7264 }
7265 if (trans->restart() != 0)
7266 {
7267 DBUG_ASSERT(0);
7268 DBUG_RETURN(-1);
7269 }
7270 }
7271 DBUG_RETURN(0);
7272 }
7273
start_bulk_insert(ha_rows rows)7274 void ha_ndbcluster::start_bulk_insert(ha_rows rows)
7275 {
7276 DBUG_ENTER("start_bulk_insert");
7277 DBUG_PRINT("enter", ("rows: %d", (int)rows));
7278
7279 m_rows_inserted= (ha_rows) 0;
7280 if (!m_use_write && m_ignore_dup_key)
7281 {
7282 /*
7283 compare if expression with that in write_row
7284 we have a situation where peek_indexed_rows() will be called
7285 so we cannot batch
7286 */
7287 DBUG_PRINT("info", ("Batching turned off as duplicate key is "
7288 "ignored by using peek_row"));
7289 m_rows_to_insert= 1;
7290 DBUG_VOID_RETURN;
7291 }
7292 if (rows == (ha_rows) 0)
7293 {
7294 /* We don't know how many will be inserted, guess */
7295 m_rows_to_insert=
7296 (m_autoincrement_prefetch > DEFAULT_AUTO_PREFETCH)
7297 ? m_autoincrement_prefetch
7298 : DEFAULT_AUTO_PREFETCH;
7299 m_autoincrement_prefetch= m_rows_to_insert;
7300 }
7301 else
7302 {
7303 m_rows_to_insert= rows;
7304 if (m_autoincrement_prefetch < m_rows_to_insert)
7305 m_autoincrement_prefetch= m_rows_to_insert;
7306 }
7307
7308 DBUG_VOID_RETURN;
7309 }
7310
7311 /**
7312 End of an insert.
7313 */
end_bulk_insert()7314 int ha_ndbcluster::end_bulk_insert()
7315 {
7316 int error= 0;
7317
7318 DBUG_ENTER("end_bulk_insert");
7319 // Check if last inserts need to be flushed
7320
7321 THD *thd= table->in_use;
7322 Thd_ndb *thd_ndb= m_thd_ndb;
7323
7324 if (!thd_allow_batch(thd) && thd_ndb->m_unsent_bytes)
7325 {
7326 bool allow_batch= (thd_ndb->m_handler != 0);
7327 error= flush_bulk_insert(allow_batch);
7328 if (error != 0)
7329 my_errno= error;
7330 }
7331
7332 m_rows_inserted= (ha_rows) 0;
7333 m_rows_to_insert= (ha_rows) 1;
7334 DBUG_RETURN(error);
7335 }
7336
7337
extra_opt(enum ha_extra_function operation,ulong cache_size)7338 int ha_ndbcluster::extra_opt(enum ha_extra_function operation, ulong cache_size)
7339 {
7340 DBUG_ENTER("extra_opt");
7341 DBUG_PRINT("enter", ("cache_size: %lu", cache_size));
7342 DBUG_RETURN(extra(operation));
7343 }
7344
/* Extension list returned by bas_ext(): ha_ndb_ext followed by NullS */
static const char *ha_ndbcluster_exts[] = {
  ha_ndb_ext,
  NullS
};

/** Return the NullS-terminated extension list defined above. */
const char** ha_ndbcluster::bas_ext() const
{
  return ha_ndbcluster_exts;
}
7354
7355 /**
7356 How many seeks it will take to read through the table.
7357
7358 This is to be comparable to the number returned by records_in_range so
7359 that we can decide if we should scan the table or use keys.
7360 */
7361
scan_time()7362 double ha_ndbcluster::scan_time()
7363 {
7364 DBUG_ENTER("ha_ndbcluster::scan_time()");
7365 double res= rows2double(stats.records*1000);
7366 DBUG_PRINT("exit", ("table: %s value: %f",
7367 m_tabname, res));
7368 DBUG_RETURN(res);
7369 }
7370
7371 /*
7372 Convert MySQL table locks into locks supported by Ndb Cluster.
7373 Note that MySQL Cluster does currently not support distributed
7374 table locks, so to be safe one should set cluster in Single
7375 User Mode, before relying on table locks when updating tables
7376 from several MySQL servers
7377 */
7378
store_lock(THD * thd,THR_LOCK_DATA ** to,enum thr_lock_type lock_type)7379 THR_LOCK_DATA **ha_ndbcluster::store_lock(THD *thd,
7380 THR_LOCK_DATA **to,
7381 enum thr_lock_type lock_type)
7382 {
7383 DBUG_ENTER("store_lock");
7384 if (lock_type != TL_IGNORE && m_lock.type == TL_UNLOCK)
7385 {
7386
7387 /* If we are not doing a LOCK TABLE, then allow multiple
7388 writers */
7389
7390 /* Since NDB does not currently have table locks
7391 this is treated as a ordinary lock */
7392
7393 const bool in_lock_tables = thd_in_lock_tables(thd);
7394 const uint sql_command = thd_sql_command(thd);
7395 if ((lock_type >= TL_WRITE_CONCURRENT_INSERT &&
7396 lock_type <= TL_WRITE) &&
7397 !(in_lock_tables && sql_command == SQLCOM_LOCK_TABLES))
7398 lock_type= TL_WRITE_ALLOW_WRITE;
7399
7400 /* In queries of type INSERT INTO t1 SELECT ... FROM t2 ...
7401 MySQL would use the lock TL_READ_NO_INSERT on t2, and that
7402 would conflict with TL_WRITE_ALLOW_WRITE, blocking all inserts
7403 to t2. Convert the lock to a normal read lock to allow
7404 concurrent inserts to t2. */
7405
7406 if (lock_type == TL_READ_NO_INSERT && !thd->in_lock_tables)
7407 lock_type= TL_READ;
7408
7409 /**
7410 * We need locks on source table when
7411 * doing offline alter...
7412 * In 5.1 this worked due to TL_WRITE_ALLOW_READ...
7413 * but that has been removed in 5.5
7414 * I simply add this to get it...
7415 */
7416 if (sql_command == SQLCOM_ALTER_TABLE)
7417 lock_type = TL_WRITE;
7418
7419 m_lock.type=lock_type;
7420 }
7421 *to++= &m_lock;
7422
7423 DBUG_PRINT("exit", ("lock_type: %d", lock_type));
7424
7425 DBUG_RETURN(to);
7426 }
7427
7428 /*
7429 As MySQL will execute an external lock for every new table it uses
7430 we can use this to start the transactions.
7431 If we are in auto_commit mode we just need to start a transaction
7432 for the statement, this will be stored in thd_ndb.stmt.
7433 If not, we have to start a master transaction if there doesn't exist
7434 one from before, this will be stored in thd_ndb.all
7435
7436 When a table lock is held one transaction will be started which holds
7437 the table lock and for each statement a hupp transaction will be started
7438 If we are locking the table then:
7439 - save the NdbDictionary::Table for easy access
7440 - save reference to table statistics
7441 - refresh list of the indexes for the table if needed (if altered)
7442 */
7443
7444 #ifdef HAVE_NDB_BINLOG
ndbcluster_update_apply_status(THD * thd,int do_update)7445 static int ndbcluster_update_apply_status(THD *thd, int do_update)
7446 {
7447 Thd_ndb *thd_ndb= get_thd_ndb(thd);
7448 Ndb *ndb= thd_ndb->ndb;
7449 NDBDICT *dict= ndb->getDictionary();
7450 const NDBTAB *ndbtab;
7451 NdbTransaction *trans= thd_ndb->trans;
7452 ndb->setDatabaseName(NDB_REP_DB);
7453 Ndb_table_guard ndbtab_g(dict, NDB_APPLY_TABLE);
7454 if (!(ndbtab= ndbtab_g.get_table()))
7455 {
7456 return -1;
7457 }
7458 NdbOperation *op= 0;
7459 int r= 0;
7460 r|= (op= trans->getNdbOperation(ndbtab)) == 0;
7461 DBUG_ASSERT(r == 0);
7462 if (do_update)
7463 r|= op->updateTuple();
7464 else
7465 r|= op->writeTuple();
7466 DBUG_ASSERT(r == 0);
7467 // server_id
7468 r|= op->equal(0u, (Uint32)thd->server_id);
7469 DBUG_ASSERT(r == 0);
7470 if (!do_update)
7471 {
7472 // epoch
7473 r|= op->setValue(1u, (Uint64)0);
7474 DBUG_ASSERT(r == 0);
7475 }
7476 const char* group_master_log_name =
7477 ndb_mi_get_group_master_log_name();
7478 const Uint64 group_master_log_pos =
7479 ndb_mi_get_group_master_log_pos();
7480 const Uint64 future_event_relay_log_pos =
7481 ndb_mi_get_future_event_relay_log_pos();
7482 const Uint64 group_relay_log_pos =
7483 ndb_mi_get_group_relay_log_pos();
7484
7485 // log_name
7486 char tmp_buf[FN_REFLEN];
7487 ndb_pack_varchar(ndbtab->getColumn(2u), tmp_buf,
7488 group_master_log_name, strlen(group_master_log_name));
7489 r|= op->setValue(2u, tmp_buf);
7490 DBUG_ASSERT(r == 0);
7491 // start_pos
7492 r|= op->setValue(3u, group_master_log_pos);
7493 DBUG_ASSERT(r == 0);
7494 // end_pos
7495 r|= op->setValue(4u, group_master_log_pos +
7496 (future_event_relay_log_pos - group_relay_log_pos));
7497 DBUG_ASSERT(r == 0);
7498 return 0;
7499 }
7500 #endif /* HAVE_NDB_BINLOG */
7501
transaction_checks(THD * thd,Thd_ndb * thd_ndb)7502 static void transaction_checks(THD *thd, Thd_ndb *thd_ndb)
7503 {
7504 if (thd->lex->sql_command == SQLCOM_LOAD)
7505 thd_ndb->trans_options|= TNTO_TRANSACTIONS_OFF;
7506 else if (!thd->transaction.flags.enabled)
7507 thd_ndb->trans_options|= TNTO_TRANSACTIONS_OFF;
7508 else if (!THDVAR(thd, use_transactions))
7509 thd_ndb->trans_options|= TNTO_TRANSACTIONS_OFF;
7510 thd_ndb->m_force_send= THDVAR(thd, force_send);
7511 if (!thd->slave_thread)
7512 thd_ndb->m_batch_size= THDVAR(thd, batch_size);
7513 else
7514 {
7515 thd_ndb->m_batch_size= THDVAR(NULL, batch_size); /* using global value */
7516 /* Do not use hinted TC selection in slave thread */
7517 THDVAR(thd, optimized_node_selection)=
7518 THDVAR(NULL, optimized_node_selection) & 1; /* using global value */
7519 }
7520 }
7521
start_statement(THD * thd,Thd_ndb * thd_ndb,uint table_count)7522 int ha_ndbcluster::start_statement(THD *thd,
7523 Thd_ndb *thd_ndb,
7524 uint table_count)
7525 {
7526 NdbTransaction *trans= thd_ndb->trans;
7527 int error;
7528 DBUG_ENTER("ha_ndbcluster::start_statement");
7529
7530 m_thd_ndb= thd_ndb;
7531 transaction_checks(thd, m_thd_ndb);
7532
7533 if (table_count == 0)
7534 {
7535 trans_register_ha(thd, FALSE, ndbcluster_hton);
7536 if (thd_options(thd) & (OPTION_NOT_AUTOCOMMIT | OPTION_BEGIN))
7537 {
7538 if (!trans)
7539 trans_register_ha(thd, TRUE, ndbcluster_hton);
7540 thd_ndb->m_handler= NULL;
7541 }
7542 else
7543 {
7544 /*
7545 this is an autocommit, we may keep a reference to the
7546 handler to be used in the commit phase for optimization
7547 reasons, defering execute
7548 */
7549 thd_ndb->m_handler= this;
7550 }
7551 }
7552 else
7553 {
7554 /*
7555 there is more than one handler involved, execute deferal
7556 not possible
7557 */
7558 ha_ndbcluster* handler = thd_ndb->m_handler;
7559 thd_ndb->m_handler= NULL;
7560 if (handler != NULL)
7561 {
7562 /**
7563 * If we initially belived that this could be run
7564 * using execute deferal...but changed out mind
7565 * add handler to thd_ndb->open_tables like it would
7566 * have done "normally"
7567 */
7568 add_handler_to_open_tables(thd, thd_ndb, handler);
7569 }
7570 }
7571 if (!trans && table_count == 0)
7572 {
7573 DBUG_ASSERT(thd_ndb->changed_tables.is_empty() == TRUE);
7574 thd_ndb->trans_options= 0;
7575
7576 DBUG_PRINT("trans",("Possibly starting transaction"));
7577 const uint opti_node_select = THDVAR(thd, optimized_node_selection);
7578 DBUG_PRINT("enter", ("optimized_node_selection: %u", opti_node_select));
7579 if (!(opti_node_select & 2) ||
7580 thd->lex->sql_command == SQLCOM_LOAD)
7581 if (unlikely(!start_transaction(error)))
7582 DBUG_RETURN(error);
7583
7584 thd_ndb->init_open_tables();
7585 thd_ndb->m_slow_path= FALSE;
7586 if (!(thd_options(thd) & OPTION_BIN_LOG) ||
7587 thd->variables.binlog_format == BINLOG_FORMAT_STMT)
7588 {
7589 thd_ndb->trans_options|= TNTO_NO_LOGGING;
7590 thd_ndb->m_slow_path= TRUE;
7591 }
7592 else if (thd->slave_thread)
7593 thd_ndb->m_slow_path= TRUE;
7594 }
7595 /*
7596 If this is the start of a LOCK TABLE, a table look
7597 should be taken on the table in NDB
7598
7599 Check if it should be read or write lock
7600 */
7601 if (thd_options(thd) & (OPTION_TABLE_LOCK))
7602 {
7603 /* This is currently dead code in wait for implementation in NDB */
7604 /* lockThisTable(); */
7605 DBUG_PRINT("info", ("Locking the table..." ));
7606 #ifdef NOT_YET
7607 push_warning_printf(current_thd, Sql_condition::WARN_LEVEL_WARN,
7608 ER_GET_ERRMSG, ER(ER_GET_ERRMSG), 0,
7609 "Table only locked locally in this mysqld", "NDB");
7610 #endif
7611 }
7612 DBUG_RETURN(0);
7613 }
7614
7615 int
add_handler_to_open_tables(THD * thd,Thd_ndb * thd_ndb,ha_ndbcluster * handler)7616 ha_ndbcluster::add_handler_to_open_tables(THD *thd,
7617 Thd_ndb *thd_ndb,
7618 ha_ndbcluster* handler)
7619 {
7620 DBUG_ENTER("ha_ndbcluster::add_handler_to_open_tables");
7621 DBUG_PRINT("info", ("Adding %s", handler->m_share->key));
7622
7623 /**
7624 * thd_ndb->open_tables is only used iff thd_ndb->m_handler is not
7625 */
7626 DBUG_ASSERT(thd_ndb->m_handler == NULL);
7627 const void *key= handler->m_share;
7628 HASH_SEARCH_STATE state;
7629 THD_NDB_SHARE *thd_ndb_share=
7630 (THD_NDB_SHARE*)my_hash_first(&thd_ndb->open_tables,
7631 (const uchar *)&key, sizeof(key),
7632 &state);
7633 while (thd_ndb_share && thd_ndb_share->key != key)
7634 {
7635 thd_ndb_share=
7636 (THD_NDB_SHARE*)my_hash_next(&thd_ndb->open_tables,
7637 (const uchar *)&key, sizeof(key),
7638 &state);
7639 }
7640 if (thd_ndb_share == 0)
7641 {
7642 thd_ndb_share= (THD_NDB_SHARE *) alloc_root(&thd->transaction.mem_root,
7643 sizeof(THD_NDB_SHARE));
7644 if (!thd_ndb_share)
7645 {
7646 mem_alloc_error(sizeof(THD_NDB_SHARE));
7647 DBUG_RETURN(1);
7648 }
7649 thd_ndb_share->key= key;
7650 thd_ndb_share->stat.last_count= thd_ndb->count;
7651 thd_ndb_share->stat.no_uncommitted_rows_count= 0;
7652 thd_ndb_share->stat.records= ~(ha_rows)0;
7653 my_hash_insert(&thd_ndb->open_tables, (uchar *)thd_ndb_share);
7654 }
7655 else if (thd_ndb_share->stat.last_count != thd_ndb->count)
7656 {
7657 thd_ndb_share->stat.last_count= thd_ndb->count;
7658 thd_ndb_share->stat.no_uncommitted_rows_count= 0;
7659 thd_ndb_share->stat.records= ~(ha_rows)0;
7660 }
7661
7662 handler->m_table_info= &thd_ndb_share->stat;
7663 DBUG_RETURN(0);
7664 }
7665
init_handler_for_statement(THD * thd)7666 int ha_ndbcluster::init_handler_for_statement(THD *thd)
7667 {
7668 /*
7669 This is the place to make sure this handler instance
7670 has a started transaction.
7671
7672 The transaction is started by the first handler on which
7673 MySQL Server calls external lock
7674
7675 Other handlers in the same stmt or transaction should use
7676 the same NDB transaction. This is done by setting up the m_thd_ndb
7677 pointer to point to the NDB transaction object.
7678 */
7679
7680 DBUG_ENTER("ha_ndbcluster::init_handler_for_statement");
7681 Thd_ndb *thd_ndb= m_thd_ndb;
7682 DBUG_ASSERT(thd_ndb);
7683
7684 // store thread specific data first to set the right context
7685 m_autoincrement_prefetch= THDVAR(thd, autoincrement_prefetch_sz);
7686 // Start of transaction
7687 m_rows_changed= 0;
7688 m_blobs_pending= FALSE;
7689 release_blobs_buffer();
7690 m_slow_path= m_thd_ndb->m_slow_path;
7691 #ifdef HAVE_NDB_BINLOG
7692 if (unlikely(m_slow_path))
7693 {
7694 if (m_share == ndb_apply_status_share && thd->slave_thread)
7695 m_thd_ndb->trans_options|= TNTO_INJECTED_APPLY_STATUS;
7696 }
7697 #endif
7698
7699 int ret = 0;
7700 if (thd_ndb->m_handler == 0)
7701 {
7702 DBUG_ASSERT(m_share);
7703 ret = add_handler_to_open_tables(thd, thd_ndb, this);
7704 }
7705 else
7706 {
7707 struct Ndb_local_table_statistics &stat= m_table_info_instance;
7708 stat.last_count= thd_ndb->count;
7709 stat.no_uncommitted_rows_count= 0;
7710 stat.records= ~(ha_rows)0;
7711 m_table_info= &stat;
7712 }
7713 DBUG_RETURN(ret);
7714 }
7715
external_lock(THD * thd,int lock_type)7716 int ha_ndbcluster::external_lock(THD *thd, int lock_type)
7717 {
7718 DBUG_ENTER("external_lock");
7719 if (lock_type != F_UNLCK)
7720 {
7721 int error;
7722 /*
7723 Check that this handler instance has a connection
7724 set up to the Ndb object of thd
7725 */
7726 if (check_ndb_connection(thd))
7727 DBUG_RETURN(1);
7728 Thd_ndb *thd_ndb= get_thd_ndb(thd);
7729
7730 DBUG_PRINT("enter", ("lock_type != F_UNLCK "
7731 "this: 0x%lx thd: 0x%lx thd_ndb: %lx "
7732 "thd_ndb->lock_count: %d",
7733 (long) this, (long) thd, (long) thd_ndb,
7734 thd_ndb->lock_count));
7735
7736 if ((error= start_statement(thd, thd_ndb,
7737 thd_ndb->lock_count++)))
7738 {
7739 thd_ndb->lock_count--;
7740 DBUG_RETURN(error);
7741 }
7742 if ((error= init_handler_for_statement(thd)))
7743 {
7744 thd_ndb->lock_count--;
7745 DBUG_RETURN(error);
7746 }
7747 DBUG_RETURN(0);
7748 }
7749 else
7750 {
7751 Thd_ndb *thd_ndb= m_thd_ndb;
7752 DBUG_ASSERT(thd_ndb);
7753
7754 DBUG_PRINT("enter", ("lock_type == F_UNLCK "
7755 "this: 0x%lx thd: 0x%lx thd_ndb: %lx "
7756 "thd_ndb->lock_count: %d",
7757 (long) this, (long) thd, (long) thd_ndb,
7758 thd_ndb->lock_count));
7759
7760 if (m_rows_changed && global_system_variables.query_cache_type)
7761 {
7762 DBUG_PRINT("info", ("Rows has changed"));
7763
7764 if (thd_ndb->trans &&
7765 thd_options(thd) & (OPTION_NOT_AUTOCOMMIT | OPTION_BEGIN))
7766 {
7767 DBUG_PRINT("info", ("Add share to list of changed tables, %p",
7768 m_share));
7769 /* NOTE push_back allocates memory using transactions mem_root! */
7770 thd_ndb->changed_tables.push_back(get_share(m_share),
7771 &thd->transaction.mem_root);
7772 }
7773
7774 if (opt_ndb_cache_check_time)
7775 {
7776 pthread_mutex_lock(&m_share->mutex);
7777 DBUG_PRINT("info", ("Invalidating commit_count"));
7778 m_share->commit_count= 0;
7779 m_share->commit_count_lock++;
7780 pthread_mutex_unlock(&m_share->mutex);
7781 }
7782 }
7783
7784 if (!--thd_ndb->lock_count)
7785 {
7786 DBUG_PRINT("trans", ("Last external_lock"));
7787
7788 if ((!(thd_options(thd) & (OPTION_NOT_AUTOCOMMIT | OPTION_BEGIN))) &&
7789 thd_ndb->trans)
7790 {
7791 if (thd_ndb->trans)
7792 {
7793 /*
7794 Unlock is done without a transaction commit / rollback.
7795 This happens if the thread didn't update any rows
7796 We must in this case close the transaction to release resources
7797 */
7798 DBUG_PRINT("trans",("ending non-updating transaction"));
7799 thd_ndb->ndb->closeTransaction(thd_ndb->trans);
7800 thd_ndb->trans= NULL;
7801 thd_ndb->m_handler= NULL;
7802 }
7803 }
7804 }
7805 m_table_info= NULL;
7806
7807 /*
7808 This is the place to make sure this handler instance
7809 no longer are connected to the active transaction.
7810
7811 And since the handler is no longer part of the transaction
7812 it can't have open cursors, ops, queries or blobs pending.
7813 */
7814 m_thd_ndb= NULL;
7815
7816 if (m_active_query)
7817 DBUG_PRINT("warning", ("m_active_query != NULL"));
7818 m_active_query= NULL;
7819
7820 if (m_active_cursor)
7821 DBUG_PRINT("warning", ("m_active_cursor != NULL"));
7822 m_active_cursor= NULL;
7823
7824 if (m_multi_cursor)
7825 DBUG_PRINT("warning", ("m_multi_cursor != NULL"));
7826 m_multi_cursor= NULL;
7827
7828 if (m_blobs_pending)
7829 DBUG_PRINT("warning", ("blobs_pending != 0"));
7830 m_blobs_pending= 0;
7831
7832 DBUG_RETURN(0);
7833 }
7834 }
7835
7836 /**
7837 Unlock the last row read in an open scan.
7838 Rows are unlocked by default in ndb, but
  for SELECT ... FOR UPDATE and SELECT ... LOCK IN SHARE MODE
7840 locks are kept if unlock_row() is not called.
7841 */
7842
void ha_ndbcluster::unlock_row()
{
  DBUG_ENTER("unlock_row");

  DBUG_PRINT("info", ("Unlocking row"));
  /* Only the flag is cleared here; this function makes no NDB call */
  m_lock_tuple= FALSE;
  DBUG_VOID_RETURN;
}
7851
7852 /**
7853 Start statement, used when one of the tables are locked and also when
7854 a stored function is executed.
7855
7856 start_stmt()
7857 thd Thd object
7858 lock_type Lock type on table
7859
7860 RETURN VALUE
7861 0 Success
7862 >0 Error code
7863
7864 DESCRIPTION
7865 This call indicates the start of a statement when one of the tables in
7866 the statement are locked. In this case we cannot call external_lock.
7867 It also implies that external_lock is not called at end of statement.
7868 Rather the handlerton call commit (ndbcluster_commit) is called to
7869 indicate end of transaction. There are cases thus when the commit call
7870 actually doesn't refer to a commit but only to and end of statement.
7871
7872 In the case of stored functions, one stored function is treated as one
7873 statement and the call to commit comes at the end of the stored function.
7874 */
7875
start_stmt(THD * thd,thr_lock_type lock_type)7876 int ha_ndbcluster::start_stmt(THD *thd, thr_lock_type lock_type)
7877 {
7878 int error=0;
7879 Thd_ndb *thd_ndb;
7880 DBUG_ENTER("start_stmt");
7881 DBUG_ASSERT(thd == table->in_use);
7882
7883 thd_ndb= get_thd_ndb(thd);
7884 if ((error= start_statement(thd, thd_ndb, thd_ndb->start_stmt_count++)))
7885 goto error;
7886 if ((error= init_handler_for_statement(thd)))
7887 goto error;
7888 DBUG_RETURN(0);
7889 error:
7890 thd_ndb->start_stmt_count--;
7891 DBUG_RETURN(error);
7892 }
7893
7894 NdbTransaction *
start_transaction_row(const NdbRecord * ndb_record,const uchar * record,int & error)7895 ha_ndbcluster::start_transaction_row(const NdbRecord *ndb_record,
7896 const uchar *record,
7897 int &error)
7898 {
7899 NdbTransaction *trans;
7900 DBUG_ENTER("ha_ndbcluster::start_transaction_row");
7901 DBUG_ASSERT(m_thd_ndb);
7902 DBUG_ASSERT(m_thd_ndb->trans == NULL);
7903
7904 transaction_checks(table->in_use, m_thd_ndb);
7905
7906 Ndb *ndb= m_thd_ndb->ndb;
7907
7908 Uint64 tmp[(MAX_KEY_SIZE_IN_WORDS*MAX_XFRM_MULTIPLY) >> 1];
7909 char *buf= (char*)&tmp[0];
7910 trans= ndb->startTransaction(ndb_record,
7911 (const char*)record,
7912 buf, sizeof(tmp));
7913
7914 if (trans)
7915 {
7916 m_thd_ndb->m_transaction_hint_count[trans->getConnectedNodeId()]++;
7917 DBUG_PRINT("info", ("Delayed allocation of TC"));
7918 DBUG_RETURN(m_thd_ndb->trans= trans);
7919 }
7920
7921 ERR_SET(m_thd_ndb->ndb->getNdbError(), error);
7922 DBUG_RETURN(NULL);
7923 }
7924
7925 NdbTransaction *
start_transaction_key(uint inx_no,const uchar * key_data,int & error)7926 ha_ndbcluster::start_transaction_key(uint inx_no,
7927 const uchar *key_data,
7928 int &error)
7929 {
7930 NdbTransaction *trans;
7931 DBUG_ENTER("ha_ndbcluster::start_transaction_key");
7932 DBUG_ASSERT(m_thd_ndb);
7933 DBUG_ASSERT(m_thd_ndb->trans == NULL);
7934
7935 transaction_checks(table->in_use, m_thd_ndb);
7936
7937 Ndb *ndb= m_thd_ndb->ndb;
7938 const NdbRecord *key_rec= m_index[inx_no].ndb_unique_record_key;
7939
7940 Uint64 tmp[(MAX_KEY_SIZE_IN_WORDS*MAX_XFRM_MULTIPLY) >> 1];
7941 char *buf= (char*)&tmp[0];
7942 trans= ndb->startTransaction(key_rec,
7943 (const char*)key_data,
7944 buf, sizeof(tmp));
7945
7946 if (trans)
7947 {
7948 m_thd_ndb->m_transaction_hint_count[trans->getConnectedNodeId()]++;
7949 DBUG_PRINT("info", ("Delayed allocation of TC"));
7950 DBUG_RETURN(m_thd_ndb->trans= trans);
7951 }
7952
7953 ERR_SET(m_thd_ndb->ndb->getNdbError(), error);
7954 DBUG_RETURN(NULL);
7955 }
7956
7957 NdbTransaction *
start_transaction(int & error)7958 ha_ndbcluster::start_transaction(int &error)
7959 {
7960 NdbTransaction *trans;
7961 DBUG_ENTER("ha_ndbcluster::start_transaction");
7962
7963 DBUG_ASSERT(m_thd_ndb);
7964 DBUG_ASSERT(m_thd_ndb->trans == NULL);
7965
7966 transaction_checks(table->in_use, m_thd_ndb);
7967 const uint opti_node_select= THDVAR(table->in_use, optimized_node_selection);
7968 m_thd_ndb->connection->set_optimized_node_selection(opti_node_select & 1);
7969 if ((trans= m_thd_ndb->ndb->startTransaction()))
7970 {
7971 m_thd_ndb->m_transaction_no_hint_count[trans->getConnectedNodeId()]++;
7972 DBUG_PRINT("info", ("Delayed allocation of TC"));
7973 DBUG_RETURN(m_thd_ndb->trans= trans);
7974 }
7975
7976 ERR_SET(m_thd_ndb->ndb->getNdbError(), error);
7977 DBUG_RETURN(NULL);
7978 }
7979
7980 NdbTransaction *
start_transaction_part_id(Uint32 part_id,int & error)7981 ha_ndbcluster::start_transaction_part_id(Uint32 part_id, int &error)
7982 {
7983 NdbTransaction *trans;
7984 DBUG_ENTER("ha_ndbcluster::start_transaction_part_id");
7985
7986 DBUG_ASSERT(m_thd_ndb);
7987 DBUG_ASSERT(m_thd_ndb->trans == NULL);
7988
7989 transaction_checks(table->in_use, m_thd_ndb);
7990 if ((trans= m_thd_ndb->ndb->startTransaction(m_table, part_id)))
7991 {
7992 m_thd_ndb->m_transaction_hint_count[trans->getConnectedNodeId()]++;
7993 DBUG_PRINT("info", ("Delayed allocation of TC"));
7994 DBUG_RETURN(m_thd_ndb->trans= trans);
7995 }
7996
7997 ERR_SET(m_thd_ndb->ndb->getNdbError(), error);
7998 DBUG_RETURN(NULL);
7999 }
8000
8001
8002 /**
8003 Commit a transaction started in NDB.
8004 */
8005
ndbcluster_commit(handlerton * hton,THD * thd,bool all)8006 int ndbcluster_commit(handlerton *hton, THD *thd, bool all)
8007 {
8008 int res= 0;
8009 Thd_ndb *thd_ndb= get_thd_ndb(thd);
8010 Ndb *ndb= thd_ndb->ndb;
8011 NdbTransaction *trans= thd_ndb->trans;
8012
8013 DBUG_ENTER("ndbcluster_commit");
8014 DBUG_ASSERT(ndb);
8015 DBUG_PRINT("enter", ("Commit %s", (all ? "all" : "stmt")));
8016 thd_ndb->start_stmt_count= 0;
8017 if (trans == NULL)
8018 {
8019 DBUG_PRINT("info", ("trans == NULL"));
8020 DBUG_RETURN(0);
8021 }
8022 if (!all && (thd_options(thd) & (OPTION_NOT_AUTOCOMMIT | OPTION_BEGIN)))
8023 {
8024 /*
8025 An odditity in the handler interface is that commit on handlerton
8026 is called to indicate end of statement only in cases where
8027 autocommit isn't used and the all flag isn't set.
8028
8029 We also leave quickly when a transaction haven't even been started,
8030 in this case we are safe that no clean up is needed. In this case
8031 the MySQL Server could handle the query without contacting the
8032 NDB kernel.
8033 */
8034 thd_ndb->save_point_count++;
8035 DBUG_PRINT("info", ("Commit before start or end-of-statement only"));
8036 DBUG_RETURN(0);
8037 }
8038 thd_ndb->save_point_count= 0;
8039
8040 #ifdef HAVE_NDB_BINLOG
8041 if (unlikely(thd_ndb->m_slow_path))
8042 {
8043 if (thd->slave_thread)
8044 ndbcluster_update_apply_status
8045 (thd, thd_ndb->trans_options & TNTO_INJECTED_APPLY_STATUS);
8046 }
8047 #endif /* HAVE_NDB_BINLOG */
8048
8049 if (thd->slave_thread)
8050 {
8051 if (!g_ndb_slave_state.current_conflict_defined_op_count ||
8052 !thd_ndb->m_unsent_bytes ||
8053 !(res= execute_no_commit(thd_ndb, trans, TRUE)))
8054 res= execute_commit(thd, thd_ndb, trans, 1, TRUE);
8055
8056 update_slave_api_stats(thd_ndb->ndb);
8057 }
8058 else
8059 {
8060 if (thd_ndb->m_handler &&
8061 thd_ndb->m_handler->m_read_before_write_removal_possible)
8062 {
8063 /*
8064 This is an autocommit involving only one table and
8065 rbwr is on, thus the transaction has already been
8066 committed in exec_bulk_update() or end_bulk_delete()
8067 */
8068 DBUG_PRINT("info", ("autocommit+rbwr, transaction already comitted"));
8069 if (trans->commitStatus() != NdbTransaction::Committed)
8070 {
8071 sql_print_error("found uncomitted autocommit+rbwr transaction, "
8072 "commit status: %d", trans->commitStatus());
8073 abort();
8074 }
8075 }
8076 else
8077 res= execute_commit(thd, thd_ndb, trans, THDVAR(thd, force_send), FALSE);
8078 }
8079
8080 if (res != 0)
8081 {
8082 const NdbError err= trans->getNdbError();
8083 const NdbOperation *error_op= trans->getNdbErrorOperation();
8084 res= ndb_to_mysql_error(&err);
8085 if (res != -1)
8086 ndbcluster_print_error(res, error_op);
8087 }
8088 else
8089 {
8090 /* Update shared statistics for tables inserted into / deleted from*/
8091 if (thd_ndb->m_handler && // Autocommit Txn
8092 thd_ndb->m_handler->m_share &&
8093 thd_ndb->m_handler->m_table_info)
8094 {
8095 modify_shared_stats(thd_ndb->m_handler->m_share, thd_ndb->m_handler->m_table_info);
8096 }
8097
8098 /* Manual commit: Update all affected NDB_SHAREs found in 'open_tables' */
8099 for (uint i= 0; i<thd_ndb->open_tables.records; i++)
8100 {
8101 THD_NDB_SHARE *thd_share=
8102 (THD_NDB_SHARE*)my_hash_element(&thd_ndb->open_tables, i);
8103 modify_shared_stats((NDB_SHARE*)thd_share->key, &thd_share->stat);
8104 }
8105 }
8106
8107 ndb->closeTransaction(trans);
8108 thd_ndb->trans= NULL;
8109 thd_ndb->m_handler= NULL;
8110
8111 /* Clear commit_count for tables changed by transaction */
8112 NDB_SHARE* share;
8113 List_iterator_fast<NDB_SHARE> it(thd_ndb->changed_tables);
8114 while ((share= it++))
8115 {
8116 DBUG_PRINT("info", ("Remove share to list of changed tables, %p",
8117 share));
8118 pthread_mutex_lock(&share->mutex);
8119 DBUG_PRINT("info", ("Invalidate commit_count for %s, share->commit_count: %lu",
8120 share->table_name, (ulong) share->commit_count));
8121 share->commit_count= 0;
8122 share->commit_count_lock++;
8123 pthread_mutex_unlock(&share->mutex);
8124 free_share(&share);
8125 }
8126 thd_ndb->changed_tables.empty();
8127
8128 DBUG_RETURN(res);
8129 }
8130
8131
8132 /**
8133 Rollback a transaction started in NDB.
8134 */
8135
ndbcluster_rollback(handlerton * hton,THD * thd,bool all)8136 static int ndbcluster_rollback(handlerton *hton, THD *thd, bool all)
8137 {
8138 int res= 0;
8139 Thd_ndb *thd_ndb= get_thd_ndb(thd);
8140 Ndb *ndb= thd_ndb->ndb;
8141 NdbTransaction *trans= thd_ndb->trans;
8142
8143 DBUG_ENTER("ndbcluster_rollback");
8144 DBUG_PRINT("enter", ("all: %d thd_ndb->save_point_count: %d",
8145 all, thd_ndb->save_point_count));
8146 DBUG_ASSERT(ndb);
8147 thd_ndb->start_stmt_count= 0;
8148 if (trans == NULL)
8149 {
8150 /* Ignore end-of-statement until real rollback or commit is called */
8151 DBUG_PRINT("info", ("trans == NULL"));
8152 DBUG_RETURN(0);
8153 }
8154 if (!all && (thd_options(thd) & (OPTION_NOT_AUTOCOMMIT | OPTION_BEGIN)) &&
8155 (thd_ndb->save_point_count > 0))
8156 {
8157 /*
8158 Ignore end-of-statement until real rollback or commit is called
8159 as ndb does not support rollback statement
8160 - mark that rollback was unsuccessful, this will cause full rollback
8161 of the transaction
8162 */
8163 DBUG_PRINT("info", ("Rollback before start or end-of-statement only"));
8164 thd->mark_transaction_to_rollback(1);
8165 my_error(ER_WARN_ENGINE_TRANSACTION_ROLLBACK, MYF(0), "NDB");
8166 DBUG_RETURN(0);
8167 }
8168 thd_ndb->save_point_count= 0;
8169 if (thd->slave_thread)
8170 g_ndb_slave_state.atTransactionAbort();
8171 thd_ndb->m_unsent_bytes= 0;
8172 thd_ndb->m_execute_count++;
8173 DBUG_PRINT("info", ("execute_count: %u", thd_ndb->m_execute_count));
8174 if (trans->execute(NdbTransaction::Rollback) != 0)
8175 {
8176 const NdbError err= trans->getNdbError();
8177 const NdbOperation *error_op= trans->getNdbErrorOperation();
8178 res= ndb_to_mysql_error(&err);
8179 if (res != -1)
8180 ndbcluster_print_error(res, error_op);
8181 }
8182 ndb->closeTransaction(trans);
8183 thd_ndb->trans= NULL;
8184 thd_ndb->m_handler= NULL;
8185
8186 /* Clear list of tables changed by transaction */
8187 NDB_SHARE* share;
8188 List_iterator_fast<NDB_SHARE> it(thd_ndb->changed_tables);
8189 while ((share= it++))
8190 {
8191 DBUG_PRINT("info", ("Remove share to list of changed tables, %p",
8192 share));
8193 free_share(&share);
8194 }
8195 thd_ndb->changed_tables.empty();
8196
8197 if (thd->slave_thread)
8198 update_slave_api_stats(thd_ndb->ndb);
8199
8200 DBUG_RETURN(res);
8201 }
8202
8203 /**
8204 * Support for create table/column modifiers
8205 * by exploiting the comment field
8206 */
/* One modifier: its keyword, its value type and the parsed value */
struct NDB_Modifier
{
  enum { M_BOOL } m_type;   // value type; M_BOOL is the only one defined
  const char * m_name;      // keyword; 0 terminates a modifier table
  size_t m_name_len;        // filled in by STRING_WITH_LEN in the tables
  bool m_found;             // set by NDB_Modifiers::parse_modifier() on success
  union {
    bool m_val_bool;        // value stored for an M_BOOL modifier
#ifdef TODO__
    int m_val_int;
    struct {
      const char * str;
      size_t len;
    } m_val_str;
#endif
  };
};
8224
/*
  Table level modifiers. The last entry, with m_name == 0, terminates the
  list (the NDB_Modifiers constructor counts entries up to it).
*/
static const
struct NDB_Modifier ndb_table_modifiers[] =
{
  { NDB_Modifier::M_BOOL, STRING_WITH_LEN("NOLOGGING"), 0, {0} },
  { NDB_Modifier::M_BOOL, 0, 0, 0, {0} }
};
8231
/*
  Column level modifiers. The last entry, with m_name == 0, terminates the
  list (the NDB_Modifiers constructor counts entries up to it).
*/
static const
struct NDB_Modifier ndb_column_modifiers[] =
{
  { NDB_Modifier::M_BOOL, STRING_WITH_LEN("MAX_BLOB_PART_SIZE"), 0, {0} },
  { NDB_Modifier::M_BOOL, 0, 0, 0, {0} }
};
8238
8239 /**
8240 * NDB_Modifiers
8241 *
8242 * This class implements a simple parser for getting modifiers out
8243 * of a string (e.g a comment field)
8244 */
8245 class NDB_Modifiers
8246 {
8247 public:
8248 NDB_Modifiers(const NDB_Modifier modifiers[]);
8249 ~NDB_Modifiers();
8250
8251 /**
8252 * parse string-with length (not necessarily NULL terminated)
8253 */
8254 int parse(THD* thd, const char * prefix, const char * str, size_t strlen);
8255
8256 /**
8257 * Get modifier...returns NULL if unknown
8258 */
8259 const NDB_Modifier * get(const char * name) const;
8260 private:
8261 uint m_len;
8262 struct NDB_Modifier * m_modifiers;
8263
8264 int parse_modifier(THD *thd, const char * prefix,
8265 struct NDB_Modifier* m, const char * str);
8266 };
8267
/**
 * Return true if the first character of str terminates a token,
 * i.e. is the string terminator, a space or a comma
 */
static
bool
end_of_token(const char * str)
{
  switch (str[0])
  {
  case 0:
  case ' ':
  case ',':
    return true;
  default:
    return false;
  }
}
8274
NDB_Modifiers(const NDB_Modifier modifiers[])8275 NDB_Modifiers::NDB_Modifiers(const NDB_Modifier modifiers[])
8276 {
8277 for (m_len = 0; modifiers[m_len].m_name != 0; m_len++)
8278 {}
8279 m_modifiers = new NDB_Modifier[m_len];
8280 memcpy(m_modifiers, modifiers, m_len * sizeof(NDB_Modifier));
8281 }
8282
~NDB_Modifiers()8283 NDB_Modifiers::~NDB_Modifiers()
8284 {
8285 delete [] m_modifiers;
8286 }
8287
8288 int
parse_modifier(THD * thd,const char * prefix,struct NDB_Modifier * m,const char * str)8289 NDB_Modifiers::parse_modifier(THD *thd,
8290 const char * prefix,
8291 struct NDB_Modifier* m,
8292 const char * str)
8293 {
8294 if (m->m_found)
8295 {
8296 push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN,
8297 ER_ILLEGAL_HA_CREATE_OPTION,
8298 "%s : modifier %s specified twice",
8299 prefix, m->m_name);
8300 }
8301
8302 switch(m->m_type){
8303 case NDB_Modifier::M_BOOL:
8304 if (end_of_token(str))
8305 {
8306 m->m_val_bool = true;
8307 goto found;
8308 }
8309 if (str[0] != '=')
8310 break;
8311
8312 str++;
8313 if (str[0] == '1' && end_of_token(str+1))
8314 {
8315 m->m_val_bool = true;
8316 goto found;
8317 }
8318
8319 if (str[0] == '0' && end_of_token(str+1))
8320 {
8321 m->m_val_bool = false;
8322 goto found;
8323 }
8324 }
8325
8326 {
8327 const char * end = strpbrk(str, " ,");
8328 if (end)
8329 {
8330 push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN,
8331 ER_ILLEGAL_HA_CREATE_OPTION,
8332 "%s : invalid value '%.*s' for %s",
8333 prefix, (int)(end - str), str, m->m_name);
8334 }
8335 else
8336 {
8337 push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN,
8338 ER_ILLEGAL_HA_CREATE_OPTION,
8339 "%s : invalid value '%s' for %s",
8340 prefix, str, m->m_name);
8341 }
8342 }
8343 return -1;
8344 found:
8345 m->m_found = true;
8346 return 0;
8347 }
8348
8349 int
parse(THD * thd,const char * prefix,const char * _source,size_t _source_len)8350 NDB_Modifiers::parse(THD *thd,
8351 const char * prefix,
8352 const char * _source,
8353 size_t _source_len)
8354 {
8355 if (_source == 0 || _source_len == 0)
8356 return 0;
8357
8358 const char * source = 0;
8359
8360 /**
8361 * Check if _source is NULL-terminated
8362 */
8363 for (size_t i = 0; i<_source_len; i++)
8364 {
8365 if (_source[i] == 0)
8366 {
8367 source = _source;
8368 break;
8369 }
8370 }
8371
8372 if (source == 0)
8373 {
8374 /**
8375 * Make NULL terminated string so that strXXX-functions are safe
8376 */
8377 char * tmp = new char[_source_len+1];
8378 if (tmp == 0)
8379 {
8380 push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN,
8381 ER_ILLEGAL_HA_CREATE_OPTION,
8382 "%s : unable to parse due to out of memory",
8383 prefix);
8384 return -1;
8385 }
8386 memcpy(tmp, _source, _source_len);
8387 tmp[_source_len] = 0;
8388 source = tmp;
8389 }
8390
8391 const char * pos = source;
8392 if ((pos = strstr(pos, prefix)) == 0)
8393 {
8394 if (source != _source)
8395 delete [] source;
8396 return 0;
8397 }
8398
8399 pos += strlen(prefix);
8400
8401 while (pos && pos[0] != 0 && pos[0] != ' ')
8402 {
8403 const char * end = strpbrk(pos, " ,"); // end of current modifier
8404
8405 for (uint i = 0; i < m_len; i++)
8406 {
8407 size_t l = m_modifiers[i].m_name_len;
8408 if (strncmp(pos, m_modifiers[i].m_name, l) == 0)
8409 {
8410 /**
8411 * Found modifier...
8412 */
8413
8414 if (! (end_of_token(pos + l) || pos[l] == '='))
8415 goto unknown;
8416
8417 pos += l;
8418 int res = parse_modifier(thd, prefix, m_modifiers+i, pos);
8419
8420 if (res == -1)
8421 {
8422 /**
8423 * We continue parsing even if modifier had error
8424 */
8425 }
8426
8427 goto next;
8428 }
8429 }
8430
8431 {
8432 unknown:
8433 if (end)
8434 {
8435 push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN,
8436 ER_ILLEGAL_HA_CREATE_OPTION,
8437 "%s : unknown modifier: %.*s",
8438 prefix, (int)(end - pos), pos);
8439 }
8440 else
8441 {
8442 push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN,
8443 ER_ILLEGAL_HA_CREATE_OPTION,
8444 "%s : unknown modifier: %s",
8445 prefix, pos);
8446 }
8447 }
8448
8449 next:
8450 pos = end;
8451 if (pos && pos[0] == ',')
8452 pos++;
8453 }
8454
8455 if (source != _source)
8456 delete [] source;
8457
8458 return 0;
8459 }
8460
8461 const NDB_Modifier *
get(const char * name) const8462 NDB_Modifiers::get(const char * name) const
8463 {
8464 for (uint i = 0; i < m_len; i++)
8465 {
8466 if (strcmp(name, m_modifiers[i].m_name) == 0)
8467 {
8468 return m_modifiers + i;
8469 }
8470 }
8471 return 0;
8472 }
8473
8474 /**
8475 Define NDB column based on Field.
8476
8477 Not member of ha_ndbcluster because NDBCOL cannot be declared.
8478
8479 MySQL text types with character set "binary" are mapped to true
8480 NDB binary types without a character set.
8481
8482 Blobs are V2 and striping from mysql level is not supported
8483 due to lack of syntax and lack of support for partitioning.
8484
8485 @return
8486 Returns 0 or mysql error code.
8487 */
8488
/**
  Tell whether blob striping is requested.

  Only builds without DBUG_OFF look at the environment variable
  NDB_BLOB_STRIPING: any value that is non-empty and does not start
  with '0', 'n' or 'N' turns striping on. Other builds always get false.
*/
static bool
ndb_blob_striping()
{
  bool striping= false;
#ifndef DBUG_OFF
  const char* env= getenv("NDB_BLOB_STRIPING");
  if (env != 0)
  {
    switch (*env) {
    case 0:
    case '0':
    case 'n':
    case 'N':
      break;
    default:
      striping= true;
      break;
    }
  }
#endif
  return striping;
}
8499
/*
  Tuple size limit, in 32-bit words, used when sizing blob parts.
  Builds against NDB versions before 7.2.0 use the fixed value 2013,
  later versions use NDB_MAX_TUPLE_SIZE_IN_WORDS.
*/
#if NDB_VERSION_D < NDB_MAKE_VERSION(7,2,0)
const Uint32 OLD_NDB_MAX_TUPLE_SIZE_IN_WORDS = 2013;
#else
const Uint32 OLD_NDB_MAX_TUPLE_SIZE_IN_WORDS = NDB_MAX_TUPLE_SIZE_IN_WORDS;
#endif
8505
/**
  Fill in an NDB column definition from a MySQL field.

  Sets, in this order: name, native default value, NDB type with its
  length/precision/scale/charset or blob inline/part/stripe sizes,
  nullable, primary key and partition key flags, auto increment
  settings, and finally storage type and dynamic flag.

  @param thd             Connection used for parsing the column comment
                         and for pushing warnings. Several checks below
                         test thd for NULL before using it.
  @param col             NDB column to fill in. The same object is reused
                         by the caller for every column of a table.
  @param field           MySQL field to take the definition from.
  @param create_info     Table level options (auto increment start value,
                         storage media, row type).
  @param default_format  Only when NDB_WITHOUT_COLUMN_FORMAT is not
                         defined: value given to the dynamic flag when
                         neither the column format nor the row type
                         decides it.

  @return 0 on success, HA_ERR_UNSUPPORTED for a field type that has no
          NDB mapping, or the errno value if setting the name fails.
*/
static int create_ndb_column(THD *thd,
                             NDBCOL &col,
                             Field *field,
                             HA_CREATE_INFO *create_info
#ifndef NDB_WITHOUT_COLUMN_FORMAT
                             , column_format_type
                               default_format= COLUMN_FORMAT_TYPE_DEFAULT
#endif
                            )
{
  NDBCOL::StorageType type= NDBCOL::StorageTypeMemory;
  bool dynamic= FALSE;

  /* Scratch buffer for the native default value */
  char buf[MAX_ATTR_DEFAULT_VALUE_SIZE];
  DBUG_ENTER("create_ndb_column");
  // Set name
  if (col.setName(field->field_name))
  {
    DBUG_RETURN(my_errno= errno);
  }
  // Get char set
  CHARSET_INFO *cs= const_cast<CHARSET_INFO*>(field->charset());
  // Set type and sizes
  const enum enum_field_types mysql_type= field->real_type();

  /* Pick up modifiers given as "NDB_COLUMN=..." in the column comment */
  NDB_Modifiers column_modifiers(ndb_column_modifiers);
  column_modifiers.parse(thd, "NDB_COLUMN=",
                         field->comment.str,
                         field->comment.length);

  /* Used by the blob cases below to select the larger part size */
  const NDB_Modifier * mod_maxblob = column_modifiers.get("MAX_BLOB_PART_SIZE");

  {
    /* Clear default value (col obj is reused for whole table def) */
    col.setDefaultValue(NULL, 0);

    /* If the data nodes are capable then set native
     * default.
     * Without a thd the capability check is skipped and
     * native defaults are used.
     */
    bool nativeDefaults =
      ! (thd &&
         (! ndb_native_default_support(get_thd_ndb(thd)->
                                       ndb->getMinDbNodeVersion())));

    if (likely( nativeDefaults ))
    {
      /* No native default for primary key columns */
      if ((!(field->flags & PRI_KEY_FLAG) ) &&
          type_supports_default_value(mysql_type))
      {
        if (!(field->flags & NO_DEFAULT_VALUE_FLAG))
        {
          /* Offset from record[0] to the table's default values record */
          my_ptrdiff_t src_offset= field->table->s->default_values
            - field->table->record[0];
          if ((! field->is_real_null(src_offset)) ||
              ((field->flags & NOT_NULL_FLAG)))
          {
            /* Set a non-null native default */
            memset(buf, 0, MAX_ATTR_DEFAULT_VALUE_SIZE);
            get_default_value(buf, field);

            /* For bit columns, default length is rounded up to
               nearest word, ensuring all data sent
            */
            Uint32 defaultLen = field_used_length(field);
            if(field->type() == MYSQL_TYPE_BIT)
              defaultLen = ((defaultLen + 3) /4) * 4;
            col.setDefaultValue(buf, defaultLen);
          }
        }
      }
    }
  }
  switch (mysql_type) {
  // Numeric types
  case MYSQL_TYPE_TINY:
    if (field->flags & UNSIGNED_FLAG)
      col.setType(NDBCOL::Tinyunsigned);
    else
      col.setType(NDBCOL::Tinyint);
    col.setLength(1);
    break;
  case MYSQL_TYPE_SHORT:
    if (field->flags & UNSIGNED_FLAG)
      col.setType(NDBCOL::Smallunsigned);
    else
      col.setType(NDBCOL::Smallint);
    col.setLength(1);
    break;
  case MYSQL_TYPE_LONG:
    if (field->flags & UNSIGNED_FLAG)
      col.setType(NDBCOL::Unsigned);
    else
      col.setType(NDBCOL::Int);
    col.setLength(1);
    break;
  case MYSQL_TYPE_INT24:
    if (field->flags & UNSIGNED_FLAG)
      col.setType(NDBCOL::Mediumunsigned);
    else
      col.setType(NDBCOL::Mediumint);
    col.setLength(1);
    break;
  case MYSQL_TYPE_LONGLONG:
    if (field->flags & UNSIGNED_FLAG)
      col.setType(NDBCOL::Bigunsigned);
    else
      col.setType(NDBCOL::Bigint);
    col.setLength(1);
    break;
  case MYSQL_TYPE_FLOAT:
    col.setType(NDBCOL::Float);
    col.setLength(1);
    break;
  case MYSQL_TYPE_DOUBLE:
    col.setType(NDBCOL::Double);
    col.setLength(1);
    break;
  case MYSQL_TYPE_DECIMAL:
    {
      /*
        Old decimal: precision is derived from the pack length, less one
        for the decimal point when there is a scale and, for signed
        columns, one more.
      */
      Field_decimal *f= (Field_decimal*)field;
      uint precision= f->pack_length();
      uint scale= f->decimals();
      if (field->flags & UNSIGNED_FLAG)
      {
        col.setType(NDBCOL::Olddecimalunsigned);
        precision-= (scale > 0);
      }
      else
      {
        col.setType(NDBCOL::Olddecimal);
        precision-= 1 + (scale > 0);
      }
      col.setPrecision(precision);
      col.setScale(scale);
      col.setLength(1);
    }
    break;
  case MYSQL_TYPE_NEWDECIMAL:
    {
      Field_new_decimal *f= (Field_new_decimal*)field;
      uint precision= f->precision;
      uint scale= f->decimals();
      if (field->flags & UNSIGNED_FLAG)
      {
        col.setType(NDBCOL::Decimalunsigned);
      }
      else
      {
        col.setType(NDBCOL::Decimal);
      }
      col.setPrecision(precision);
      col.setScale(scale);
      col.setLength(1);
    }
    break;
  // Date types
  case MYSQL_TYPE_DATETIME:
    col.setType(NDBCOL::Datetime);
    col.setLength(1);
    break;
  case MYSQL_TYPE_DATE: // ?
    col.setType(NDBCOL::Char);
    col.setLength(field->pack_length());
    break;
  case MYSQL_TYPE_NEWDATE:
    col.setType(NDBCOL::Date);
    col.setLength(1);
    break;
  case MYSQL_TYPE_TIME:
    col.setType(NDBCOL::Time);
    col.setLength(1);
    break;
  case MYSQL_TYPE_YEAR:
    col.setType(NDBCOL::Year);
    col.setLength(1);
    break;
  case MYSQL_TYPE_TIMESTAMP:
    col.setType(NDBCOL::Timestamp);
    col.setLength(1);
    break;
  // Char types
  case MYSQL_TYPE_STRING:
    if (field->pack_length() == 0)
    {
      /* A zero length string is stored as a single bit */
      col.setType(NDBCOL::Bit);
      col.setLength(1);
    }
    else if ((field->flags & BINARY_FLAG) && cs == &my_charset_bin)
    {
      col.setType(NDBCOL::Binary);
      col.setLength(field->pack_length());
    }
    else
    {
      col.setType(NDBCOL::Char);
      col.setCharset(cs);
      col.setLength(field->pack_length());
    }
    break;
  case MYSQL_TYPE_VAR_STRING: // ?
  case MYSQL_TYPE_VARCHAR:
    {
      /* The number of length bytes selects short or long NDB var type */
      Field_varstring* f= (Field_varstring*)field;
      if (f->length_bytes == 1)
      {
        if ((field->flags & BINARY_FLAG) && cs == &my_charset_bin)
          col.setType(NDBCOL::Varbinary);
        else {
          col.setType(NDBCOL::Varchar);
          col.setCharset(cs);
        }
      }
      else if (f->length_bytes == 2)
      {
        if ((field->flags & BINARY_FLAG) && cs == &my_charset_bin)
          col.setType(NDBCOL::Longvarbinary);
        else {
          col.setType(NDBCOL::Longvarchar);
          col.setCharset(cs);
        }
      }
      else
      {
        DBUG_RETURN(HA_ERR_UNSUPPORTED);
      }
      col.setLength(field->field_length);
    }
    break;
  // Blob types (all come in as MYSQL_TYPE_BLOB)
  mysql_type_tiny_blob:
  case MYSQL_TYPE_TINY_BLOB:
    if ((field->flags & BINARY_FLAG) && cs == &my_charset_bin)
      col.setType(NDBCOL::Blob);
    else {
      col.setType(NDBCOL::Text);
      col.setCharset(cs);
    }
    col.setInlineSize(256);
    // No parts
    col.setPartSize(0);
    /* Both alternatives are 0: no striping for tiny blobs */
    col.setStripeSize(ndb_blob_striping() ? 0 : 0);
    break;
  //mysql_type_blob:
  case MYSQL_TYPE_GEOMETRY:
  case MYSQL_TYPE_BLOB:
    if ((field->flags & BINARY_FLAG) && cs == &my_charset_bin)
      col.setType(NDBCOL::Blob);
    else {
      col.setType(NDBCOL::Text);
      col.setCharset(cs);
    }
    {
      Field_blob *field_blob= (Field_blob *)field;
      /*
       * max_data_length is 2^8-1, 2^16-1, 2^24-1 for tiny, blob, medium.
       * Tinyblob gets no blob parts.  The other cases are just a crude
       * way to control part size and striping.
       *
       * In mysql blob(256) is promoted to blob(65535) so it does not
       * in fact fit "inline" in NDB.
       */
      if (field_blob->max_data_length() < (1 << 8))
        goto mysql_type_tiny_blob;
      else if (field_blob->max_data_length() < (1 << 16))
      {
        col.setInlineSize(256);
        col.setPartSize(2000);
        col.setStripeSize(ndb_blob_striping() ? 16 : 0);
        /* MAX_BLOB_PART_SIZE given: replace the part size set above */
        if (mod_maxblob->m_found)
        {
          col.setPartSize(4 * (NDB_MAX_TUPLE_SIZE_IN_WORDS - /* safety */ 13));
        }
      }
      else if (field_blob->max_data_length() < (1 << 24))
        goto mysql_type_medium_blob;
      else
        goto mysql_type_long_blob;
    }
    break;
  mysql_type_medium_blob:
  case MYSQL_TYPE_MEDIUM_BLOB:
    if ((field->flags & BINARY_FLAG) && cs == &my_charset_bin)
      col.setType(NDBCOL::Blob);
    else {
      col.setType(NDBCOL::Text);
      col.setCharset(cs);
    }
    col.setInlineSize(256);
    col.setPartSize(4000);
    col.setStripeSize(ndb_blob_striping() ? 8 : 0);
    /* MAX_BLOB_PART_SIZE given: replace the part size set above */
    if (mod_maxblob->m_found)
    {
      col.setPartSize(4 * (NDB_MAX_TUPLE_SIZE_IN_WORDS - /* safety */ 13));
    }
    break;
  mysql_type_long_blob:
  case MYSQL_TYPE_LONG_BLOB:
    if ((field->flags & BINARY_FLAG) && cs == &my_charset_bin)
      col.setType(NDBCOL::Blob);
    else {
      col.setType(NDBCOL::Text);
      col.setCharset(cs);
    }
    col.setInlineSize(256);
    /* Default part size is based on the old tuple size limit */
    col.setPartSize(4 * (OLD_NDB_MAX_TUPLE_SIZE_IN_WORDS - /* safety */ 13));
    col.setStripeSize(ndb_blob_striping() ? 4 : 0);
    /* MAX_BLOB_PART_SIZE given: use the current tuple size limit instead */
    if (mod_maxblob->m_found)
    {
      col.setPartSize(4 * (NDB_MAX_TUPLE_SIZE_IN_WORDS - /* safety */ 13));
    }
    break;
  // Other types
  case MYSQL_TYPE_ENUM:
    col.setType(NDBCOL::Char);
    col.setLength(field->pack_length());
    break;
  case MYSQL_TYPE_SET:
    col.setType(NDBCOL::Char);
    col.setLength(field->pack_length());
    break;
  case MYSQL_TYPE_BIT:
  {
    /* Length is the number of bits, but at least 1 */
    int no_of_bits= field->field_length;
    col.setType(NDBCOL::Bit);
    if (!no_of_bits)
      col.setLength(1);
    else
      col.setLength(no_of_bits);
    break;
  }
  case MYSQL_TYPE_NULL:
    goto mysql_type_unsupported;
  mysql_type_unsupported:
  default:
    DBUG_RETURN(HA_ERR_UNSUPPORTED);
  }
  // Set nullable and pk
  col.setNullable(field->maybe_null());
  col.setPrimaryKey(field->flags & PRI_KEY_FLAG);
  if ((field->flags & FIELD_IN_PART_FUNC_FLAG) != 0)
  {
    col.setPartitionKey(TRUE);
  }

  // Set autoincrement
  if (field->flags & AUTO_INCREMENT_FLAG)
  {
#ifndef DBUG_OFF
    /* Only needed by the DBUG_PRINT below */
    char buff[22];
#endif
    col.setAutoIncrement(TRUE);
    /* Start at 1 unless the table options give another start value */
    ulonglong value= create_info->auto_increment_value ?
      create_info->auto_increment_value : (ulonglong) 1;
    DBUG_PRINT("info", ("Autoincrement key, initial: %s", llstr(value, buff)));
    col.setAutoIncrementInitialValue(value);
  }
  else
    col.setAutoIncrement(FALSE);

#ifndef NDB_WITHOUT_COLUMN_FORMAT
  DBUG_PRINT("info", ("storage: %u  format: %u  ",
                      field->field_storage_type(),
                      field->column_format()));
  /* Column level storage setting wins, else use the table level one */
  switch (field->field_storage_type()) {
  case(HA_SM_DEFAULT):
  default:
    if (create_info->storage_media == HA_SM_DISK)
      type= NDBCOL::StorageTypeDisk;
    else
      type= NDBCOL::StorageTypeMemory;
    break;
  case(HA_SM_DISK):
    type= NDBCOL::StorageTypeDisk;
    break;
  case(HA_SM_MEMORY):
    type= NDBCOL::StorageTypeMemory;
    break;
  }

  /* Column level format wins, else row type, else default_format */
  switch (field->column_format()) {
  case(COLUMN_FORMAT_TYPE_FIXED):
    dynamic= FALSE;
    break;
  case(COLUMN_FORMAT_TYPE_DYNAMIC):
    dynamic= TRUE;
    break;
  case(COLUMN_FORMAT_TYPE_DEFAULT):
  default:
    if (create_info->row_type == ROW_TYPE_DEFAULT)
      dynamic= default_format;
    else
      dynamic= (create_info->row_type == ROW_TYPE_DYNAMIC);
    break;
  }
#endif
  DBUG_PRINT("info", ("Column %s is declared %s", field->field_name,
                      (dynamic) ? "dynamic" : "static"));
  if (type == NDBCOL::StorageTypeDisk)
  {
    /* Disk stored columns are never dynamic */
    if (dynamic)
    {
      DBUG_PRINT("info", ("Dynamic disk stored column %s changed to static",
                          field->field_name));
      dynamic= false;
    }

#ifndef NDB_WITHOUT_COLUMN_FORMAT
    /* Warn only when DYNAMIC was asked for explicitly on the column */
    if (thd && field->column_format() == COLUMN_FORMAT_TYPE_DYNAMIC)
    {
      push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN,
                          ER_ILLEGAL_HA_CREATE_OPTION,
                          "DYNAMIC column %s with "
                          "STORAGE DISK is not supported, "
                          "column will become FIXED",
                          field->field_name);
    }
#endif
  }

  switch (create_info->row_type) {
  case ROW_TYPE_FIXED:
    /* Warning only, the column settings are left as they are */
    if (thd && (dynamic || field_type_forces_var_part(field->type())))
    {
      push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN,
                          ER_ILLEGAL_HA_CREATE_OPTION,
                          "Row format FIXED incompatible with "
                          "dynamic attribute %s",
                          field->field_name);
    }
    break;
  case ROW_TYPE_DYNAMIC:
    /*
      Future: make columns dynamic in this case
    */
    break;
  default:
    break;
  }

  DBUG_PRINT("info", ("Format %s, Storage %s", (dynamic)?"dynamic":"fixed",(type == NDBCOL::StorageTypeDisk)?"disk":"memory"));
  col.setStorageType(type);
  col.setDynamic(dynamic);

  DBUG_RETURN(0);
}
8951
update_create_info(HA_CREATE_INFO * create_info)8952 void ha_ndbcluster::update_create_info(HA_CREATE_INFO *create_info)
8953 {
8954 DBUG_ENTER("ha_ndbcluster::update_create_info");
8955 THD *thd= current_thd;
8956 const NDBTAB *ndbtab= m_table;
8957 Ndb *ndb= check_ndb_in_thd(thd);
8958
8959 if (!(create_info->used_fields & HA_CREATE_USED_AUTO))
8960 {
8961 /*
8962 Find any initial auto_increment value
8963 */
8964 for (uint i= 0; i < table->s->fields; i++)
8965 {
8966 Field *field= table->field[i];
8967 if (field->flags & AUTO_INCREMENT_FLAG)
8968 {
8969 ulonglong auto_value;
8970 uint retries= NDB_AUTO_INCREMENT_RETRIES;
8971 int retry_sleep= 30; /* 30 milliseconds, transaction */
8972 for (;;)
8973 {
8974 Ndb_tuple_id_range_guard g(m_share);
8975 if (ndb->readAutoIncrementValue(ndbtab, g.range, auto_value))
8976 {
8977 if (--retries && !thd->killed &&
8978 ndb->getNdbError().status == NdbError::TemporaryError)
8979 {
8980 do_retry_sleep(retry_sleep);
8981 continue;
8982 }
8983 const NdbError err= ndb->getNdbError();
8984 sql_print_error("Error %lu in ::update_create_info(): %s",
8985 (ulong) err.code, err.message);
8986 DBUG_VOID_RETURN;
8987 }
8988 break;
8989 }
8990 if (auto_value > 1)
8991 {
8992 create_info->auto_increment_value= auto_value;
8993 }
8994 break;
8995 }
8996 }
8997 }
8998
8999 DBUG_VOID_RETURN;
9000 }
9001
/*
  Estimate the number of fragments needed to store max_rows rows.

  Each row is counted as 25 bytes plus a safety margin of 2 bytes, and
  each fragment as holding 512MB of such data. The result is the
  number of full fragments plus one.

  The product max_rows * row size is never formed directly since it
  would wrap around for very large max_rows. Results that don't fit in
  a uint are returned as the largest uint value.

  @param max_rows  number of rows the table should be able to store

  @return estimated number of fragments, at least 1
*/
static uint get_no_fragments(ulonglong max_rows)
{
  const ulonglong acc_row_size= 25 + /*safety margin*/ 2;
  const ulonglong acc_fragment_size= 512*1024*1024;

  /*
    floor(max_rows * acc_row_size / acc_fragment_size) computed from
    quotient and remainder of max_rows, this gives the exact same
    value but with intermediate results that always fit
  */
  const ulonglong from_quotient=
    (max_rows / acc_fragment_size) * acc_row_size;
  const ulonglong from_remainder=
    ((max_rows % acc_fragment_size) * acc_row_size) / acc_fragment_size;
  const ulonglong fragments= from_quotient + from_remainder + 1;

  const ulonglong largest_uint= ~(uint)0;
  if (fragments > largest_uint)
    return (uint)largest_uint;
  return (uint)fragments;
}
9011
9012
9013 /*
9014 Routine to adjust default number of partitions to always be a multiple
9015 of number of nodes and never more than 4 times the number of nodes.
9016
9017 */
9018 static
9019 bool
adjusted_frag_count(Ndb * ndb,uint requested_frags,uint & reported_frags)9020 adjusted_frag_count(Ndb* ndb,
9021 uint requested_frags,
9022 uint &reported_frags)
9023 {
9024 unsigned no_nodes= g_ndb_cluster_connection->no_db_nodes();
9025 unsigned no_replicas= no_nodes == 1 ? 1 : 2;
9026
9027 unsigned no_threads= 1;
9028 const unsigned no_nodegroups= g_ndb_cluster_connection->max_nodegroup() + 1;
9029
9030 {
9031 /**
9032 * Use SYSTAB_0 to get #replicas, and to guess #threads
9033 */
9034 char dbname[FN_HEADLEN+1];
9035 dbname[FN_HEADLEN]= 0;
9036 strnmov(dbname, ndb->getDatabaseName(), sizeof(dbname) - 1);
9037 ndb->setDatabaseName("sys");
9038 Ndb_table_guard ndbtab_g(ndb->getDictionary(), "SYSTAB_0");
9039 const NdbDictionary::Table * tab = ndbtab_g.get_table();
9040 if (tab)
9041 {
9042 no_replicas= ndbtab_g.get_table()->getReplicaCount();
9043
9044 /**
9045 * Guess #threads
9046 */
9047 {
9048 const Uint32 frags = tab->getFragmentCount();
9049 Uint32 node = 0;
9050 Uint32 cnt = 0;
9051 for (Uint32 i = 0; i<frags; i++)
9052 {
9053 Uint32 replicas[4];
9054 if (tab->getFragmentNodes(i, replicas, NDB_ARRAY_SIZE(replicas)))
9055 {
9056 if (node == replicas[0] || node == 0)
9057 {
9058 node = replicas[0];
9059 cnt ++;
9060 }
9061 }
9062 }
9063 no_threads = cnt; // No of primary replica on 1-node
9064 }
9065 }
9066 ndb->setDatabaseName(dbname);
9067 }
9068
9069 const unsigned usable_nodes = no_replicas * no_nodegroups;
9070 const uint max_replicas = 8 * usable_nodes * no_threads;
9071
9072 reported_frags = usable_nodes * no_threads; // Start with 1 frag per threads
9073 Uint32 replicas = reported_frags * no_replicas;
9074
9075 /**
9076 * Loop until requested replicas, and not exceed max-replicas
9077 */
9078 while (reported_frags < requested_frags &&
9079 (replicas + usable_nodes * no_threads * no_replicas) <= max_replicas)
9080 {
9081 reported_frags += usable_nodes * no_threads;
9082 replicas += usable_nodes * no_threads * no_replicas;
9083 }
9084
9085 return (reported_frags < requested_frags);
9086 }
9087
9088
9089 /**
9090 Create a table in NDB Cluster
9091 */
9092
create(const char * name,TABLE * form,HA_CREATE_INFO * create_info)9093 int ha_ndbcluster::create(const char *name,
9094 TABLE *form,
9095 HA_CREATE_INFO *create_info)
9096 {
9097 THD *thd= current_thd;
9098 NDBTAB tab;
9099 NDBCOL col;
9100 size_t pack_length, length;
9101 uint i, pk_length= 0;
9102 uchar *data= NULL, *pack_data= NULL;
9103 bool create_temporary= (create_info->options & HA_LEX_CREATE_TMP_TABLE);
9104 bool create_from_engine= (create_info->table_options & HA_OPTION_CREATE_FROM_ENGINE);
9105 bool is_truncate= (thd->lex->sql_command == SQLCOM_TRUNCATE);
9106 bool use_disk= FALSE;
9107 NdbDictionary::Table::SingleUserMode single_user_mode= NdbDictionary::Table::SingleUserModeLocked;
9108 bool ndb_sys_table= FALSE;
9109 int result= 0;
9110 NdbDictionary::ObjectId objId;
9111
9112 DBUG_ENTER("ha_ndbcluster::create");
9113 DBUG_PRINT("enter", ("name: %s", name));
9114
9115 if (create_temporary)
9116 {
9117 /*
9118 Ndb does not support temporary tables
9119 */
9120 my_errno= ER_ILLEGAL_HA_CREATE_OPTION;
9121 DBUG_PRINT("info", ("Ndb doesn't support temporary tables"));
9122 push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN,
9123 ER_ILLEGAL_HA_CREATE_OPTION,
9124 "Ndb doesn't support temporary tables");
9125 DBUG_RETURN(my_errno);
9126 }
9127
9128 DBUG_ASSERT(*fn_rext((char*)name) == 0);
9129 set_dbname(name);
9130 set_tabname(name);
9131
9132 if ((my_errno= check_ndb_connection(thd)))
9133 DBUG_RETURN(my_errno);
9134
9135 Ndb *ndb= get_ndb(thd);
9136 NDBDICT *dict= ndb->getDictionary();
9137
9138 table= form;
9139 if (create_from_engine)
9140 {
9141 /*
9142 Table already exists in NDB and frm file has been created by
9143 caller.
9144 Do Ndb specific stuff, such as create a .ndb file
9145 */
9146 if ((my_errno= write_ndb_file(name)))
9147 DBUG_RETURN(my_errno);
9148
9149 ndbcluster_create_binlog_setup(thd, ndb, name, strlen(name),
9150 m_dbname, m_tabname, form);
9151 DBUG_RETURN(my_errno);
9152 }
9153
9154 Thd_ndb *thd_ndb= get_thd_ndb(thd);
9155
9156 if (!((thd_ndb->options & TNO_NO_LOCK_SCHEMA_OP) ||
9157 thd_ndb->has_required_global_schema_lock("ha_ndbcluster::create")))
9158
9159 DBUG_RETURN(HA_ERR_NO_CONNECTION);
9160
9161 /*
9162 Don't allow table creation unless
9163 schema distribution table is setup
9164 ( unless it is a creation of the schema dist table itself )
9165 */
9166 if (!ndb_schema_share)
9167 {
9168 if (!(strcmp(m_dbname, NDB_REP_DB) == 0 &&
9169 strcmp(m_tabname, NDB_SCHEMA_TABLE) == 0))
9170 {
9171 DBUG_PRINT("info", ("Schema distribution table not setup"));
9172 DBUG_RETURN(HA_ERR_NO_CONNECTION);
9173 }
9174 single_user_mode = NdbDictionary::Table::SingleUserModeReadWrite;
9175 ndb_sys_table= TRUE;
9176 }
9177
9178 if (!ndb_apply_status_share)
9179 {
9180 if ((strcmp(m_dbname, NDB_REP_DB) == 0 &&
9181 strcmp(m_tabname, NDB_APPLY_TABLE) == 0))
9182 {
9183 ndb_sys_table= TRUE;
9184 }
9185 }
9186
9187 if (is_truncate)
9188 {
9189 Ndb_table_guard ndbtab_g(dict);
9190 ndbtab_g.init(m_tabname);
9191 if (!(m_table= ndbtab_g.get_table()))
9192 ERR_RETURN(dict->getNdbError());
9193 m_table= NULL;
9194 DBUG_PRINT("info", ("Dropping and re-creating table for TRUNCATE"));
9195 if ((my_errno= delete_table(name)))
9196 DBUG_RETURN(my_errno);
9197 ndbtab_g.reinit();
9198 }
9199
9200 NDB_Modifiers table_modifiers(ndb_table_modifiers);
9201 table_modifiers.parse(thd, "NDB_TABLE=", create_info->comment.str,
9202 create_info->comment.length);
9203 const NDB_Modifier * mod_nologging = table_modifiers.get("NOLOGGING");
9204
9205 #ifdef HAVE_NDB_BINLOG
9206 /* Read ndb_replication entry for this table, if any */
9207 Uint32 binlog_flags;
9208 const st_conflict_fn_def* conflict_fn= NULL;
9209 st_conflict_fn_arg args[MAX_CONFLICT_ARGS];
9210 Uint32 num_args = MAX_CONFLICT_ARGS;
9211
9212 int rep_read_rc= ndbcluster_get_binlog_replication_info(thd,
9213 ndb,
9214 m_dbname,
9215 m_tabname,
9216 ::server_id,
9217 form,
9218 &binlog_flags,
9219 &conflict_fn,
9220 args,
9221 &num_args);
9222 if (rep_read_rc != 0)
9223 {
9224 DBUG_RETURN(rep_read_rc);
9225 }
9226
9227 /* Reset database name */
9228 ndb->setDatabaseName(m_dbname);
9229
9230 /* Use ndb_replication information as required */
9231 if (conflict_fn != NULL)
9232 {
9233 switch(conflict_fn->type)
9234 {
9235 case CFT_NDB_EPOCH:
9236 {
9237 /* Default 6 extra Gci bits allows 2^6 == 64
9238 * epochs / saveGCP, a comfortable default
9239 */
9240 Uint32 numExtraGciBits = 6;
9241 Uint32 numExtraAuthorBits = 1;
9242
9243 if ((num_args == 1) &&
9244 (args[0].type == CFAT_EXTRA_GCI_BITS))
9245 {
9246 numExtraGciBits = args[0].extraGciBits;
9247 }
9248 DBUG_PRINT("info", ("Setting ExtraRowGciBits to %u, "
9249 "ExtraAuthorBits to %u",
9250 numExtraGciBits,
9251 numExtraAuthorBits));
9252
9253 tab.setExtraRowGciBits(numExtraGciBits);
9254 tab.setExtraRowAuthorBits(numExtraAuthorBits);
9255 }
9256 default:
9257 break;
9258 }
9259 }
9260 #endif
9261
9262 if ((dict->beginSchemaTrans() == -1))
9263 {
9264 DBUG_PRINT("info", ("Failed to start schema transaction"));
9265 goto err_return;
9266 }
9267 DBUG_PRINT("info", ("Started schema transaction"));
9268
9269 DBUG_PRINT("table", ("name: %s", m_tabname));
9270 if (tab.setName(m_tabname))
9271 {
9272 my_errno= errno;
9273 goto abort;
9274 }
9275 if (!ndb_sys_table)
9276 {
9277 if (THDVAR(thd, table_temporary))
9278 {
9279 #ifdef DOES_NOT_WORK_CURRENTLY
9280 tab.setTemporary(TRUE);
9281 #endif
9282 tab.setLogging(FALSE);
9283 }
9284 else if (THDVAR(thd, table_no_logging))
9285 {
9286 tab.setLogging(FALSE);
9287 }
9288
9289 if (mod_nologging->m_found)
9290 {
9291 tab.setLogging(!mod_nologging->m_val_bool);
9292 }
9293 }
9294 tab.setSingleUserMode(single_user_mode);
9295
9296 // Save frm data for this table
9297 if (readfrm(name, &data, &length))
9298 {
9299 result= 1;
9300 goto abort_return;
9301 }
9302 if (packfrm(data, length, &pack_data, &pack_length))
9303 {
9304 my_free((char*)data, MYF(0));
9305 result= 2;
9306 goto abort_return;
9307 }
9308 DBUG_PRINT("info",
9309 ("setFrm data: 0x%lx len: %lu", (long) pack_data,
9310 (ulong) pack_length));
9311 tab.setFrm(pack_data, Uint32(pack_length));
9312 my_free((char*)data, MYF(0));
9313 my_free((char*)pack_data, MYF(0));
9314
9315 /*
9316 Handle table row type
9317
9318 Default is to let table rows have var part reference so that online
9319 add column can be performed in the future. Explicitly setting row
9320 type to fixed will omit var part reference, which will save data
9321 memory in ndb, but at the cost of not being able to online add
9322 column to this table
9323 */
9324 switch (create_info->row_type) {
9325 case ROW_TYPE_FIXED:
9326 tab.setForceVarPart(FALSE);
9327 break;
9328 case ROW_TYPE_DYNAMIC:
9329 /* fall through, treat as default */
9330 default:
9331 /* fall through, treat as default */
9332 case ROW_TYPE_DEFAULT:
9333 tab.setForceVarPart(TRUE);
9334 break;
9335 }
9336
9337 /*
9338 Setup columns
9339 */
9340 my_bitmap_map *old_map;
9341 {
9342 restore_record(form, s->default_values);
9343 old_map= tmp_use_all_columns(form, form->read_set);
9344 }
9345
9346 for (i= 0; i < form->s->fields; i++)
9347 {
9348 Field *field= form->field[i];
9349 DBUG_PRINT("info", ("name: %s, type: %u, pack_length: %d",
9350 field->field_name, field->real_type(),
9351 field->pack_length()));
9352 if ((my_errno= create_ndb_column(thd, col, field, create_info)))
9353 goto abort;
9354
9355 if (!use_disk &&
9356 col.getStorageType() == NDBCOL::StorageTypeDisk)
9357 use_disk= TRUE;
9358
9359 if (tab.addColumn(col))
9360 {
9361 my_errno= errno;
9362 goto abort;
9363 }
9364 if (col.getPrimaryKey())
9365 pk_length += (field->pack_length() + 3) / 4;
9366 }
9367
9368 tmp_restore_column_map(form->read_set, old_map);
9369 if (use_disk)
9370 {
9371 tab.setLogging(TRUE);
9372 tab.setTemporary(FALSE);
9373 if (create_info->tablespace)
9374 tab.setTablespaceName(create_info->tablespace);
9375 else
9376 tab.setTablespaceName("DEFAULT-TS");
9377 }
9378
9379 // Save the table level storage media setting
9380 switch(create_info->storage_media)
9381 {
9382 case HA_SM_DISK:
9383 tab.setStorageType(NdbDictionary::Column::StorageTypeDisk);
9384 break;
9385 case HA_SM_DEFAULT:
9386 tab.setStorageType(NdbDictionary::Column::StorageTypeDefault);
9387 break;
9388 case HA_SM_MEMORY:
9389 tab.setStorageType(NdbDictionary::Column::StorageTypeMemory);
9390 break;
9391 }
9392
9393 DBUG_PRINT("info", ("Table %s is %s stored with tablespace %s",
9394 m_tabname,
9395 (use_disk) ? "disk" : "memory",
9396 (use_disk) ? tab.getTablespaceName() : "N/A"));
9397
9398 KEY* key_info;
9399 for (i= 0, key_info= form->key_info; i < form->s->keys; i++, key_info++)
9400 {
9401 KEY_PART_INFO *key_part= key_info->key_part;
9402 KEY_PART_INFO *end= key_part + key_info->user_defined_key_parts;
9403 for (; key_part != end; key_part++)
9404 {
9405 #ifndef NDB_WITHOUT_COLUMN_FORMAT
9406 if (key_part->field->field_storage_type() == HA_SM_DISK)
9407 {
9408 push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN,
9409 ER_ILLEGAL_HA_CREATE_OPTION,
9410 ER(ER_ILLEGAL_HA_CREATE_OPTION),
9411 ndbcluster_hton_name,
9412 "Index on field "
9413 "declared with "
9414 "STORAGE DISK is not supported");
9415 result= HA_ERR_UNSUPPORTED;
9416 goto abort_return;
9417 }
9418 #endif
9419 tab.getColumn(key_part->fieldnr-1)->setStorageType(
9420 NdbDictionary::Column::StorageTypeMemory);
9421 }
9422 }
9423
9424 // No primary key, create shadow key as 64 bit, auto increment
9425 if (form->s->primary_key == MAX_KEY)
9426 {
9427 DBUG_PRINT("info", ("Generating shadow key"));
9428 if (col.setName("$PK"))
9429 {
9430 my_errno= errno;
9431 goto abort;
9432 }
9433 col.setType(NdbDictionary::Column::Bigunsigned);
9434 col.setLength(1);
9435 col.setNullable(FALSE);
9436 col.setPrimaryKey(TRUE);
9437 col.setAutoIncrement(TRUE);
9438 col.setDefaultValue(NULL, 0);
9439 if (tab.addColumn(col))
9440 {
9441 my_errno= errno;
9442 goto abort;
9443 }
9444 pk_length += 2;
9445 }
9446
9447 // Make sure that blob tables don't have too big part size
9448 for (i= 0; i < form->s->fields; i++)
9449 {
9450 /**
9451 * The extra +7 concists
9452 * 2 - words from pk in blob table
9453 * 5 - from extra words added by tup/dict??
9454 */
9455
9456 // To be upgrade/downgrade safe...we currently use
9457 // old NDB_MAX_TUPLE_SIZE_IN_WORDS, unless MAX_BLOB_PART_SIZE is set
9458 switch (form->field[i]->real_type()) {
9459 case MYSQL_TYPE_GEOMETRY:
9460 case MYSQL_TYPE_BLOB:
9461 case MYSQL_TYPE_MEDIUM_BLOB:
9462 case MYSQL_TYPE_LONG_BLOB:
9463 {
9464 NdbDictionary::Column * column= tab.getColumn(i);
9465 unsigned size= pk_length + (column->getPartSize()+3)/4 + 7;
9466 unsigned ndb_max= OLD_NDB_MAX_TUPLE_SIZE_IN_WORDS;
9467 if (column->getPartSize() > (int)(4 * ndb_max))
9468 ndb_max= NDB_MAX_TUPLE_SIZE_IN_WORDS; // MAX_BLOB_PART_SIZE
9469
9470 if (size > ndb_max &&
9471 (pk_length+7) < ndb_max)
9472 {
9473 size= ndb_max - pk_length - 7;
9474 column->setPartSize(4*size);
9475 }
9476 /**
9477 * If size > NDB_MAX and pk_length+7 >= NDB_MAX
9478 * then the table can't be created anyway, so skip
9479 * changing part size, and have error later
9480 */
9481 }
9482 default:
9483 break;
9484 }
9485 }
9486
9487 // Check partition info
9488 if ((my_errno= set_up_partition_info(form->part_info, tab)))
9489 goto abort;
9490
9491 if (tab.getFragmentType() == NDBTAB::HashMapPartition &&
9492 tab.getDefaultNoPartitionsFlag() &&
9493 (create_info->max_rows != 0 || create_info->min_rows != 0))
9494 {
9495 ulonglong rows= create_info->max_rows >= create_info->min_rows ?
9496 create_info->max_rows :
9497 create_info->min_rows;
9498 uint no_fragments= get_no_fragments(rows);
9499 uint reported_frags= no_fragments;
9500 if (adjusted_frag_count(ndb, no_fragments, reported_frags))
9501 {
9502 push_warning(current_thd,
9503 Sql_condition::WARN_LEVEL_WARN, ER_UNKNOWN_ERROR,
9504 "Ndb might have problems storing the max amount "
9505 "of rows specified");
9506 }
9507 tab.setFragmentCount(reported_frags);
9508 tab.setDefaultNoPartitionsFlag(false);
9509 tab.setFragmentData(0, 0);
9510 }
9511
9512 // Check for HashMap
9513 if (tab.getFragmentType() == NDBTAB::HashMapPartition &&
9514 tab.getDefaultNoPartitionsFlag())
9515 {
9516 tab.setFragmentCount(0);
9517 tab.setFragmentData(0, 0);
9518 }
9519 else if (tab.getFragmentType() == NDBTAB::HashMapPartition)
9520 {
9521 NdbDictionary::HashMap hm;
9522 int res= dict->getDefaultHashMap(hm, tab.getFragmentCount());
9523 if (res == -1)
9524 {
9525 res= dict->initDefaultHashMap(hm, tab.getFragmentCount());
9526 if (res == -1)
9527 {
9528 const NdbError err= dict->getNdbError();
9529 my_errno= ndb_to_mysql_error(&err);
9530 goto abort;
9531 }
9532
9533 res= dict->createHashMap(hm);
9534 if (res == -1)
9535 {
9536 const NdbError err= dict->getNdbError();
9537 my_errno= ndb_to_mysql_error(&err);
9538 goto abort;
9539 }
9540 }
9541 }
9542
9543 // Create the table in NDB
9544 if (dict->createTable(tab, &objId) != 0)
9545 {
9546 const NdbError err= dict->getNdbError();
9547 my_errno= ndb_to_mysql_error(&err);
9548 goto abort;
9549 }
9550
9551 DBUG_PRINT("info", ("Table %s/%s created successfully",
9552 m_dbname, m_tabname));
9553
9554 // Create secondary indexes
9555 tab.assignObjId(objId);
9556 m_table= &tab;
9557 my_errno= create_indexes(thd, ndb, form);
9558 m_table= 0;
9559
9560 if (!my_errno)
9561 {
9562 /*
9563 * All steps have succeeded, try and commit schema transaction
9564 */
9565 if (dict->endSchemaTrans() == -1)
9566 goto err_return;
9567 my_errno= write_ndb_file(name);
9568 }
9569 else
9570 {
9571 abort:
9572 /*
9573 * Some step during table creation failed, abort schema transaction
9574 */
9575 DBUG_PRINT("info", ("Aborting schema transaction due to error %i",
9576 my_errno));
9577 if (dict->endSchemaTrans(NdbDictionary::Dictionary::SchemaTransAbort)
9578 == -1)
9579 DBUG_PRINT("info", ("Failed to abort schema transaction, %i",
9580 dict->getNdbError().code));
9581 m_table= 0;
9582 DBUG_RETURN(my_errno);
9583 abort_return:
9584 DBUG_PRINT("info", ("Aborting schema transaction"));
9585 if (dict->endSchemaTrans(NdbDictionary::Dictionary::SchemaTransAbort)
9586 == -1)
9587 DBUG_PRINT("info", ("Failed to abort schema transaction, %i",
9588 dict->getNdbError().code));
9589 DBUG_RETURN(result);
9590 err_return:
9591 m_table= 0;
9592 ERR_RETURN(dict->getNdbError());
9593 }
9594
9595 /**
9596 * createTable/index schema transaction OK
9597 */
9598 Ndb_table_guard ndbtab_g(dict, m_tabname);
9599 m_table= ndbtab_g.get_table();
9600
9601 if (my_errno)
9602 {
9603 /*
9604 Failed to create an index,
9605 drop the table (and all it's indexes)
9606 */
9607 while (!thd->killed)
9608 {
9609 if (dict->beginSchemaTrans() == -1)
9610 goto cleanup_failed;
9611 if (dict->dropTableGlobal(*m_table))
9612 {
9613 switch (dict->getNdbError().status)
9614 {
9615 case NdbError::TemporaryError:
9616 if (!thd->killed)
9617 {
9618 if (dict->endSchemaTrans(NdbDictionary::Dictionary::SchemaTransAbort)
9619 == -1)
9620 DBUG_PRINT("info", ("Failed to abort schema transaction, %i",
9621 dict->getNdbError().code));
9622 goto cleanup_failed;
9623 }
9624 break;
9625 default:
9626 break;
9627 }
9628 }
9629 if (dict->endSchemaTrans() == -1)
9630 {
9631 cleanup_failed:
9632 DBUG_PRINT("info", ("Could not cleanup failed create %i",
9633 dict->getNdbError().code));
9634 continue; // retry indefinitly
9635 }
9636 break;
9637 }
9638 m_table = 0;
9639 DBUG_RETURN(my_errno);
9640 }
9641 else // if (!my_errno)
9642 {
9643 NDB_SHARE *share= 0;
9644 pthread_mutex_lock(&ndbcluster_mutex);
9645 /*
9646 First make sure we get a "fresh" share here, not an old trailing one...
9647 */
9648 {
9649 uint length= (uint) strlen(name);
9650 if ((share= (NDB_SHARE*) my_hash_search(&ndbcluster_open_tables,
9651 (const uchar*) name, length)))
9652 handle_trailing_share(thd, share);
9653 }
9654 /*
9655 get a new share
9656 */
9657
9658 /* ndb_share reference create */
9659 if (!(share= get_share(name, form, TRUE, TRUE)))
9660 {
9661 sql_print_error("NDB: allocating table share for %s failed", name);
9662 /* my_errno is set */
9663 }
9664 else
9665 {
9666 DBUG_PRINT("NDB_SHARE", ("%s binlog create use_count: %u",
9667 share->key, share->use_count));
9668 }
9669 pthread_mutex_unlock(&ndbcluster_mutex);
9670
9671 while (!IS_TMP_PREFIX(m_tabname))
9672 {
9673 #ifdef HAVE_NDB_BINLOG
9674 if (share)
9675 {
9676 /* Set the Binlogging information we retrieved above */
9677 ndbcluster_apply_binlog_replication_info(thd,
9678 share,
9679 m_table,
9680 form,
9681 conflict_fn,
9682 args,
9683 num_args,
9684 TRUE, /* Do set binlog flags */
9685 binlog_flags);
9686 }
9687 #endif
9688 String event_name(INJECTOR_EVENT_LEN);
9689 ndb_rep_event_name(&event_name, m_dbname, m_tabname,
9690 get_binlog_full(share));
9691 int do_event_op= ndb_binlog_running;
9692
9693 if (!ndb_schema_share &&
9694 strcmp(share->db, NDB_REP_DB) == 0 &&
9695 strcmp(share->table_name, NDB_SCHEMA_TABLE) == 0)
9696 do_event_op= 1;
9697
9698 /*
9699 Always create an event for the table, as other mysql servers
9700 expect it to be there.
9701 */
9702 if (!ndbcluster_create_event(thd, ndb, m_table, event_name.c_ptr(), share,
9703 share && do_event_op ? 2 : 1/* push warning */))
9704 {
9705 if (opt_ndb_extra_logging)
9706 sql_print_information("NDB Binlog: CREATE TABLE Event: %s",
9707 event_name.c_ptr());
9708 if (share &&
9709 ndbcluster_create_event_ops(thd, share,
9710 m_table, event_name.c_ptr()))
9711 {
9712 sql_print_error("NDB Binlog: FAILED CREATE TABLE event operations."
9713 " Event: %s", name);
9714 /* a warning has been issued to the client */
9715 }
9716 }
9717 /*
9718 warning has been issued if ndbcluster_create_event failed
9719 and (share && do_event_op)
9720 */
9721 if (share && !do_event_op)
9722 set_binlog_nologging(share);
9723 ndbcluster_log_schema_op(thd,
9724 thd->query(), thd->query_length(),
9725 share->db, share->table_name,
9726 m_table->getObjectId(),
9727 m_table->getObjectVersion(),
9728 (is_truncate) ?
9729 SOT_TRUNCATE_TABLE : SOT_CREATE_TABLE,
9730 NULL, NULL);
9731 break;
9732 }
9733 }
9734
9735 m_table= 0;
9736 DBUG_RETURN(my_errno);
9737 }
9738
9739
create_index(THD * thd,const char * name,KEY * key_info,NDB_INDEX_TYPE idx_type,uint idx_no)9740 int ha_ndbcluster::create_index(THD *thd, const char *name, KEY *key_info,
9741 NDB_INDEX_TYPE idx_type, uint idx_no)
9742 {
9743 int error= 0;
9744 char unique_name[FN_LEN + 1];
9745 static const char* unique_suffix= "$unique";
9746 DBUG_ENTER("ha_ndbcluster::create_ordered_index");
9747 DBUG_PRINT("info", ("Creating index %u: %s", idx_no, name));
9748
9749 if (idx_type == UNIQUE_ORDERED_INDEX || idx_type == UNIQUE_INDEX)
9750 {
9751 strxnmov(unique_name, FN_LEN, name, unique_suffix, NullS);
9752 DBUG_PRINT("info", ("Created unique index name \'%s\' for index %d",
9753 unique_name, idx_no));
9754 }
9755
9756 switch (idx_type){
9757 case PRIMARY_KEY_INDEX:
9758 // Do nothing, already created
9759 break;
9760 case PRIMARY_KEY_ORDERED_INDEX:
9761 error= create_ordered_index(thd, name, key_info);
9762 break;
9763 case UNIQUE_ORDERED_INDEX:
9764 if (!(error= create_ordered_index(thd, name, key_info)))
9765 error= create_unique_index(thd, unique_name, key_info);
9766 break;
9767 case UNIQUE_INDEX:
9768 if (check_index_fields_not_null(key_info))
9769 {
9770 push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN,
9771 ER_NULL_COLUMN_IN_INDEX,
9772 "Ndb does not support unique index on NULL valued attributes, index access with NULL value will become full table scan");
9773 }
9774 error= create_unique_index(thd, unique_name, key_info);
9775 break;
9776 case ORDERED_INDEX:
9777 if (key_info->algorithm == HA_KEY_ALG_HASH)
9778 {
9779 push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN,
9780 ER_ILLEGAL_HA_CREATE_OPTION,
9781 ER(ER_ILLEGAL_HA_CREATE_OPTION),
9782 ndbcluster_hton_name,
9783 "Ndb does not support non-unique "
9784 "hash based indexes");
9785 error= HA_ERR_UNSUPPORTED;
9786 break;
9787 }
9788 error= create_ordered_index(thd, name, key_info);
9789 break;
9790 default:
9791 DBUG_ASSERT(FALSE);
9792 break;
9793 }
9794
9795 DBUG_RETURN(error);
9796 }
9797
create_ordered_index(THD * thd,const char * name,KEY * key_info)9798 int ha_ndbcluster::create_ordered_index(THD *thd, const char *name,
9799 KEY *key_info)
9800 {
9801 DBUG_ENTER("ha_ndbcluster::create_ordered_index");
9802 DBUG_RETURN(create_ndb_index(thd, name, key_info, FALSE));
9803 }
9804
create_unique_index(THD * thd,const char * name,KEY * key_info)9805 int ha_ndbcluster::create_unique_index(THD *thd, const char *name,
9806 KEY *key_info)
9807 {
9808
9809 DBUG_ENTER("ha_ndbcluster::create_unique_index");
9810 DBUG_RETURN(create_ndb_index(thd, name, key_info, TRUE));
9811 }
9812
9813
9814 /**
9815 Create an index in NDB Cluster.
9816
9817 @todo
9818 Only temporary ordered indexes supported
9819 */
9820
create_ndb_index(THD * thd,const char * name,KEY * key_info,bool unique)9821 int ha_ndbcluster::create_ndb_index(THD *thd, const char *name,
9822 KEY *key_info,
9823 bool unique)
9824 {
9825 char index_name[FN_LEN + 1];
9826 Ndb *ndb= get_ndb(thd);
9827 NdbDictionary::Dictionary *dict= ndb->getDictionary();
9828 KEY_PART_INFO *key_part= key_info->key_part;
9829 KEY_PART_INFO *end= key_part + key_info->user_defined_key_parts;
9830
9831 DBUG_ENTER("ha_ndbcluster::create_index");
9832 DBUG_PRINT("enter", ("name: %s ", name));
9833
9834 ndb_protect_char(name, index_name, sizeof(index_name) - 1, '/');
9835 DBUG_PRINT("info", ("index name: %s ", index_name));
9836
9837 NdbDictionary::Index ndb_index(index_name);
9838 if (unique)
9839 ndb_index.setType(NdbDictionary::Index::UniqueHashIndex);
9840 else
9841 {
9842 ndb_index.setType(NdbDictionary::Index::OrderedIndex);
9843 // TODO Only temporary ordered indexes supported
9844 ndb_index.setLogging(FALSE);
9845 }
9846 if (!m_table->getLogging())
9847 ndb_index.setLogging(FALSE);
9848 if (((NDBTAB*)m_table)->getTemporary())
9849 ndb_index.setTemporary(TRUE);
9850 if (ndb_index.setTable(m_tabname))
9851 {
9852 DBUG_RETURN(my_errno= errno);
9853 }
9854
9855 for (; key_part != end; key_part++)
9856 {
9857 Field *field= key_part->field;
9858 #ifndef NDB_WITHOUT_COLUMN_FORMAT
9859 if (field->field_storage_type() == HA_SM_DISK)
9860 {
9861 push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN,
9862 ER_ILLEGAL_HA_CREATE_OPTION,
9863 ER(ER_ILLEGAL_HA_CREATE_OPTION),
9864 ndbcluster_hton_name,
9865 "Index on field "
9866 "declared with "
9867 "STORAGE DISK is not supported");
9868 DBUG_RETURN(HA_ERR_UNSUPPORTED);
9869 }
9870 #endif
9871 DBUG_PRINT("info", ("attr: %s", field->field_name));
9872 if (ndb_index.addColumnName(field->field_name))
9873 {
9874 DBUG_RETURN(my_errno= errno);
9875 }
9876 }
9877
9878 if (dict->createIndex(ndb_index, *m_table))
9879 ERR_RETURN(dict->getNdbError());
9880
9881 // Success
9882 DBUG_PRINT("info", ("Created index %s", name));
9883 DBUG_RETURN(0);
9884 }
9885
add_index_impl(THD * thd,TABLE * table_arg,KEY * key_info,uint num_of_keys)9886 int ha_ndbcluster::add_index_impl(THD *thd, TABLE *table_arg,
9887 KEY *key_info, uint num_of_keys)
9888 {
9889 int error= 0;
9890 uint idx;
9891 DBUG_ENTER("ha_ndbcluster::add_index");
9892 DBUG_PRINT("enter", ("table %s", table_arg->s->table_name.str));
9893 DBUG_ASSERT(m_share->state == NSS_ALTERED);
9894
9895 for (idx= 0; idx < num_of_keys; idx++)
9896 {
9897 KEY *key= key_info + idx;
9898 KEY_PART_INFO *key_part= key->key_part;
9899 KEY_PART_INFO *end= key_part + key->user_defined_key_parts;
9900 NDB_INDEX_TYPE idx_type= get_index_type_from_key(idx, key_info, false);
9901 DBUG_PRINT("info", ("Adding index: '%s'", key_info[idx].name));
9902 // Add fields to key_part struct
9903 for (; key_part != end; key_part++)
9904 key_part->field= table->field[key_part->fieldnr];
9905 // Check index type
9906 // Create index in ndb
9907 if((error= create_index(thd, key_info[idx].name, key, idx_type, idx)))
9908 break;
9909 }
9910 DBUG_RETURN(error);
9911 }
9912
9913 /**
9914 Rename a table in NDB Cluster.
9915 */
9916
rename_table(const char * from,const char * to)9917 int ha_ndbcluster::rename_table(const char *from, const char *to)
9918 {
9919 THD *thd= current_thd;
9920 NDBDICT *dict;
9921 char old_dbname[FN_HEADLEN];
9922 char new_dbname[FN_HEADLEN];
9923 char new_tabname[FN_HEADLEN];
9924 const NDBTAB *orig_tab;
9925 int result;
9926 bool recreate_indexes= FALSE;
9927 NDBDICT::List index_list;
9928
9929 DBUG_ENTER("ha_ndbcluster::rename_table");
9930 DBUG_PRINT("info", ("Renaming %s to %s", from, to));
9931
9932 if (thd == injector_thd)
9933 {
9934 /*
9935 Table was renamed remotely is already
9936 renamed inside ndb.
9937 Just rename .ndb file.
9938 */
9939 DBUG_RETURN(handler::rename_table(from, to));
9940 }
9941
9942 set_dbname(from, old_dbname);
9943 set_dbname(to, new_dbname);
9944 set_tabname(from);
9945 set_tabname(to, new_tabname);
9946
9947 if (check_ndb_connection(thd))
9948 DBUG_RETURN(my_errno= HA_ERR_NO_CONNECTION);
9949
9950 Thd_ndb *thd_ndb= thd_get_thd_ndb(thd);
9951 if (!thd_ndb->has_required_global_schema_lock("ha_ndbcluster::rename_table"))
9952 DBUG_RETURN(HA_ERR_NO_CONNECTION);
9953
9954 Ndb *ndb= get_ndb(thd);
9955 ndb->setDatabaseName(old_dbname);
9956 dict= ndb->getDictionary();
9957 Ndb_table_guard ndbtab_g(dict, m_tabname);
9958 if (!(orig_tab= ndbtab_g.get_table()))
9959 ERR_RETURN(dict->getNdbError());
9960
9961 if (my_strcasecmp(system_charset_info, new_dbname, old_dbname))
9962 {
9963 dict->listIndexes(index_list, *orig_tab);
9964 recreate_indexes= TRUE;
9965 }
9966 // Change current database to that of target table
9967 set_dbname(to);
9968 if (ndb->setDatabaseName(m_dbname))
9969 {
9970 ERR_RETURN(ndb->getNdbError());
9971 }
9972
9973 int ndb_table_id= orig_tab->getObjectId();
9974 int ndb_table_version= orig_tab->getObjectVersion();
9975 /* ndb_share reference temporary */
9976 NDB_SHARE *share= get_share(from, 0, FALSE);
9977 int is_old_table_tmpfile= IS_TMP_PREFIX(m_tabname);
9978 int is_new_table_tmpfile= IS_TMP_PREFIX(new_tabname);
9979 if (!is_new_table_tmpfile && !is_old_table_tmpfile)
9980 {
9981 /*
9982 this is a "real" rename table, i.e. not tied to an offline alter table
9983 - send new name == "to" in query field
9984 */
9985 ndbcluster_log_schema_op(thd, to, strlen(to),
9986 old_dbname, m_tabname,
9987 ndb_table_id, ndb_table_version,
9988 SOT_RENAME_TABLE_PREPARE,
9989 m_dbname, new_tabname);
9990 }
9991 if (share)
9992 {
9993 DBUG_PRINT("NDB_SHARE", ("%s temporary use_count: %u",
9994 share->key, share->use_count));
9995 ndbcluster_prepare_rename_share(share, to);
9996 int ret = ndbcluster_rename_share(thd, share);
9997 assert(ret == 0); NDB_IGNORE_VALUE(ret);
9998 }
9999
10000 NdbDictionary::Table new_tab= *orig_tab;
10001 new_tab.setName(new_tabname);
10002 if (dict->alterTableGlobal(*orig_tab, new_tab) != 0)
10003 {
10004 NdbError ndb_error= dict->getNdbError();
10005 if (share)
10006 {
10007 int ret = ndbcluster_undo_rename_share(thd, share);
10008 assert(ret == 0); NDB_IGNORE_VALUE(ret);
10009 /* ndb_share reference temporary free */
10010 DBUG_PRINT("NDB_SHARE", ("%s temporary free use_count: %u",
10011 share->key, share->use_count));
10012 free_share(&share);
10013 }
10014 ERR_RETURN(ndb_error);
10015 }
10016
10017 // Rename .ndb file
10018 if ((result= handler::rename_table(from, to)))
10019 {
10020 // ToDo in 4.1 should rollback alter table...
10021 if (share)
10022 {
10023 /* ndb_share reference temporary free */
10024 DBUG_PRINT("NDB_SHARE", ("%s temporary use_count: %u",
10025 share->key, share->use_count));
10026 free_share(&share);
10027 }
10028 DBUG_RETURN(result);
10029 }
10030
10031 /* handle old table */
10032 if (!is_old_table_tmpfile)
10033 {
10034 ndbcluster_drop_event(thd, ndb, share, "rename table",
10035 old_dbname, m_tabname);
10036 }
10037
10038 if (!result && !is_new_table_tmpfile)
10039 {
10040 Ndb_table_guard ndbtab_g2(dict, new_tabname);
10041 const NDBTAB *ndbtab= ndbtab_g2.get_table();
10042 #ifdef HAVE_NDB_BINLOG
10043 if (share)
10044 ndbcluster_read_binlog_replication(thd, ndb, share, ndbtab,
10045 ::server_id, NULL, TRUE);
10046 #endif
10047 /* always create an event for the table */
10048 String event_name(INJECTOR_EVENT_LEN);
10049 ndb_rep_event_name(&event_name, new_dbname, new_tabname,
10050 get_binlog_full(share));
10051
10052 if (!Ndb_dist_priv_util::is_distributed_priv_table(new_dbname,
10053 new_tabname) &&
10054 !ndbcluster_create_event(thd, ndb, ndbtab, event_name.c_ptr(), share,
10055 share && ndb_binlog_running ? 2 : 1/* push warning */))
10056 {
10057 if (opt_ndb_extra_logging)
10058 sql_print_information("NDB Binlog: RENAME Event: %s",
10059 event_name.c_ptr());
10060 if (share && (share->op == 0) &&
10061 ndbcluster_create_event_ops(thd, share, ndbtab, event_name.c_ptr()))
10062 {
10063 sql_print_error("NDB Binlog: FAILED create event operations "
10064 "during RENAME. Event %s", event_name.c_ptr());
10065 /* a warning has been issued to the client */
10066 }
10067 }
10068 /*
10069 warning has been issued if ndbcluster_create_event failed
10070 and (share && ndb_binlog_running)
10071 */
10072 if (!is_old_table_tmpfile)
10073 {
10074 /* "real" rename table */
10075 ndbcluster_log_schema_op(thd, thd->query(), thd->query_length(),
10076 old_dbname, m_tabname,
10077 ndb_table_id, ndb_table_version,
10078 SOT_RENAME_TABLE,
10079 m_dbname, new_tabname);
10080 }
10081 else
10082 {
10083 /* final phase of offline alter table */
10084 ndbcluster_log_schema_op(thd, thd->query(), thd->query_length(),
10085 m_dbname, new_tabname,
10086 ndb_table_id, ndb_table_version,
10087 SOT_ALTER_TABLE_COMMIT,
10088 NULL, NULL);
10089
10090 }
10091 }
10092
10093 // If we are moving tables between databases, we need to recreate
10094 // indexes
10095 if (recreate_indexes)
10096 {
10097 for (unsigned i = 0; i < index_list.count; i++)
10098 {
10099 NDBDICT::List::Element& index_el = index_list.elements[i];
10100 // Recreate any indexes not stored in the system database
10101 if (my_strcasecmp(system_charset_info,
10102 index_el.database, NDB_SYSTEM_DATABASE))
10103 {
10104 set_dbname(from);
10105 ndb->setDatabaseName(m_dbname);
10106 const NDBINDEX * index= dict->getIndexGlobal(index_el.name, new_tab);
10107 DBUG_PRINT("info", ("Creating index %s/%s",
10108 index_el.database, index->getName()));
10109 dict->createIndex(*index, new_tab);
10110 DBUG_PRINT("info", ("Dropping index %s/%s",
10111 index_el.database, index->getName()));
10112 set_dbname(from);
10113 ndb->setDatabaseName(m_dbname);
10114 dict->dropIndexGlobal(*index);
10115 }
10116 }
10117 }
10118 if (share)
10119 {
10120 /* ndb_share reference temporary free */
10121 DBUG_PRINT("NDB_SHARE", ("%s temporary free use_count: %u",
10122 share->key, share->use_count));
10123 free_share(&share);
10124 }
10125
10126 DBUG_RETURN(result);
10127 }
10128
10129
10130 /**
10131 Delete table from NDB Cluster.
10132 */
10133
10134 static
10135 void
delete_table_drop_share(NDB_SHARE * share,const char * path)10136 delete_table_drop_share(NDB_SHARE* share, const char * path)
10137 {
10138 if (share)
10139 {
10140 pthread_mutex_lock(&ndbcluster_mutex);
10141 do_drop:
10142 if (share->state != NSS_DROPPED)
10143 {
10144 /*
10145 The share kept by the server has not been freed, free it
10146 */
10147 share->state= NSS_DROPPED;
10148 /* ndb_share reference create free */
10149 DBUG_PRINT("NDB_SHARE", ("%s create free use_count: %u",
10150 share->key, share->use_count));
10151 free_share(&share, TRUE);
10152 }
10153 /* ndb_share reference temporary free */
10154 DBUG_PRINT("NDB_SHARE", ("%s temporary free use_count: %u",
10155 share->key, share->use_count));
10156 free_share(&share, TRUE);
10157 pthread_mutex_unlock(&ndbcluster_mutex);
10158 }
10159 else if (path)
10160 {
10161 pthread_mutex_lock(&ndbcluster_mutex);
10162 share= get_share(path, 0, FALSE, TRUE);
10163 if (share)
10164 {
10165 goto do_drop;
10166 }
10167 pthread_mutex_unlock(&ndbcluster_mutex);
10168 }
10169 }
10170
10171 /* static version which does not need a handler */
10172
10173 int
drop_table_impl(THD * thd,ha_ndbcluster * h,Ndb * ndb,const char * path,const char * db,const char * table_name)10174 ha_ndbcluster::drop_table_impl(THD *thd, ha_ndbcluster *h, Ndb *ndb,
10175 const char *path,
10176 const char *db,
10177 const char *table_name)
10178 {
10179 DBUG_ENTER("ha_ndbcluster::ndbcluster_delete_table");
10180 NDBDICT *dict= ndb->getDictionary();
10181 int ndb_table_id= 0;
10182 int ndb_table_version= 0;
10183 /*
10184 Don't allow drop table unless
10185 schema distribution table is setup
10186 */
10187 if (!ndb_schema_share)
10188 {
10189 DBUG_PRINT("info", ("Schema distribution table not setup"));
10190 DBUG_RETURN(HA_ERR_NO_CONNECTION);
10191 }
10192 /* ndb_share reference temporary */
10193 NDB_SHARE *share= get_share(path, 0, FALSE);
10194 if (share)
10195 {
10196 DBUG_PRINT("NDB_SHARE", ("%s temporary use_count: %u",
10197 share->key, share->use_count));
10198 }
10199
10200 /* Drop the table from NDB */
10201
10202 int res= 0;
10203 if (h && h->m_table)
10204 {
10205 retry_temporary_error1:
10206 if (dict->dropTableGlobal(*h->m_table) == 0)
10207 {
10208 ndb_table_id= h->m_table->getObjectId();
10209 ndb_table_version= h->m_table->getObjectVersion();
10210 DBUG_PRINT("info", ("success 1"));
10211 }
10212 else
10213 {
10214 switch (dict->getNdbError().status)
10215 {
10216 case NdbError::TemporaryError:
10217 if (!thd->killed)
10218 goto retry_temporary_error1; // retry indefinitly
10219 break;
10220 default:
10221 break;
10222 }
10223 res= ndb_to_mysql_error(&dict->getNdbError());
10224 DBUG_PRINT("info", ("error(1) %u", res));
10225 }
10226 h->release_metadata(thd, ndb);
10227 }
10228 else
10229 {
10230 ndb->setDatabaseName(db);
10231 while (1)
10232 {
10233 Ndb_table_guard ndbtab_g(dict, table_name);
10234 if (ndbtab_g.get_table())
10235 {
10236 retry_temporary_error2:
10237 if (dict->dropTableGlobal(*ndbtab_g.get_table()) == 0)
10238 {
10239 ndb_table_id= ndbtab_g.get_table()->getObjectId();
10240 ndb_table_version= ndbtab_g.get_table()->getObjectVersion();
10241 DBUG_PRINT("info", ("success 2"));
10242 break;
10243 }
10244 else
10245 {
10246 switch (dict->getNdbError().status)
10247 {
10248 case NdbError::TemporaryError:
10249 if (!thd->killed)
10250 goto retry_temporary_error2; // retry indefinitly
10251 break;
10252 default:
10253 if (dict->getNdbError().code == NDB_INVALID_SCHEMA_OBJECT)
10254 {
10255 ndbtab_g.invalidate();
10256 continue;
10257 }
10258 break;
10259 }
10260 }
10261 }
10262 res= ndb_to_mysql_error(&dict->getNdbError());
10263 DBUG_PRINT("info", ("error(2) %u", res));
10264 break;
10265 }
10266 }
10267
10268 if (res)
10269 {
10270 /* the drop table failed for some reason, drop the share anyways */
10271 delete_table_drop_share(share, 0);
10272 DBUG_RETURN(res);
10273 }
10274
10275 /* stop the logging of the dropped table, and cleanup */
10276
10277 /*
10278 drop table is successful even if table does not exist in ndb
10279 and in case table was actually not dropped, there is no need
10280 to force a gcp, and setting the event_name to null will indicate
10281 that there is no event to be dropped
10282 */
10283 int table_dropped= dict->getNdbError().code != 709;
10284
10285 {
10286 if (table_dropped)
10287 {
10288 ndbcluster_handle_drop_table(thd, ndb, share, "delete table",
10289 db, table_name);
10290 }
10291 else
10292 {
10293 /**
10294 * Setting 0,0 will cause ndbcluster_drop_event *not* to be called
10295 */
10296 ndbcluster_handle_drop_table(thd, ndb, share, "delete table",
10297 0, 0);
10298 }
10299 }
10300
10301 if (!IS_TMP_PREFIX(table_name) && share &&
10302 thd->lex->sql_command != SQLCOM_TRUNCATE)
10303 {
10304 ndbcluster_log_schema_op(thd,
10305 thd->query(), thd->query_length(),
10306 share->db, share->table_name,
10307 ndb_table_id, ndb_table_version,
10308 SOT_DROP_TABLE, NULL, NULL);
10309 }
10310
10311 delete_table_drop_share(share, 0);
10312 DBUG_RETURN(0);
10313 }
10314
delete_table(const char * name)10315 int ha_ndbcluster::delete_table(const char *name)
10316 {
10317 THD *thd= current_thd;
10318 Thd_ndb *thd_ndb= get_thd_ndb(thd);
10319 Ndb *ndb;
10320 int error= 0;
10321 DBUG_ENTER("ha_ndbcluster::delete_table");
10322 DBUG_PRINT("enter", ("name: %s", name));
10323
10324 if ((thd == injector_thd) ||
10325 (thd_ndb->options & TNO_NO_NDB_DROP_TABLE))
10326 {
10327 /*
10328 Table was dropped remotely is already
10329 dropped inside ndb.
10330 Just drop local files.
10331 */
10332 delete_table_drop_share(0, name);
10333 DBUG_RETURN(handler::delete_table(name));
10334 }
10335
10336 set_dbname(name);
10337 set_tabname(name);
10338
10339 /*
10340 Don't allow drop table unless
10341 schema distribution table is setup
10342 */
10343 if (!ndb_schema_share)
10344 {
10345 DBUG_PRINT("info", ("Schema distribution table not setup"));
10346 error= HA_ERR_NO_CONNECTION;
10347 goto err;
10348 }
10349
10350 if (check_ndb_connection(thd))
10351 {
10352 error= HA_ERR_NO_CONNECTION;
10353 goto err;
10354 }
10355
10356 ndb= thd_ndb->ndb;
10357
10358 if (!thd_ndb->has_required_global_schema_lock("ha_ndbcluster::delete_table"))
10359 {
10360 error= HA_ERR_NO_CONNECTION;
10361 goto err;
10362 }
10363
10364 /*
10365 Drop table in ndb.
10366 If it was already gone it might have been dropped
10367 remotely, give a warning and then drop .ndb file.
10368 */
10369 if (!(error= drop_table_impl(thd, this, ndb, name,
10370 m_dbname, m_tabname)) ||
10371 error == HA_ERR_NO_SUCH_TABLE)
10372 {
10373 /* Call ancestor function to delete .ndb file */
10374 int error1= handler::delete_table(name);
10375 if (!error)
10376 error= error1;
10377 }
10378
10379 err:
10380 DBUG_RETURN(error);
10381 }
10382
10383
get_auto_increment(ulonglong offset,ulonglong increment,ulonglong nb_desired_values,ulonglong * first_value,ulonglong * nb_reserved_values)10384 void ha_ndbcluster::get_auto_increment(ulonglong offset, ulonglong increment,
10385 ulonglong nb_desired_values,
10386 ulonglong *first_value,
10387 ulonglong *nb_reserved_values)
10388 {
10389 Uint64 auto_value;
10390 THD *thd= current_thd;
10391 DBUG_ENTER("get_auto_increment");
10392 DBUG_PRINT("enter", ("m_tabname: %s", m_tabname));
10393 Ndb *ndb= get_ndb(table->in_use);
10394 uint retries= NDB_AUTO_INCREMENT_RETRIES;
10395 int retry_sleep= 30; /* 30 milliseconds, transaction */
10396 for (;;)
10397 {
10398 Ndb_tuple_id_range_guard g(m_share);
10399 if ((m_skip_auto_increment &&
10400 ndb->readAutoIncrementValue(m_table, g.range, auto_value)) ||
10401 ndb->getAutoIncrementValue(m_table, g.range, auto_value,
10402 Uint32(m_autoincrement_prefetch),
10403 increment, offset))
10404 {
10405 if (--retries && !thd->killed &&
10406 ndb->getNdbError().status == NdbError::TemporaryError)
10407 {
10408 do_retry_sleep(retry_sleep);
10409 continue;
10410 }
10411 const NdbError err= ndb->getNdbError();
10412 sql_print_error("Error %lu in ::get_auto_increment(): %s",
10413 (ulong) err.code, err.message);
10414 *first_value= ~(ulonglong) 0;
10415 DBUG_VOID_RETURN;
10416 }
10417 break;
10418 }
10419 *first_value= (longlong)auto_value;
10420 /* From the point of view of MySQL, NDB reserves one row at a time */
10421 *nb_reserved_values= 1;
10422 DBUG_VOID_RETURN;
10423 }
10424
10425
10426 /**
10427 Constructor for the NDB Cluster table handler .
10428 */
10429
ha_ndbcluster(handlerton * hton,TABLE_SHARE * table_arg)10430 ha_ndbcluster::ha_ndbcluster(handlerton *hton, TABLE_SHARE *table_arg):
10431 handler(hton, table_arg),
10432 m_thd_ndb(NULL),
10433 m_active_cursor(NULL),
10434 m_table(NULL),
10435 m_ndb_record(0),
10436 m_ndb_hidden_key_record(0),
10437 m_table_info(NULL),
10438 m_share(0),
10439 m_key_fields(NULL),
10440 m_part_info(NULL),
10441 m_user_defined_partitioning(FALSE),
10442 m_use_partition_pruning(FALSE),
10443 m_sorted(FALSE),
10444 m_use_write(FALSE),
10445 m_ignore_dup_key(FALSE),
10446 m_has_unique_index(FALSE),
10447 m_ignore_no_key(FALSE),
10448 m_read_before_write_removal_possible(FALSE),
10449 m_read_before_write_removal_used(FALSE),
10450 m_rows_updated(0),
10451 m_rows_deleted(0),
10452 m_rows_to_insert((ha_rows) 1),
10453 m_rows_inserted((ha_rows) 0),
10454 m_rows_changed((ha_rows) 0),
10455 m_delete_cannot_batch(FALSE),
10456 m_update_cannot_batch(FALSE),
10457 m_skip_auto_increment(TRUE),
10458 m_blobs_pending(0),
10459 m_is_bulk_delete(false),
10460 m_blobs_row_total_size(0),
10461 m_blobs_buffer(0),
10462 m_blobs_buffer_size(0),
10463 m_dupkey((uint) -1),
10464 m_autoincrement_prefetch(DEFAULT_AUTO_PREFETCH),
10465 m_pushed_join_member(NULL),
10466 m_pushed_join_operation(-1),
10467 m_disable_pushed_join(FALSE),
10468 m_active_query(NULL),
10469 m_pushed_operation(NULL),
10470 m_cond(NULL),
10471 m_multi_cursor(NULL)
10472 {
10473 int i;
10474
10475 DBUG_ENTER("ha_ndbcluster");
10476
10477 m_tabname[0]= '\0';
10478 m_dbname[0]= '\0';
10479
10480 stats.records= ~(ha_rows)0; // uninitialized
10481 stats.block_size= 1024;
10482
10483 for (i= 0; i < MAX_KEY; i++)
10484 ndb_init_index(m_index[i]);
10485
10486 DBUG_VOID_RETURN;
10487 }
10488
10489
10490 /**
10491 Destructor for NDB Cluster table handler.
10492 */
10493
~ha_ndbcluster()10494 ha_ndbcluster::~ha_ndbcluster()
10495 {
10496 THD *thd= current_thd;
10497 Ndb *ndb= thd ? check_ndb_in_thd(thd) : g_ndb;
10498 DBUG_ENTER("~ha_ndbcluster");
10499
10500 if (m_share)
10501 {
10502 /* ndb_share reference handler free */
10503 DBUG_PRINT("NDB_SHARE", ("%s handler free use_count: %u",
10504 m_share->key, m_share->use_count));
10505 free_share(&m_share);
10506 }
10507 release_metadata(thd, ndb);
10508 release_blobs_buffer();
10509
10510 // Check for open cursor/transaction
10511 DBUG_ASSERT(m_thd_ndb == NULL);
10512
10513 // Discard any generated condition
10514 DBUG_PRINT("info", ("Deleting generated condition"));
10515 if (m_cond)
10516 {
10517 delete m_cond;
10518 m_cond= NULL;
10519 }
10520 DBUG_PRINT("info", ("Deleting pushed joins"));
10521 #ifndef NDB_WITHOUT_JOIN_PUSHDOWN
10522 DBUG_ASSERT(m_active_query == NULL);
10523 DBUG_ASSERT(m_active_cursor == NULL);
10524 if (m_pushed_join_operation==PUSHED_ROOT)
10525 {
10526 delete m_pushed_join_member; // Also delete QueryDef
10527 }
10528 m_pushed_join_member= NULL;
10529 #endif
10530 DBUG_VOID_RETURN;
10531 }
10532
10533
/**
  Open a table for further use
  - set up ref_length and the per-key field bitmaps (m_key_fields)
  - check that this THD has a usable NDB connection
  - acquire the NDB_SHARE for the table, creating it through
    ndbcluster_create_binlog_setup() if none exists yet
  - fetch metadata for this table from NDB
  - refresh statistics and constant handler info

  Every failure path calls local_close() to undo the partial setup
  before returning.

  @param name            table name as passed by the server; used to set
                         m_dbname/m_tabname and to look up the share
  @param mode            only logged in this function
  @param test_if_locked  only logged in this function

  @retval
    0 ok
  @retval
    != 0 error: 1 on allocation/share failure, otherwise the value
         returned by the failing step
         (original note: "< 0 Table has changed")
*/

int ha_ndbcluster::open(const char *name, int mode, uint test_if_locked)
{
  THD *thd= current_thd;
  int res;
  KEY *key;
  KEY_PART_INFO *key_part_info;
  uint key_parts, i, j;
  DBUG_ENTER("ha_ndbcluster::open");
  DBUG_PRINT("enter", ("name: %s mode: %d test_if_locked: %d",
                       name, mode, test_if_locked));

  if (table_share->primary_key != MAX_KEY)
  {
    /*
      Setup ref_length to make room for the whole
      primary key to be written in the ref variable
    */
    key= table->key_info+table_share->primary_key;
    ref_length= key->key_length;
  }
  else
  {
    /*
      No user-defined primary key: ref_length keeps the value it had on
      entry (NOTE(review): presumably the hidden key length set at
      construction - confirm).
    */
    if (m_user_defined_partitioning)
    {
      /* Add space for partid in ref */
      ref_length+= sizeof(m_part_id);
    }
  }
  DBUG_PRINT("info", ("ref_length: %d", ref_length));

  {
    /*
      Build m_key_fields: one allocation holding a NULL-terminated array
      of MY_BITMAP pointers followed by the MY_BITMAP structs themselves.
      There is one bitmap per key, plus one for the hidden key when the
      table has no primary key.
    */
    char* bitmap_array;
    uint extra_hidden_keys= table_share->primary_key != MAX_KEY ? 0 : 1;
    uint n_keys= table_share->keys + extra_hidden_keys;
    uint ptr_size= sizeof(MY_BITMAP*) * (n_keys + 1 /* null termination */);
    uint map_size= sizeof(MY_BITMAP) * n_keys;
    m_key_fields= (MY_BITMAP**)my_malloc(ptr_size + map_size,
                                         MYF(MY_WME + MY_ZEROFILL));
    if (!m_key_fields)
    {
      local_close(thd, FALSE);
      DBUG_RETURN(1);
    }
    /* The MY_BITMAP structs start right after the pointer array */
    bitmap_array= ((char*)m_key_fields) + ptr_size;
    for (i= 0; i < n_keys; i++)
    {
      my_bitmap_map *bitbuf= NULL;
      /* The hidden key, if any, is the extra entry after the real keys */
      bool is_hidden_key= (i == table_share->keys);
      m_key_fields[i]= (MY_BITMAP*)bitmap_array;
      if (is_hidden_key || (i == table_share->primary_key))
      {
        /*
          The primary (or hidden) key bitmap uses the member buffer
          m_pk_bitmap_buf; local_close() skips bitmap_free() for it.
        */
        m_pk_bitmap_p= m_key_fields[i];
        bitbuf= m_pk_bitmap_buf;
      }
      if (bitmap_init(m_key_fields[i], bitbuf,
                      table_share->fields, FALSE))
      {
        /* Terminate the array here so local_close() stops at this entry */
        m_key_fields[i]= NULL;
        local_close(thd, FALSE);
        DBUG_RETURN(1);
      }
      if (!is_hidden_key)
      {
        /* Mark every field that is part of this key (fieldnr is 1-based) */
        key= table->key_info + i;
        key_part_info= key->key_part;
        key_parts= key->user_defined_key_parts;
        for (j= 0; j < key_parts; j++, key_part_info++)
          bitmap_set_bit(m_key_fields[i], key_part_info->fieldnr-1);
      }
      else
      {
        /*
          Set bit number table_share->fields directly in the buffer, i.e.
          one past the last user field (presumably the hidden key column).
        */
        uint field_no= table_share->fields;
        ((uchar *)m_pk_bitmap_buf)[field_no>>3]|= (1 << (field_no & 7));
      }
      bitmap_array+= sizeof(MY_BITMAP);
    }
    /* NULL-terminate the pointer array (i == n_keys here) */
    m_key_fields[i]= NULL;
  }

  set_dbname(name);
  set_tabname(name);

  if ((res= check_ndb_connection(thd)) != 0)
  {
    local_close(thd, FALSE);
    DBUG_RETURN(res);
  }

  // Init table lock structure
  /* ndb_share reference handler */
  if ((m_share=get_share(name, table, FALSE)) == 0)
  {
    /**
     * No share present...we must create one
     */
    if (opt_ndb_extra_logging > 19)
    {
      sql_print_information("Calling ndbcluster_create_binlog_setup(%s) in ::open",
                            name);
    }
    Ndb* ndb= check_ndb_in_thd(thd);
    ndbcluster_create_binlog_setup(thd, ndb, name, strlen(name),
                                   m_dbname, m_tabname, table);
    /* Retry the lookup; give up if the share still does not exist */
    if ((m_share=get_share(name, table, FALSE)) == 0)
    {
      local_close(thd, FALSE);
      DBUG_RETURN(1);
    }
  }

  DBUG_PRINT("NDB_SHARE", ("%s handler use_count: %u",
                           m_share->key, m_share->use_count));
  thr_lock_data_init(&m_share->lock,&m_lock,(void*) 0);

  if ((res= get_metadata(thd, name)))
  {
    local_close(thd, FALSE);
    DBUG_RETURN(res);
  }

  /* From here on metadata is loaded, so failures also release it (TRUE) */
  if ((res= update_stats(thd, 1, true)) ||
      (res= info(HA_STATUS_CONST)))
  {
    local_close(thd, TRUE);
    DBUG_RETURN(res);
  }
  if (ndb_binlog_is_read_only())
  {
    table->db_stat|= HA_READ_ONLY;
    sql_print_information("table '%s' opened read only", name);
  }
  DBUG_RETURN(0);
}
10678
10679 /*
10680 * Support for OPTIMIZE TABLE
10681 * reclaims unused space of deleted rows
10682 * and updates index statistics
10683 */
optimize(THD * thd,HA_CHECK_OPT * check_opt)10684 int ha_ndbcluster::optimize(THD* thd, HA_CHECK_OPT* check_opt)
10685 {
10686 ulong error, stats_error= 0;
10687 const uint delay= (uint)THDVAR(thd, optimization_delay);
10688
10689 error= ndb_optimize_table(thd, delay);
10690 stats_error= update_stats(thd, 1);
10691 return (error) ? error : stats_error;
10692 }
10693
ndb_optimize_table(THD * thd,uint delay)10694 int ha_ndbcluster::ndb_optimize_table(THD* thd, uint delay)
10695 {
10696 Thd_ndb *thd_ndb= get_thd_ndb(thd);
10697 Ndb *ndb= thd_ndb->ndb;
10698 NDBDICT *dict= ndb->getDictionary();
10699 int result=0, error= 0;
10700 uint i;
10701 NdbDictionary::OptimizeTableHandle th;
10702 NdbDictionary::OptimizeIndexHandle ih;
10703
10704 DBUG_ENTER("ndb_optimize_table");
10705 if ((error= dict->optimizeTable(*m_table, th)))
10706 {
10707 DBUG_PRINT("info",
10708 ("Optimze table %s returned %d", m_tabname, error));
10709 ERR_RETURN(ndb->getNdbError());
10710 }
10711 while((result= th.next()) == 1)
10712 {
10713 if (thd->killed)
10714 DBUG_RETURN(-1);
10715 my_sleep(1000*delay);
10716 }
10717 if (result == -1 || th.close() == -1)
10718 {
10719 DBUG_PRINT("info",
10720 ("Optimize table %s did not complete", m_tabname));
10721 ERR_RETURN(ndb->getNdbError());
10722 };
10723 for (i= 0; i < MAX_KEY; i++)
10724 {
10725 if (thd->killed)
10726 DBUG_RETURN(-1);
10727 if (m_index[i].status == ACTIVE)
10728 {
10729 const NdbDictionary::Index *index= m_index[i].index;
10730 const NdbDictionary::Index *unique_index= m_index[i].unique_index;
10731
10732 if (index)
10733 {
10734 if ((error= dict->optimizeIndex(*index, ih)))
10735 {
10736 DBUG_PRINT("info",
10737 ("Optimze index %s returned %d",
10738 index->getName(), error));
10739 ERR_RETURN(ndb->getNdbError());
10740
10741 }
10742 while((result= ih.next()) == 1)
10743 {
10744 if (thd->killed)
10745 DBUG_RETURN(-1);
10746 my_sleep(1000*delay);
10747 }
10748 if (result == -1 || ih.close() == -1)
10749 {
10750 DBUG_PRINT("info",
10751 ("Optimize index %s did not complete", index->getName()));
10752 ERR_RETURN(ndb->getNdbError());
10753 }
10754 }
10755 if (unique_index)
10756 {
10757 if ((error= dict->optimizeIndex(*unique_index, ih)))
10758 {
10759 DBUG_PRINT("info",
10760 ("Optimze unique index %s returned %d",
10761 unique_index->getName(), error));
10762 ERR_RETURN(ndb->getNdbError());
10763 }
10764 while((result= ih.next()) == 1)
10765 {
10766 if (thd->killed)
10767 DBUG_RETURN(-1);
10768 my_sleep(1000*delay);
10769 }
10770 if (result == -1 || ih.close() == -1)
10771 {
10772 DBUG_PRINT("info",
10773 ("Optimize index %s did not complete", index->getName()));
10774 ERR_RETURN(ndb->getNdbError());
10775 }
10776 }
10777 }
10778 }
10779 DBUG_RETURN(0);
10780 }
10781
analyze(THD * thd,HA_CHECK_OPT * check_opt)10782 int ha_ndbcluster::analyze(THD* thd, HA_CHECK_OPT* check_opt)
10783 {
10784 int err;
10785 if ((err= update_stats(thd, 1)) != 0)
10786 return err;
10787 const bool index_stat_enable= THDVAR(NULL, index_stat_enable) &&
10788 THDVAR(thd, index_stat_enable);
10789 if (index_stat_enable)
10790 {
10791 if ((err= analyze_index(thd)) != 0)
10792 return err;
10793 }
10794 return 0;
10795 }
10796
10797 int
analyze_index(THD * thd)10798 ha_ndbcluster::analyze_index(THD *thd)
10799 {
10800 DBUG_ENTER("ha_ndbcluster::analyze_index");
10801
10802 Thd_ndb *thd_ndb= get_thd_ndb(thd);
10803 Ndb *ndb= thd_ndb->ndb;
10804
10805 uint inx_list[MAX_INDEXES];
10806 uint inx_count= 0;
10807
10808 uint inx;
10809 for (inx= 0; inx < table_share->keys; inx++)
10810 {
10811 NDB_INDEX_TYPE idx_type= get_index_type(inx);
10812
10813 if ((idx_type == PRIMARY_KEY_ORDERED_INDEX ||
10814 idx_type == UNIQUE_ORDERED_INDEX ||
10815 idx_type == ORDERED_INDEX))
10816 {
10817 if (inx_count < MAX_INDEXES)
10818 inx_list[inx_count++]= inx;
10819 }
10820 }
10821
10822 if (inx_count != 0)
10823 {
10824 int err= ndb_index_stat_analyze(ndb, inx_list, inx_count);
10825 if (err != 0)
10826 DBUG_RETURN(err);
10827 }
10828 DBUG_RETURN(0);
10829 }
10830
10831 /*
10832 Set partition info
10833
10834 SYNOPSIS
10835 set_part_info()
10836 part_info
10837
10838 RETURN VALUE
10839 NONE
10840
10841 DESCRIPTION
10842 Set up partition info when handler object created
10843 */
10844
set_part_info(partition_info * part_info,bool early)10845 void ha_ndbcluster::set_part_info(partition_info *part_info, bool early)
10846 {
10847 DBUG_ENTER("ha_ndbcluster::set_part_info");
10848 m_part_info= part_info;
10849 if (!early)
10850 {
10851 m_use_partition_pruning= FALSE;
10852 if (!(m_part_info->part_type == HASH_PARTITION &&
10853 m_part_info->list_of_part_fields &&
10854 !m_part_info->is_sub_partitioned()))
10855 {
10856 /*
10857 PARTITION BY HASH, RANGE and LIST plus all subpartitioning variants
10858 all use MySQL defined partitioning. PARTITION BY KEY uses NDB native
10859 partitioning scheme.
10860 */
10861 m_use_partition_pruning= TRUE;
10862 m_user_defined_partitioning= TRUE;
10863 }
10864 if (m_part_info->part_type == HASH_PARTITION &&
10865 m_part_info->list_of_part_fields &&
10866 partition_info_num_full_part_fields(m_part_info) == 0)
10867 {
10868 /*
10869 CREATE TABLE t (....) ENGINE NDB PARTITON BY KEY();
10870 where no primary key is defined uses a hidden key as partition field
10871 and this makes it impossible to use any partition pruning. Partition
10872 pruning requires partitioning based on real fields, also the lack of
10873 a primary key means that all accesses to tables are based on either
10874 full table scans or index scans and they can never be pruned those
10875 scans given that the hidden key is unknown. In write_row, update_row,
10876 and delete_row the normal hidden key handling will fix things.
10877 */
10878 m_use_partition_pruning= FALSE;
10879 }
10880 DBUG_PRINT("info", ("m_use_partition_pruning = %d",
10881 m_use_partition_pruning));
10882 }
10883 DBUG_VOID_RETURN;
10884 }
10885
10886 /**
10887 Close the table
10888 - release resources setup by open()
10889 */
10890
local_close(THD * thd,bool release_metadata_flag)10891 void ha_ndbcluster::local_close(THD *thd, bool release_metadata_flag)
10892 {
10893 Ndb *ndb;
10894 DBUG_ENTER("ha_ndbcluster::local_close");
10895 if (m_key_fields)
10896 {
10897 MY_BITMAP **inx_bitmap;
10898 for (inx_bitmap= m_key_fields;
10899 (inx_bitmap != NULL) && ((*inx_bitmap) != NULL);
10900 inx_bitmap++)
10901 if ((*inx_bitmap)->bitmap != m_pk_bitmap_buf)
10902 bitmap_free(*inx_bitmap);
10903 my_free((char*)m_key_fields, MYF(0));
10904 m_key_fields= NULL;
10905 }
10906 if (m_share)
10907 {
10908 /* ndb_share reference handler free */
10909 DBUG_PRINT("NDB_SHARE", ("%s handler free use_count: %u",
10910 m_share->key, m_share->use_count));
10911 free_share(&m_share);
10912 }
10913 m_share= 0;
10914 if (release_metadata_flag)
10915 {
10916 ndb= thd ? check_ndb_in_thd(thd) : g_ndb;
10917 release_metadata(thd, ndb);
10918 }
10919 DBUG_VOID_RETURN;
10920 }
10921
close(void)10922 int ha_ndbcluster::close(void)
10923 {
10924 DBUG_ENTER("close");
10925 THD *thd= table->in_use;
10926 local_close(thd, TRUE);
10927 DBUG_RETURN(0);
10928 }
10929
10930
check_ndb_connection(THD * thd)10931 int ha_ndbcluster::check_ndb_connection(THD* thd)
10932 {
10933 Ndb *ndb;
10934 DBUG_ENTER("check_ndb_connection");
10935
10936 if (!(ndb= check_ndb_in_thd(thd, true)))
10937 DBUG_RETURN(HA_ERR_NO_CONNECTION);
10938 if (ndb->setDatabaseName(m_dbname))
10939 {
10940 ERR_RETURN(ndb->getNdbError());
10941 }
10942 DBUG_RETURN(0);
10943 }
10944
10945
ndbcluster_close_connection(handlerton * hton,THD * thd)10946 static int ndbcluster_close_connection(handlerton *hton, THD *thd)
10947 {
10948 Thd_ndb *thd_ndb= get_thd_ndb(thd);
10949 DBUG_ENTER("ndbcluster_close_connection");
10950 if (thd_ndb)
10951 {
10952 Thd_ndb::release(thd_ndb);
10953 thd_set_thd_ndb(thd, NULL);
10954 }
10955 DBUG_RETURN(0);
10956 }
10957
10958
10959 /**
10960 Try to discover one table from NDB.
10961 */
10962
ndbcluster_discover(handlerton * hton,THD * thd,const char * db,const char * name,uchar ** frmblob,size_t * frmlen)10963 int ndbcluster_discover(handlerton *hton, THD* thd, const char *db,
10964 const char *name,
10965 uchar **frmblob,
10966 size_t *frmlen)
10967 {
10968 int error= 0;
10969 NdbError ndb_error;
10970 size_t len;
10971 uchar* data= NULL;
10972 Ndb* ndb;
10973 char key[FN_REFLEN + 1];
10974 DBUG_ENTER("ndbcluster_discover");
10975 DBUG_PRINT("enter", ("db: %s, name: %s", db, name));
10976
10977 if (!(ndb= check_ndb_in_thd(thd)))
10978 DBUG_RETURN(HA_ERR_NO_CONNECTION);
10979 if (ndb->setDatabaseName(db))
10980 {
10981 ERR_RETURN(ndb->getNdbError());
10982 }
10983 NDBDICT* dict= ndb->getDictionary();
10984 build_table_filename(key, sizeof(key) - 1, db, name, "", 0);
10985 /* ndb_share reference temporary */
10986 NDB_SHARE *share= get_share(key, 0, FALSE);
10987 if (share)
10988 {
10989 DBUG_PRINT("NDB_SHARE", ("%s temporary use_count: %u",
10990 share->key, share->use_count));
10991 }
10992 if (share && get_ndb_share_state(share) == NSS_ALTERED)
10993 {
10994 // Frm has been altered on disk, but not yet written to ndb
10995 if (readfrm(key, &data, &len))
10996 {
10997 DBUG_PRINT("error", ("Could not read frm"));
10998 error= 1;
10999 goto err;
11000 }
11001 }
11002 else
11003 {
11004 Ndb_table_guard ndbtab_g(dict, name);
11005 const NDBTAB *tab= ndbtab_g.get_table();
11006 if (!tab)
11007 {
11008 const NdbError err= dict->getNdbError();
11009 if (err.code == 709 || err.code == 723)
11010 {
11011 error= -1;
11012 DBUG_PRINT("info", ("ndb_error.code: %u", ndb_error.code));
11013 }
11014 else
11015 {
11016 error= -1;
11017 ndb_error= err;
11018 DBUG_PRINT("info", ("ndb_error.code: %u", ndb_error.code));
11019 }
11020 goto err;
11021 }
11022 DBUG_PRINT("info", ("Found table %s", tab->getName()));
11023
11024 len= tab->getFrmLength();
11025 if (len == 0 || tab->getFrmData() == NULL)
11026 {
11027 DBUG_PRINT("error", ("No frm data found."));
11028 error= 1;
11029 goto err;
11030 }
11031
11032 if (unpackfrm(&data, &len, (uchar*) tab->getFrmData()))
11033 {
11034 DBUG_PRINT("error", ("Could not unpack table"));
11035 error= 1;
11036 goto err;
11037 }
11038 }
11039 #ifdef HAVE_NDB_BINLOG
11040 if (ndbcluster_check_if_local_table(db, name) &&
11041 !Ndb_dist_priv_util::is_distributed_priv_table(db, name))
11042 {
11043 DBUG_PRINT("info", ("ndbcluster_discover: Skipping locally defined table '%s.%s'",
11044 db, name));
11045 sql_print_error("ndbcluster_discover: Skipping locally defined table '%s.%s'",
11046 db, name);
11047 error= 1;
11048 goto err;
11049 }
11050 #endif
11051 *frmlen= len;
11052 *frmblob= data;
11053
11054 if (share)
11055 {
11056 /* ndb_share reference temporary free */
11057 DBUG_PRINT("NDB_SHARE", ("%s temporary free use_count: %u",
11058 share->key, share->use_count));
11059 free_share(&share);
11060 }
11061
11062 DBUG_RETURN(0);
11063 err:
11064 my_free((char*)data, MYF(MY_ALLOW_ZERO_PTR));
11065 if (share)
11066 {
11067 /* ndb_share reference temporary free */
11068 DBUG_PRINT("NDB_SHARE", ("%s temporary free use_count: %u",
11069 share->key, share->use_count));
11070 free_share(&share);
11071 }
11072
11073 if (ndb_error.code)
11074 {
11075 ERR_RETURN(ndb_error);
11076 }
11077 DBUG_RETURN(error);
11078 }
11079
11080 /**
11081 Check if a table exists in NDB.
11082 */
11083
ndbcluster_table_exists_in_engine(handlerton * hton,THD * thd,const char * db,const char * name)11084 int ndbcluster_table_exists_in_engine(handlerton *hton, THD* thd,
11085 const char *db,
11086 const char *name)
11087 {
11088 Ndb* ndb;
11089 DBUG_ENTER("ndbcluster_table_exists_in_engine");
11090 DBUG_PRINT("enter", ("db: %s name: %s", db, name));
11091
11092 if (!(ndb= check_ndb_in_thd(thd)))
11093 DBUG_RETURN(HA_ERR_NO_CONNECTION);
11094 NDBDICT* dict= ndb->getDictionary();
11095 NdbDictionary::Dictionary::List list;
11096 if (dict->listObjects(list, NdbDictionary::Object::UserTable) != 0)
11097 {
11098 ERR_RETURN(dict->getNdbError());
11099 }
11100 for (uint i= 0 ; i < list.count ; i++)
11101 {
11102 NdbDictionary::Dictionary::List::Element& elmt= list.elements[i];
11103 if (my_strcasecmp(table_alias_charset, elmt.database, db))
11104 continue;
11105 if (my_strcasecmp(table_alias_charset, elmt.name, name))
11106 continue;
11107 DBUG_PRINT("info", ("Found table"));
11108 DBUG_RETURN(HA_ERR_TABLE_EXIST);
11109 }
11110 DBUG_RETURN(HA_ERR_NO_SUCH_TABLE);
11111 }
11112
11113
11114
tables_get_key(const char * entry,size_t * length,my_bool not_used MY_ATTRIBUTE ((unused)))11115 extern "C" uchar* tables_get_key(const char *entry, size_t *length,
11116 my_bool not_used MY_ATTRIBUTE((unused)))
11117 {
11118 *length= strlen(entry);
11119 return (uchar*) entry;
11120 }
11121
11122
11123 /**
11124 Drop a database in NDB Cluster
11125
11126 @note
11127 add a dummy void function, since stupid handlerton is returning void instead of int...
11128 */
ndbcluster_drop_database_impl(THD * thd,const char * path)11129 int ndbcluster_drop_database_impl(THD *thd, const char *path)
11130 {
11131 DBUG_ENTER("ndbcluster_drop_database");
11132 char dbname[FN_HEADLEN];
11133 Ndb* ndb;
11134 NdbDictionary::Dictionary::List list;
11135 uint i;
11136 char *tabname;
11137 List<char> drop_list;
11138 int ret= 0;
11139 ha_ndbcluster::set_dbname(path, (char *)&dbname);
11140 DBUG_PRINT("enter", ("db: %s", dbname));
11141
11142 if (!(ndb= check_ndb_in_thd(thd)))
11143 DBUG_RETURN(-1);
11144
11145 // List tables in NDB
11146 NDBDICT *dict= ndb->getDictionary();
11147 if (dict->listObjects(list,
11148 NdbDictionary::Object::UserTable) != 0)
11149 DBUG_RETURN(-1);
11150 for (i= 0 ; i < list.count ; i++)
11151 {
11152 NdbDictionary::Dictionary::List::Element& elmt= list.elements[i];
11153 DBUG_PRINT("info", ("Found %s/%s in NDB", elmt.database, elmt.name));
11154
11155 // Add only tables that belongs to db
11156 // Ignore Blob part tables - they are deleted when their table
11157 // is deleted.
11158 if (my_strcasecmp(system_charset_info, elmt.database, dbname) ||
11159 IS_NDB_BLOB_PREFIX(elmt.name))
11160 continue;
11161 DBUG_PRINT("info", ("%s must be dropped", elmt.name));
11162 drop_list.push_back(thd->strdup(elmt.name));
11163 }
11164 // Drop any tables belonging to database
11165 char full_path[FN_REFLEN + 1];
11166 char *tmp= full_path +
11167 build_table_filename(full_path, sizeof(full_path) - 1, dbname, "", "", 0);
11168 if (ndb->setDatabaseName(dbname))
11169 {
11170 ERR_RETURN(ndb->getNdbError());
11171 }
11172 List_iterator_fast<char> it(drop_list);
11173 while ((tabname=it++))
11174 {
11175 tablename_to_filename(tabname, tmp, FN_REFLEN - (tmp - full_path)-1);
11176 if (ha_ndbcluster::drop_table_impl(thd, 0, ndb, full_path, dbname, tabname))
11177 {
11178 const NdbError err= dict->getNdbError();
11179 if (err.code != 709 && err.code != 723)
11180 {
11181 ret= ndb_to_mysql_error(&err);
11182 }
11183 }
11184 }
11185
11186 dict->invalidateDbGlobal(dbname);
11187 DBUG_RETURN(ret);
11188 }
11189
ndbcluster_drop_database(handlerton * hton,char * path)11190 static void ndbcluster_drop_database(handlerton *hton, char *path)
11191 {
11192 THD *thd= current_thd;
11193 DBUG_ENTER("ndbcluster_drop_database");
11194 /*
11195 Don't allow drop database unless
11196 schema distribution table is setup
11197 */
11198 if (!ndb_schema_share)
11199 {
11200 DBUG_PRINT("info", ("Schema distribution table not setup"));
11201 DBUG_VOID_RETURN;
11202 }
11203 ndbcluster_drop_database_impl(thd, path);
11204 char db[FN_REFLEN];
11205 ha_ndbcluster::set_dbname(path, db);
11206 uint32 table_id= 0, table_version= 0;
11207 /*
11208 Since databases aren't real ndb schema object
11209 they don't have any id/version
11210
11211 But since that id/version is used to make sure that event's on SCHEMA_TABLE
11212 is correct, we set random numbers
11213 */
11214 table_id = (uint32)rand();
11215 table_version = (uint32)rand();
11216 ndbcluster_log_schema_op(thd,
11217 thd->query(), thd->query_length(),
11218 db, "", table_id, table_version,
11219 SOT_DROP_DB, NULL, NULL);
11220 DBUG_VOID_RETURN;
11221 }
11222
ndb_create_table_from_engine(THD * thd,const char * db,const char * table_name)11223 int ndb_create_table_from_engine(THD *thd, const char *db,
11224 const char *table_name)
11225 {
11226 // Copy db and table_name to stack buffers since functions used by
11227 // ha_create_table_from_engine may convert to lowercase on some platforms
11228 char db_buf[FN_REFLEN + 1];
11229 char table_name_buf[FN_REFLEN + 1];
11230 strnmov(db_buf, db, sizeof(db_buf));
11231 strnmov(table_name_buf, table_name, sizeof(table_name_buf));
11232
11233 LEX *old_lex= thd->lex, newlex;
11234 thd->lex= &newlex;
11235 newlex.current_select= NULL;
11236 lex_start(thd);
11237 int res= ha_create_table_from_engine(thd, db_buf, table_name_buf);
11238 thd->lex= old_lex;
11239 return res;
11240 }
11241
/*
  find all tables in ndb and discover those needed

  Lists every user table in NDB and, for each one whose database
  directory exists locally, compares the local frm with the frm stored
  in NDB. A missing or mismatching frm triggers discovery from the
  engine; otherwise binlog setup is done for the table. The listing is
  retried (up to 5 passes in total) while tables could not be fetched.

  Returns HA_ERR_NO_CONNECTION without an Ndb object, the mapped NDB
  error if listing fails, otherwise -(skipped + unhandled) of the last
  pass, i.e. 0 when every table was handled.
*/
int ndbcluster_find_all_files(THD *thd)
{
  Ndb* ndb;
  char key[FN_REFLEN + 1];
  NDBDICT *dict;
  int unhandled, retries= 5, skipped;
  DBUG_ENTER("ndbcluster_find_all_files");

  if (!(ndb= check_ndb_in_thd(thd)))
    DBUG_RETURN(HA_ERR_NO_CONNECTION);

  dict= ndb->getDictionary();

  LINT_INIT(unhandled);
  LINT_INIT(skipped);
  do
  {
    NdbDictionary::Dictionary::List list;
    if (dict->listObjects(list, NdbDictionary::Object::UserTable) != 0)
      ERR_RETURN(dict->getNdbError());
    /* Counters are per pass; only the last pass decides the result */
    unhandled= 0;
    skipped= 0;
    retries--;
    for (uint i= 0 ; i < list.count ; i++)
    {
      NDBDICT::List::Element& elmt= list.elements[i];
      /* Temporary tables and blob part tables are never set up */
      if (IS_TMP_PREFIX(elmt.name) || IS_NDB_BLOB_PREFIX(elmt.name))
      {
        DBUG_PRINT("info", ("Skipping %s.%s in NDB", elmt.database, elmt.name));
        continue;
      }
      DBUG_PRINT("info", ("Found %s.%s in NDB", elmt.database, elmt.name));
      /* Only tables in state Online, Backup or Building are set up */
      if (elmt.state != NDBOBJ::StateOnline &&
          elmt.state != NDBOBJ::StateBackup &&
          elmt.state != NDBOBJ::StateBuilding)
      {
        sql_print_information("NDB: skipping setup table %s.%s, in state %d",
                              elmt.database, elmt.name, elmt.state);
        skipped++;
        continue;
      }

      ndb->setDatabaseName(elmt.database);
      Ndb_table_guard ndbtab_g(dict, elmt.name);
      const NDBTAB *ndbtab= ndbtab_g.get_table();
      if (!ndbtab)
      {
        /* Count as unhandled so the pass is retried; log on the last try */
        if (retries == 0)
          sql_print_error("NDB: failed to setup table %s.%s, error: %d, %s",
                          elmt.database, elmt.name,
                          dict->getNdbError().code,
                          dict->getNdbError().message);
        unhandled++;
        continue;
      }

      /* Tables without frm data stored in NDB are ignored */
      if (ndbtab->getFrmLength() == 0)
        continue;

      /* check if database exists */
      char *end= key +
        build_table_filename(key, sizeof(key) - 1, elmt.database, "", "", 0);
      if (my_access(key, F_OK))
      {
        /* no such database defined, skip table */
        continue;
      }
      /* finalize construction of path */
      end+= tablename_to_filename(elmt.name, end,
                                  sizeof(key)-(end-key));
      uchar *data= 0, *pack_data= 0;
      size_t length, pack_length;
      int discover= 0;
      if (readfrm(key, &data, &length) ||
          packfrm(data, length, &pack_data, &pack_length))
      {
        /* Local frm could not be read or packed: discover from NDB */
        discover= 1;
        sql_print_information("NDB: missing frm for %s.%s, discovering...",
                              elmt.database, elmt.name);
      }
      else if (cmp_frm(ndbtab, pack_data, pack_length))
      {
        /*
          Local frm differs from the one in NDB. Discover, unless the
          share is in state NSS_ALTERED.
        */
        /* ndb_share reference temporary */
        NDB_SHARE *share= get_share(key, 0, FALSE);
        if (share)
        {
          DBUG_PRINT("NDB_SHARE", ("%s temporary use_count: %u",
                                   share->key, share->use_count));
        }
        if (!share || get_ndb_share_state(share) != NSS_ALTERED)
        {
          discover= 1;
          sql_print_information("NDB: mismatch in frm for %s.%s, discovering...",
                                elmt.database, elmt.name);
        }
        if (share)
        {
          /* ndb_share reference temporary free */
          DBUG_PRINT("NDB_SHARE", ("%s temporary free use_count: %u",
                                   share->key, share->use_count));
          free_share(&share);
        }
      }
      /* Both buffers may still be NULL here, hence MY_ALLOW_ZERO_PTR */
      my_free((char*) data, MYF(MY_ALLOW_ZERO_PTR));
      my_free((char*) pack_data, MYF(MY_ALLOW_ZERO_PTR));

      if (discover)
      {
        /* ToDo 4.1 database needs to be created if missing */
        if (ndb_create_table_from_engine(thd, elmt.database, elmt.name))
        {
          /* ToDo 4.1 handle error */
        }
      }
      else
      {
        /* set up replication for this table */
        ndbcluster_create_binlog_setup(thd, ndb, key, end-key,
                                       elmt.database, elmt.name,
                                       0);
      }
    }
  }
  while (unhandled && retries);

  DBUG_RETURN(-(skipped + unhandled));
}
11372
11373
/*
  Reconcile the list of table files found on disk for database 'db'
  ('files') with the tables that exist in NDB.

  Steps, all done under the global schema lock:
  - list NDB user tables of 'db' matching 'wild' into the ndb_tables hash
  - walk 'files': discover tables listed in NDB whose reg_ext file is
    missing, warn about local tables shadowing NDB tables, collect
    tables present both in NDB and on disk in ok_tables, and queue
    tables that have a .ndb file but no longer exist in NDB for deletion
  - set up binlogging for everything in ok_tables
  - remove the queued stale tables from disk
  - discover NDB tables that have no local file and append them to 'files'
  - hide NDB_SCHEMA_TABLE when listing NDB_REP_DB

  Returns 0 on success (and immediately when 'dir' is set),
  HA_ERR_NO_CONNECTION when there is no Ndb object or the global schema
  lock cannot be taken, the mapped NDB error if listing fails, or -1 if
  a hash cannot be initialised.
*/
static int
ndbcluster_find_files(handlerton *hton, THD *thd,
                      const char *db, const char *path,
                      const char *wild, bool dir, List<LEX_STRING> *files)
{
  DBUG_ENTER("ndbcluster_find_files");
  DBUG_PRINT("enter", ("db: %s", db));
  { // extra bracket to avoid gcc 2.95.3 warning
  uint i;
  Thd_ndb *thd_ndb;
  Ndb* ndb;
  char name[FN_REFLEN + 1];
  HASH ndb_tables, ok_tables;
  NDBDICT::List list;

  if (!(ndb= check_ndb_in_thd(thd)))
    DBUG_RETURN(HA_ERR_NO_CONNECTION);
  thd_ndb= get_thd_ndb(thd);

  if (dir)
    DBUG_RETURN(0); // Discover of databases not yet supported

  Ndb_global_schema_lock_guard ndb_global_schema_lock_guard(thd);
  if (ndb_global_schema_lock_guard.lock())
    DBUG_RETURN(HA_ERR_NO_CONNECTION);

  // List tables in NDB
  NDBDICT *dict= ndb->getDictionary();
  if (dict->listObjects(list,
                        NdbDictionary::Object::UserTable) != 0)
    ERR_RETURN(dict->getNdbError());

  // ndb_tables: names of the NDB tables in 'db' that pass the wildcard
  if (my_hash_init(&ndb_tables, table_alias_charset,list.count,0,0,
                   (my_hash_get_key)tables_get_key,0,0))
  {
    DBUG_PRINT("error", ("Failed to init HASH ndb_tables"));
    DBUG_RETURN(-1);
  }

  // ok_tables: names of tables found both in NDB and on disk
  if (my_hash_init(&ok_tables, system_charset_info,32,0,0,
                   (my_hash_get_key)tables_get_key,0,0))
  {
    DBUG_PRINT("error", ("Failed to init HASH ok_tables"));
    my_hash_free(&ndb_tables);
    DBUG_RETURN(-1);
  }

  for (i= 0 ; i < list.count ; i++)
  {
    NDBDICT::List::Element& elmt= list.elements[i];
    if (IS_TMP_PREFIX(elmt.name) || IS_NDB_BLOB_PREFIX(elmt.name))
    {
      DBUG_PRINT("info", ("Skipping %s.%s in NDB", elmt.database, elmt.name));
      continue;
    }
    DBUG_PRINT("info", ("Found %s/%s in NDB", elmt.database, elmt.name));

    // Add only tables that belongs to db
    if (my_strcasecmp(system_charset_info, elmt.database, db))
      continue;

    // Apply wildcard to list of tables in NDB
    if (wild)
    {
      if (lower_case_table_names)
      {
        if (wild_case_compare(files_charset_info, elmt.name, wild))
          continue;
      }
      else if (wild_compare(elmt.name,wild,0))
        continue;
    }
    DBUG_PRINT("info", ("Inserting %s into ndb_tables hash", elmt.name));
    // The copy made by thd->strdup() is what the hash entry points to
    my_hash_insert(&ndb_tables, (uchar*)thd->strdup(elmt.name));
  }

  LEX_STRING *file_name;
  List_iterator<LEX_STRING> it(*files);
  List<char> delete_list;
  char *file_name_str;
  // Pass over the files found on disk and classify each of them
  while ((file_name=it++))
  {
    bool file_on_disk= FALSE;
    DBUG_PRINT("info", ("%s", file_name->str));
    if (my_hash_search(&ndb_tables,
                       (const uchar*)file_name->str, file_name->length))
    {
      // Table is known to NDB; make sure its reg_ext file exists
      build_table_filename(name, sizeof(name) - 1, db,
                           file_name->str, reg_ext, 0);
      if (my_access(name, F_OK))
      {
        DBUG_PRINT("info", ("Table %s listed and need discovery",
                            file_name->str));
        if (ndb_create_table_from_engine(thd, db, file_name->str))
        {
          push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN,
                              ER_TABLE_EXISTS_ERROR,
                              "Discover of table %s.%s failed",
                              db, file_name->str);
          continue;
        }
      }
      DBUG_PRINT("info", ("%s existed in NDB _and_ on disk ", file_name->str));
      file_on_disk= TRUE;
    }

    // Check for .ndb file with this name
    build_table_filename(name, sizeof(name) - 1, db,
                         file_name->str, ha_ndb_ext, 0);
    DBUG_PRINT("info", ("Check access for %s", name));
    if (my_access(name, F_OK))
    {
      DBUG_PRINT("info", ("%s did not exist on disk", name));
      // .ndb file did not exist on disk, another table type
      if (file_on_disk)
      {
        // Ignore this ndb table
        uchar *record= my_hash_search(&ndb_tables,
                                      (const uchar*) file_name->str,
                                      file_name->length);
        DBUG_ASSERT(record);
        my_hash_delete(&ndb_tables, record);
        push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN,
                            ER_TABLE_EXISTS_ERROR,
                            "Local table %s.%s shadows ndb table",
                            db, file_name->str);
      }
      continue;
    }
    if (file_on_disk)
    {
      // File existed in NDB and as frm file, put in ok_tables list
      my_hash_insert(&ok_tables, (uchar*) file_name->str);
      continue;
    }
    DBUG_PRINT("info", ("%s existed on disk", name));
    // The .ndb file exists on disk, but it's not in list of tables in ndb
    // Verify that handler agrees table is gone.
    if (ndbcluster_table_exists_in_engine(hton, thd, db, file_name->str) ==
        HA_ERR_NO_SUCH_TABLE)
    {
      DBUG_PRINT("info", ("NDB says %s does not exists", file_name->str));
      it.remove();
      // Put in list of tables to remove from disk
      delete_list.push_back(thd->strdup(file_name->str));
    }
  }

  /* setup logging to binlog for all discovered tables */
  {
    // 'name' holds the database path; each table name is appended at end1
    char *end, *end1= name +
      build_table_filename(name, sizeof(name) - 1, db, "", "", 0);
    for (i= 0; i < ok_tables.records; i++)
    {
      file_name_str= (char*)my_hash_element(&ok_tables, i);
      end= end1 +
        tablename_to_filename(file_name_str, end1, sizeof(name) - (end1 - name));
      ndbcluster_create_binlog_setup(thd, ndb, name, end-name,
                                     db, file_name_str, 0);
    }
  }

  // Check for new files to discover
  DBUG_PRINT("info", ("Checking for new files to discover"));
  List<char> create_list;
  for (i= 0 ; i < ndb_tables.records ; i++)
  {
    file_name_str= (char*) my_hash_element(&ndb_tables, i);
    if (!my_hash_search(&ok_tables,
                        (const uchar*) file_name_str, strlen(file_name_str)))
    {
      build_table_filename(name, sizeof(name) - 1,
                           db, file_name_str, reg_ext, 0);
      if (my_access(name, F_OK))
      {
        DBUG_PRINT("info", ("%s must be discovered", file_name_str));
        // File is in list of ndb tables and not in ok_tables
        // This table need to be created
        create_list.push_back(thd->strdup(file_name_str));
      }
    }
  }

#ifndef NDB_NO_MYSQL_RM_TABLE_PART2
  /*
    Delete old files

    ndbcluster_find_files() may be called from I_S code and ndbcluster_binlog
    thread in situations when some tables are already open. This means that
    code below will try to obtain exclusive metadata lock on some table
    while holding shared meta-data lock on other tables. This might lead to a
    deadlock but such a deadlock should be detected by MDL deadlock detector.
  */
  List_iterator_fast<char> it3(delete_list);
  while ((file_name_str= it3++))
  {
    DBUG_PRINT("info", ("Removing table %s/%s", db, file_name_str));
    // Delete the table and all related files
    TABLE_LIST table_list;
    table_list.init_one_table(db, strlen(db),
                              file_name_str, strlen(file_name_str),
                              file_name_str,
                              TL_WRITE);
    table_list.mdl_request.set_type(MDL_EXCLUSIVE);
    /*
      set TNO_NO_NDB_DROP_TABLE flag to not drop ndb table.
      it should not exist anyways
    */
    thd_ndb->options|= TNO_NO_NDB_DROP_TABLE;
    (void)mysql_rm_table_part2(thd, &table_list,
                               false,   /* if_exists */
                               false,   /* drop_temporary */
                               false,   /* drop_view */
                               true     /* dont_log_query*/);
    thd_ndb->options&= ~TNO_NO_NDB_DROP_TABLE;
    trans_commit_implicit(thd); /* Safety, should be unnecessary. */
    thd->mdl_context.release_transactional_locks();
    /* Clear error message that is returned when table is deleted */
    thd->clear_error();
  }
#endif

  // Create new files
  List_iterator_fast<char> it2(create_list);
  while ((file_name_str=it2++))
  {
    DBUG_PRINT("info", ("Table %s need discovery", file_name_str));
    if (ndb_create_table_from_engine(thd, db, file_name_str) == 0)
    {
      // Successfully discovered: report the table to the caller as well
      LEX_STRING *tmp_file_name= 0;
      tmp_file_name= thd->make_lex_string(tmp_file_name, file_name_str,
                                          strlen(file_name_str), TRUE);
      files->push_back(tmp_file_name);
    }
  }

  my_hash_free(&ok_tables);
  my_hash_free(&ndb_tables);

  // Delete schema file from files
  if (!strcmp(db, NDB_REP_DB))
  {
    /*
      Pop each entry and push it back, except NDB_SCHEMA_TABLE, which is
      dropped from the list. files->elements shrinks when an entry is
      dropped, so the loop bound follows the current list size.
    */
    uint count = 0;
    while (count++ < files->elements)
    {
      file_name = (LEX_STRING *)files->pop();
      if (!strcmp(file_name->str, NDB_SCHEMA_TABLE))
      {
        DBUG_PRINT("info", ("skip %s.%s table, it should be hidden to user",
                   NDB_REP_DB, NDB_SCHEMA_TABLE));
        continue;
      }
      files->push_back(file_name);
    }
  }
  } // extra bracket to avoid gcc 2.95.3 warning
  DBUG_RETURN(0);
}
11632
11633
11634 /*
  Initialise all global variables before creating
  an NDB Cluster table handler
11637 */
11638
11639 /* Call back after cluster connect */
connect_callback()11640 static int connect_callback()
11641 {
11642 pthread_mutex_lock(&LOCK_ndb_util_thread);
11643 update_status_variables(NULL, &g_ndb_status,
11644 g_ndb_cluster_connection);
11645
11646 uint node_id, i= 0;
11647 Ndb_cluster_connection_node_iter node_iter;
11648 memset((void *)g_node_id_map, 0xFFFF, sizeof(g_node_id_map));
11649 while ((node_id= g_ndb_cluster_connection->get_next_node(node_iter)))
11650 g_node_id_map[node_id]= i++;
11651
11652 pthread_cond_signal(&COND_ndb_util_thread);
11653 pthread_mutex_unlock(&LOCK_ndb_util_thread);
11654 return 0;
11655 }
11656
11657 #ifndef NDB_NO_WAIT_SETUP
ndb_wait_setup_func_impl(ulong max_wait)11658 static int ndb_wait_setup_func_impl(ulong max_wait)
11659 {
11660 DBUG_ENTER("ndb_wait_setup_func_impl");
11661
11662 pthread_mutex_lock(&ndbcluster_mutex);
11663
11664 struct timespec abstime;
11665 set_timespec(abstime, 1);
11666
11667 while (!ndb_setup_complete && max_wait)
11668 {
11669 int rc= pthread_cond_timedwait(&COND_ndb_setup_complete,
11670 &ndbcluster_mutex,
11671 &abstime);
11672 if (rc)
11673 {
11674 if (rc == ETIMEDOUT)
11675 {
11676 DBUG_PRINT("info", ("1s elapsed waiting"));
11677 max_wait--;
11678 set_timespec(abstime, 1); /* 1 second from now*/
11679 }
11680 else
11681 {
11682 DBUG_PRINT("info", ("Bad pthread_cond_timedwait rc : %u",
11683 rc));
11684 assert(false);
11685 break;
11686 }
11687 }
11688 }
11689
11690 pthread_mutex_unlock(&ndbcluster_mutex);
11691
11692 DBUG_RETURN((ndb_setup_complete == 1)? 0 : 1);
11693 }
11694
11695 int(*ndb_wait_setup_func)(ulong) = 0;
11696 #endif
11697 extern int ndb_dictionary_is_mysqld;
11698
ndbcluster_init(void * p)11699 static int ndbcluster_init(void *p)
11700 {
11701 DBUG_ENTER("ndbcluster_init");
11702
11703 if (ndbcluster_inited)
11704 DBUG_RETURN(FALSE);
11705
11706 pthread_mutex_init(&ndbcluster_mutex,MY_MUTEX_INIT_FAST);
11707 pthread_mutex_init(&LOCK_ndb_util_thread, MY_MUTEX_INIT_FAST);
11708 pthread_cond_init(&COND_ndb_util_thread, NULL);
11709 pthread_cond_init(&COND_ndb_util_ready, NULL);
11710 pthread_cond_init(&COND_ndb_setup_complete, NULL);
11711 ndb_util_thread_running= -1;
11712 pthread_mutex_init(&LOCK_ndb_index_stat_thread, MY_MUTEX_INIT_FAST);
11713 pthread_cond_init(&COND_ndb_index_stat_thread, NULL);
11714 pthread_cond_init(&COND_ndb_index_stat_ready, NULL);
11715 pthread_mutex_init(&ndb_index_stat_list_mutex, MY_MUTEX_INIT_FAST);
11716 pthread_mutex_init(&ndb_index_stat_stat_mutex, MY_MUTEX_INIT_FAST);
11717 pthread_cond_init(&ndb_index_stat_stat_cond, NULL);
11718 ndb_index_stat_thread_running= -1;
11719 ndbcluster_terminating= 0;
11720 ndb_dictionary_is_mysqld= 1;
11721 ndb_setup_complete= 0;
11722 ndbcluster_hton= (handlerton *)p;
11723 ndbcluster_global_schema_lock_init(ndbcluster_hton);
11724
11725 {
11726 handlerton *h= ndbcluster_hton;
11727 h->state= SHOW_OPTION_YES;
11728 h->db_type= DB_TYPE_NDBCLUSTER;
11729 h->close_connection= ndbcluster_close_connection;
11730 h->commit= ndbcluster_commit;
11731 h->rollback= ndbcluster_rollback;
11732 h->create= ndbcluster_create_handler; /* Create a new handler */
11733 h->drop_database= ndbcluster_drop_database; /* Drop a database */
11734 h->panic= ndbcluster_end; /* Panic call */
11735 h->show_status= ndbcluster_show_status; /* Show status */
11736 h->alter_tablespace= ndbcluster_alter_tablespace; /* Show status */
11737 h->partition_flags= ndbcluster_partition_flags; /* Partition flags */
11738 h->alter_table_flags=
11739 ndbcluster_alter_table_flags; /* Alter table flags */
11740 #if MYSQL_VERSION_ID >= 50501
11741 h->fill_is_table= ndbcluster_fill_is_table;
11742 #else
11743 h->fill_files_table= ndbcluster_fill_files_table;
11744 #endif
11745 ndbcluster_binlog_init_handlerton();
11746 h->flags= HTON_CAN_RECREATE | HTON_TEMPORARY_NOT_SUPPORTED;
11747 h->discover= ndbcluster_discover;
11748 h->find_files= ndbcluster_find_files;
11749 h->table_exists_in_engine= ndbcluster_table_exists_in_engine;
11750 #ifndef NDB_WITHOUT_JOIN_PUSHDOWN
11751 h->make_pushed_join= ndbcluster_make_pushed_join;
11752 #endif
11753 }
11754
11755 // Initialize ndb interface
11756 ndb_init_internal();
11757
11758 /* allocate connection resources and connect to cluster */
11759 const uint global_opti_node_select= THDVAR(NULL, optimized_node_selection);
11760 if (ndbcluster_connect(connect_callback, opt_ndb_wait_connected,
11761 opt_ndb_cluster_connection_pool,
11762 (global_opti_node_select & 1),
11763 opt_ndb_connectstring,
11764 opt_ndb_nodeid))
11765 {
11766 DBUG_PRINT("error", ("Could not initiate connection to cluster"));
11767 goto ndbcluster_init_error;
11768 }
11769
11770 (void) my_hash_init(&ndbcluster_open_tables,table_alias_charset,32,0,0,
11771 (my_hash_get_key) ndbcluster_get_key,0,0);
11772 /* start the ndb injector thread */
11773 if (ndbcluster_binlog_start())
11774 {
11775 DBUG_PRINT("error", ("Could start the injector thread"));
11776 goto ndbcluster_init_error;
11777 }
11778
11779 // Create utility thread
11780 pthread_t tmp;
11781 if (pthread_create(&tmp, &connection_attrib, ndb_util_thread_func, 0))
11782 {
11783 DBUG_PRINT("error", ("Could not create ndb utility thread"));
11784 my_hash_free(&ndbcluster_open_tables);
11785 pthread_mutex_destroy(&ndbcluster_mutex);
11786 pthread_mutex_destroy(&LOCK_ndb_util_thread);
11787 pthread_cond_destroy(&COND_ndb_util_thread);
11788 pthread_cond_destroy(&COND_ndb_util_ready);
11789 pthread_cond_destroy(&COND_ndb_setup_complete);
11790 goto ndbcluster_init_error;
11791 }
11792
11793 /* Wait for the util thread to start */
11794 pthread_mutex_lock(&LOCK_ndb_util_thread);
11795 while (ndb_util_thread_running < 0)
11796 pthread_cond_wait(&COND_ndb_util_ready, &LOCK_ndb_util_thread);
11797 pthread_mutex_unlock(&LOCK_ndb_util_thread);
11798
11799 if (!ndb_util_thread_running)
11800 {
11801 DBUG_PRINT("error", ("ndb utility thread exited prematurely"));
11802 my_hash_free(&ndbcluster_open_tables);
11803 pthread_mutex_destroy(&ndbcluster_mutex);
11804 pthread_mutex_destroy(&LOCK_ndb_util_thread);
11805 pthread_cond_destroy(&COND_ndb_util_thread);
11806 pthread_cond_destroy(&COND_ndb_util_ready);
11807 pthread_cond_destroy(&COND_ndb_setup_complete);
11808 goto ndbcluster_init_error;
11809 }
11810
11811 // Create index statistics thread
11812 pthread_t tmp2;
11813 if (pthread_create(&tmp2, &connection_attrib, ndb_index_stat_thread_func, 0))
11814 {
11815 DBUG_PRINT("error", ("Could not create ndb index statistics thread"));
11816 my_hash_free(&ndbcluster_open_tables);
11817 pthread_mutex_destroy(&ndbcluster_mutex);
11818 pthread_mutex_destroy(&LOCK_ndb_index_stat_thread);
11819 pthread_cond_destroy(&COND_ndb_index_stat_thread);
11820 pthread_cond_destroy(&COND_ndb_index_stat_ready);
11821 pthread_mutex_destroy(&ndb_index_stat_list_mutex);
11822 pthread_mutex_destroy(&ndb_index_stat_stat_mutex);
11823 pthread_cond_destroy(&ndb_index_stat_stat_cond);
11824 goto ndbcluster_init_error;
11825 }
11826
11827 /* Wait for the index statistics thread to start */
11828 pthread_mutex_lock(&LOCK_ndb_index_stat_thread);
11829 while (ndb_index_stat_thread_running < 0)
11830 pthread_cond_wait(&COND_ndb_index_stat_ready, &LOCK_ndb_index_stat_thread);
11831 pthread_mutex_unlock(&LOCK_ndb_index_stat_thread);
11832
11833 if (!ndb_index_stat_thread_running)
11834 {
11835 DBUG_PRINT("error", ("ndb index statistics thread exited prematurely"));
11836 my_hash_free(&ndbcluster_open_tables);
11837 pthread_mutex_destroy(&ndbcluster_mutex);
11838 pthread_mutex_destroy(&LOCK_ndb_index_stat_thread);
11839 pthread_cond_destroy(&COND_ndb_index_stat_thread);
11840 pthread_cond_destroy(&COND_ndb_index_stat_ready);
11841 pthread_mutex_destroy(&ndb_index_stat_list_mutex);
11842 pthread_mutex_destroy(&ndb_index_stat_stat_mutex);
11843 pthread_cond_destroy(&ndb_index_stat_stat_cond);
11844 goto ndbcluster_init_error;
11845 }
11846
11847 #ifndef NDB_NO_WAIT_SETUP
11848 ndb_wait_setup_func= ndb_wait_setup_func_impl;
11849 #endif
11850
11851 memset(&g_slave_api_client_stats, 0, sizeof(g_slave_api_client_stats));
11852
11853 ndbcluster_inited= 1;
11854 DBUG_RETURN(FALSE);
11855
11856 ndbcluster_init_error:
11857 /* disconnect from cluster and free connection resources */
11858 ndbcluster_disconnect();
11859 ndbcluster_hton->state= SHOW_OPTION_DISABLED; // If we couldn't use handler
11860
11861 ndbcluster_global_schema_lock_deinit();
11862
11863 DBUG_RETURN(TRUE);
11864 }
11865
11866 #ifndef DBUG_OFF
11867 static
11868 const char*
get_share_state_string(NDB_SHARE_STATE s)11869 get_share_state_string(NDB_SHARE_STATE s)
11870 {
11871 switch(s) {
11872 case NSS_INITIAL:
11873 return "NSS_INITIAL";
11874 case NSS_ALTERED:
11875 return "NSS_ALTERED";
11876 case NSS_DROPPED:
11877 return "NSS_DROPPED";
11878 }
11879 assert(false);
11880 return "<unknown>";
11881 }
11882 #endif
11883
11884 int ndbcluster_binlog_end(THD *thd);
11885
ndbcluster_end(handlerton * hton,ha_panic_function type)11886 static int ndbcluster_end(handlerton *hton, ha_panic_function type)
11887 {
11888 DBUG_ENTER("ndbcluster_end");
11889
11890 if (!ndbcluster_inited)
11891 DBUG_RETURN(0);
11892 ndbcluster_inited= 0;
11893
11894 /* wait for index stat thread to finish */
11895 sql_print_information("Stopping Cluster Index Statistics thread");
11896 pthread_mutex_lock(&LOCK_ndb_index_stat_thread);
11897 ndbcluster_terminating= 1;
11898 pthread_cond_signal(&COND_ndb_index_stat_thread);
11899 while (ndb_index_stat_thread_running > 0)
11900 pthread_cond_wait(&COND_ndb_index_stat_ready, &LOCK_ndb_index_stat_thread);
11901 pthread_mutex_unlock(&LOCK_ndb_index_stat_thread);
11902
11903 /* wait for util and binlog thread to finish */
11904 ndbcluster_binlog_end(NULL);
11905
11906 {
11907 pthread_mutex_lock(&ndbcluster_mutex);
11908 uint save = ndbcluster_open_tables.records; (void)save;
11909 while (ndbcluster_open_tables.records)
11910 {
11911 NDB_SHARE *share=
11912 (NDB_SHARE*) my_hash_element(&ndbcluster_open_tables, 0);
11913 #ifndef DBUG_OFF
11914 fprintf(stderr,
11915 "NDB: table share %s with use_count %d state: %s(%u) not freed\n",
11916 share->key, share->use_count,
11917 get_share_state_string(share->state),
11918 (uint)share->state);
11919 #endif
11920 ndbcluster_real_free_share(&share);
11921 }
11922 pthread_mutex_unlock(&ndbcluster_mutex);
11923 DBUG_ASSERT(save == 0);
11924 }
11925 my_hash_free(&ndbcluster_open_tables);
11926
11927 ndb_index_stat_end();
11928 ndbcluster_disconnect();
11929
11930 ndbcluster_global_schema_lock_deinit();
11931
11932 // cleanup ndb interface
11933 ndb_end_internal();
11934
11935 pthread_mutex_destroy(&ndbcluster_mutex);
11936 pthread_mutex_destroy(&LOCK_ndb_util_thread);
11937 pthread_cond_destroy(&COND_ndb_util_thread);
11938 pthread_cond_destroy(&COND_ndb_util_ready);
11939 pthread_cond_destroy(&COND_ndb_setup_complete);
11940 pthread_mutex_destroy(&LOCK_ndb_index_stat_thread);
11941 pthread_cond_destroy(&COND_ndb_index_stat_thread);
11942 pthread_cond_destroy(&COND_ndb_index_stat_ready);
11943
11944 DBUG_RETURN(0);
11945 }
11946
print_error(int error,myf errflag)11947 void ha_ndbcluster::print_error(int error, myf errflag)
11948 {
11949 DBUG_ENTER("ha_ndbcluster::print_error");
11950 DBUG_PRINT("enter", ("error: %d", error));
11951
11952 if (error == HA_ERR_NO_PARTITION_FOUND)
11953 m_part_info->print_no_partition_found(table);
11954 else
11955 {
11956 if (error == HA_ERR_FOUND_DUPP_KEY &&
11957 (table == NULL || table->file == NULL))
11958 {
11959 /*
11960 This is a sideffect of 'ndbcluster_print_error' (called from
11961 'ndbcluster_commit' and 'ndbcluster_rollback') which realises
11962 that it "knows nothing" and creates a brand new ha_ndbcluster
11963 in order to be able to call the print_error() function.
11964 Unfortunately the new ha_ndbcluster hasn't been open()ed
11965 and thus table pointer etc. is not set. Since handler::print_error()
11966 will use that pointer without checking for NULL(it naturally
11967 assumes an error can only be returned when the handler is open)
11968 this would crash the mysqld unless it's handled here.
11969 */
11970 my_error(ER_DUP_KEY, errflag, table_share->table_name.str, error);
11971 DBUG_VOID_RETURN;
11972 }
11973
11974 handler::print_error(error, errflag);
11975 }
11976 DBUG_VOID_RETURN;
11977 }
11978
11979
11980 /**
11981 Static error print function called from static handler method
11982 ndbcluster_commit and ndbcluster_rollback.
11983 */
11984
ndbcluster_print_error(int error,const NdbOperation * error_op)11985 void ndbcluster_print_error(int error, const NdbOperation *error_op)
11986 {
11987 DBUG_ENTER("ndbcluster_print_error");
11988 TABLE_SHARE share;
11989 const char *tab_name= (error_op) ? error_op->getTableName() : "";
11990 if (tab_name == NULL)
11991 {
11992 DBUG_ASSERT(tab_name != NULL);
11993 tab_name= "";
11994 }
11995 share.db.str= (char*) "";
11996 share.db.length= 0;
11997 share.table_name.str= (char *) tab_name;
11998 share.table_name.length= strlen(tab_name);
11999 ha_ndbcluster error_handler(ndbcluster_hton, &share);
12000 error_handler.print_error(error, MYF(0));
12001 DBUG_VOID_RETURN;
12002 }
12003
12004 /**
12005 Set a given location from full pathname to database name.
12006 */
12007
set_dbname(const char * path_name,char * dbname)12008 void ha_ndbcluster::set_dbname(const char *path_name, char *dbname)
12009 {
12010 char *end, *ptr, *tmp_name;
12011 char tmp_buff[FN_REFLEN + 1];
12012
12013 tmp_name= tmp_buff;
12014 /* Scan name from the end */
12015 ptr= strend(path_name)-1;
12016 while (ptr >= path_name && *ptr != '\\' && *ptr != '/') {
12017 ptr--;
12018 }
12019 ptr--;
12020 end= ptr;
12021 while (ptr >= path_name && *ptr != '\\' && *ptr != '/') {
12022 ptr--;
12023 }
12024 uint name_len= end - ptr;
12025 memcpy(tmp_name, ptr + 1, name_len);
12026 tmp_name[name_len]= '\0';
12027 filename_to_tablename(tmp_name, dbname, sizeof(tmp_buff) - 1);
12028 }
12029
12030 /**
12031 Set m_dbname from full pathname to table file.
12032 */
12033
set_dbname(const char * path_name)12034 void ha_ndbcluster::set_dbname(const char *path_name)
12035 {
12036 set_dbname(path_name, m_dbname);
12037 }
12038
12039 /**
12040 Set a given location from full pathname to table file.
12041 */
12042
12043 void
set_tabname(const char * path_name,char * tabname)12044 ha_ndbcluster::set_tabname(const char *path_name, char * tabname)
12045 {
12046 char *end, *ptr, *tmp_name;
12047 char tmp_buff[FN_REFLEN + 1];
12048
12049 tmp_name= tmp_buff;
12050 /* Scan name from the end */
12051 end= strend(path_name)-1;
12052 ptr= end;
12053 while (ptr >= path_name && *ptr != '\\' && *ptr != '/') {
12054 ptr--;
12055 }
12056 uint name_len= end - ptr;
12057 memcpy(tmp_name, ptr + 1, end - ptr);
12058 tmp_name[name_len]= '\0';
12059 filename_to_tablename(tmp_name, tabname, sizeof(tmp_buff) - 1);
12060 }
12061
12062 /**
12063 Set m_tabname from full pathname to table file.
12064 */
12065
set_tabname(const char * path_name)12066 void ha_ndbcluster::set_tabname(const char *path_name)
12067 {
12068 set_tabname(path_name, m_tabname);
12069 }
12070
12071
12072 /*
12073 If there are no stored stats, should we do a tree-dive on all db
12074 nodes. The result is fairly good but does mean a round-trip.
12075 */
12076 static const bool g_ndb_records_in_range_tree_dive= false;
12077
12078 /* Determine roughly how many records are in the range specified */
12079 ha_rows
records_in_range(uint inx,key_range * min_key,key_range * max_key)12080 ha_ndbcluster::records_in_range(uint inx, key_range *min_key,
12081 key_range *max_key)
12082 {
12083 KEY *key_info= table->key_info + inx;
12084 uint key_length= key_info->key_length;
12085 NDB_INDEX_TYPE idx_type= get_index_type(inx);
12086
12087 DBUG_ENTER("records_in_range");
12088 // Prevent partial read of hash indexes by returning HA_POS_ERROR
12089 if ((idx_type == UNIQUE_INDEX || idx_type == PRIMARY_KEY_INDEX) &&
12090 ((min_key && min_key->length < key_length) ||
12091 (max_key && max_key->length < key_length)))
12092 DBUG_RETURN(HA_POS_ERROR);
12093
12094 // Read from hash index with full key
12095 // This is a "const" table which returns only one record!
12096 if ((idx_type != ORDERED_INDEX) &&
12097 ((min_key && min_key->length == key_length) &&
12098 (max_key && max_key->length == key_length) &&
12099 (min_key->key==max_key->key ||
12100 memcmp(min_key->key, max_key->key, key_length)==0)))
12101 DBUG_RETURN(1);
12102
12103 // XXX why this if
12104 if ((idx_type == PRIMARY_KEY_ORDERED_INDEX ||
12105 idx_type == UNIQUE_ORDERED_INDEX ||
12106 idx_type == ORDERED_INDEX))
12107 {
12108 THD *thd= current_thd;
12109 const bool index_stat_enable= THDVAR(NULL, index_stat_enable) &&
12110 THDVAR(thd, index_stat_enable);
12111
12112 if (index_stat_enable)
12113 {
12114 ha_rows rows= HA_POS_ERROR;
12115 int err= ndb_index_stat_get_rir(inx, min_key, max_key, &rows);
12116 if (err == 0)
12117 {
12118 /**
12119 * optmizer thinks that all values < 2 are exact...but
12120 * but we don't provide exact statistics
12121 */
12122 if (rows < 2)
12123 rows = 2;
12124 DBUG_RETURN(rows);
12125 }
12126 if (err != 0 &&
12127 /* no stats is not unexpected error */
12128 err != NdbIndexStat::NoIndexStats &&
12129 /* warning was printed at first error */
12130 err != Ndb_index_stat_error_HAS_ERROR)
12131 {
12132 push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN,
12133 ER_CANT_GET_STAT, /* pun? */
12134 "index stats (RIR) for key %s:"
12135 " unexpected error %d",
12136 key_info->name, err);
12137 }
12138 /*fall through*/
12139 }
12140
12141 if (g_ndb_records_in_range_tree_dive)
12142 {
12143 NDB_INDEX_DATA& d=m_index[inx];
12144 const NDBINDEX* index= d.index;
12145 Ndb *ndb= get_ndb(thd);
12146 NdbTransaction* active_trans= m_thd_ndb ? m_thd_ndb->trans : 0;
12147 NdbTransaction* trans=NULL;
12148 int res=0;
12149 Uint64 rows;
12150
12151 do
12152 {
12153 if ((trans=active_trans) == NULL ||
12154 trans->commitStatus() != NdbTransaction::Started)
12155 {
12156 DBUG_PRINT("info", ("no active trans"));
12157 if (! (trans=ndb->startTransaction()))
12158 ERR_BREAK(ndb->getNdbError(), res);
12159 }
12160
12161 /* Create an IndexBound struct for the keys */
12162 NdbIndexScanOperation::IndexBound ib;
12163 compute_index_bounds(ib,
12164 key_info,
12165 min_key,
12166 max_key,
12167 0);
12168
12169 ib.range_no= 0;
12170
12171 NdbIndexStat is;
12172 if (is.records_in_range(index,
12173 trans,
12174 d.ndb_record_key,
12175 m_ndb_record,
12176 &ib,
12177 0,
12178 &rows,
12179 0) == -1)
12180 ERR_BREAK(is.getNdbError(), res);
12181 } while (0);
12182
12183 if (trans != active_trans && rows == 0)
12184 rows = 1;
12185 if (trans != active_trans && trans != NULL)
12186 ndb->closeTransaction(trans);
12187 if (res == 0)
12188 DBUG_RETURN(rows);
12189 /*fall through*/
12190 }
12191 }
12192
12193 /* Use simple heuristics to estimate fraction
12194 of 'stats.record' returned from range.
12195 */
12196 do
12197 {
12198 if (stats.records == ~(ha_rows)0 || stats.records == 0)
12199 {
12200 /* Refresh statistics, only read from datanodes if 'use_exact_count' */
12201 THD *thd= current_thd;
12202 if (update_stats(thd, THDVAR(thd, use_exact_count)))
12203 break;
12204 }
12205
12206 Uint64 rows;
12207 Uint64 table_rows= stats.records;
12208 size_t eq_bound_len= 0;
12209 size_t min_key_length= (min_key) ? min_key->length : 0;
12210 size_t max_key_length= (max_key) ? max_key->length : 0;
12211
12212 // Might have an closed/open range bound:
12213 // Low range open
12214 if (!min_key_length)
12215 {
12216 rows= (!max_key_length)
12217 ? table_rows // No range was specified
12218 : table_rows/10; // -oo .. <high range> -> 10% selectivity
12219 }
12220 // High range open
12221 else if (!max_key_length)
12222 {
12223 rows= table_rows/10; // <low range>..oo -> 10% selectivity
12224 }
12225 else
12226 {
12227 size_t bounds_len= MIN(min_key_length,max_key_length);
12228 uint eq_bound_len= 0;
12229 uint eq_bound_offs= 0;
12230
12231 KEY_PART_INFO* key_part= key_info->key_part;
12232 KEY_PART_INFO* end= key_part+key_info->user_defined_key_parts;
12233 for (; key_part != end; key_part++)
12234 {
12235 uint part_length= key_part->store_length;
12236 if (eq_bound_offs+part_length > bounds_len ||
12237 memcmp(&min_key->key[eq_bound_offs],
12238 &max_key->key[eq_bound_offs],
12239 part_length))
12240 {
12241 break;
12242 }
12243 eq_bound_len+= key_part->length;
12244 eq_bound_offs+= part_length;
12245 }
12246
12247 if (!eq_bound_len)
12248 {
12249 rows= table_rows/20; // <low range>..<high range> -> 5%
12250 }
12251 else
12252 {
12253 // Has an equality range on a leading part of 'key_length':
12254 // - Assume reduced selectivity for non-unique indexes
12255 // by decreasing 'eq_fraction' by 20%
12256 // - Assume equal selectivity for all eq_parts in key.
12257
12258 double eq_fraction = (double)(eq_bound_len) / key_length;
12259 if (idx_type == ORDERED_INDEX) // Non-unique index -> less selectivity
12260 eq_fraction/= 1.20;
12261 if (eq_fraction >= 1.0) // Exact match -> 1 row
12262 DBUG_RETURN(1);
12263
12264 rows = (Uint64)((double)table_rows / pow((double)table_rows, eq_fraction));
12265 if (rows > (table_rows/50)) // EQ-range: Max 2% of rows
12266 rows= (table_rows/50);
12267
12268 if (min_key_length > eq_bound_offs)
12269 rows/= 2;
12270 if (max_key_length > eq_bound_offs)
12271 rows/= 2;
12272 }
12273 }
12274
12275 // Make sure that EQ is preferred even if row-count is low
12276 if (eq_bound_len && rows < 2) // At least 2 rows as not exact
12277 rows= 2;
12278 else if (rows < 3)
12279 rows= 3;
12280 DBUG_RETURN(MIN(rows,table_rows));
12281 } while (0);
12282
12283 DBUG_RETURN(10); /* Poor guess when you don't know anything */
12284 }
12285
table_flags(void) const12286 ulonglong ha_ndbcluster::table_flags(void) const
12287 {
12288 THD *thd= current_thd;
12289 ulonglong f=
12290 HA_REC_NOT_IN_SEQ |
12291 HA_NULL_IN_KEY |
12292 HA_AUTO_PART_KEY |
12293 HA_NO_PREFIX_CHAR_KEYS |
12294 #ifndef NDB_WITH_NEW_MRR_INTERFACE
12295 HA_NEED_READ_RANGE_BUFFER |
12296 #endif
12297 HA_CAN_GEOMETRY |
12298 HA_CAN_BIT_FIELD |
12299 HA_PRIMARY_KEY_REQUIRED_FOR_POSITION |
12300 HA_PRIMARY_KEY_REQUIRED_FOR_DELETE |
12301 HA_PARTIAL_COLUMN_READ |
12302 HA_HAS_OWN_BINLOGGING |
12303 HA_BINLOG_ROW_CAPABLE |
12304 HA_HAS_RECORDS |
12305 #ifndef NDB_WITHOUT_ONLINE_ALTER
12306 HA_ONLINE_ALTER |
12307 #endif
12308 0;
12309
12310 /*
12311 To allow for logging of ndb tables during stmt based logging;
12312 flag cabablity, but also turn off flag for OWN_BINLOGGING
12313 */
12314 if (thd->variables.binlog_format == BINLOG_FORMAT_STMT)
12315 f= (f | HA_BINLOG_STMT_CAPABLE) & ~HA_HAS_OWN_BINLOGGING;
12316
12317 /**
12318 * To maximize join pushability we want const-table
12319 * optimization blocked if 'ndb_join_pushdown= on'
12320 */
12321 if (THDVAR(thd, join_pushdown))
12322 f= f | HA_BLOCK_CONST_TABLE;
12323
12324 return f;
12325 }
12326
table_type() const12327 const char * ha_ndbcluster::table_type() const
12328 {
12329 return("NDBCLUSTER");
12330 }
max_supported_record_length() const12331 uint ha_ndbcluster::max_supported_record_length() const
12332 {
12333 return NDB_MAX_TUPLE_SIZE;
12334 }
max_supported_keys() const12335 uint ha_ndbcluster::max_supported_keys() const
12336 {
12337 return MAX_KEY;
12338 }
max_supported_key_parts() const12339 uint ha_ndbcluster::max_supported_key_parts() const
12340 {
12341 return NDB_MAX_NO_OF_ATTRIBUTES_IN_KEY;
12342 }
max_supported_key_length() const12343 uint ha_ndbcluster::max_supported_key_length() const
12344 {
12345 return NDB_MAX_KEY_SIZE;
12346 }
max_supported_key_part_length() const12347 uint ha_ndbcluster::max_supported_key_part_length() const
12348 {
12349 return NDB_MAX_KEY_SIZE;
12350 }
low_byte_first() const12351 bool ha_ndbcluster::low_byte_first() const
12352 {
12353 #ifdef WORDS_BIGENDIAN
12354 return FALSE;
12355 #else
12356 return TRUE;
12357 #endif
12358 }
index_type(uint key_number)12359 const char* ha_ndbcluster::index_type(uint key_number)
12360 {
12361 switch (get_index_type(key_number)) {
12362 case ORDERED_INDEX:
12363 case UNIQUE_ORDERED_INDEX:
12364 case PRIMARY_KEY_ORDERED_INDEX:
12365 return "BTREE";
12366 case UNIQUE_INDEX:
12367 case PRIMARY_KEY_INDEX:
12368 default:
12369 return "HASH";
12370 }
12371 }
12372
table_cache_type()12373 uint8 ha_ndbcluster::table_cache_type()
12374 {
12375 DBUG_ENTER("ha_ndbcluster::table_cache_type=HA_CACHE_TBL_ASKTRANSACT");
12376 DBUG_RETURN(HA_CACHE_TBL_ASKTRANSACT);
12377 }
12378
12379 /**
12380 Retrieve the commit count for the table object.
12381
12382 @param thd Thread context.
12383 @param norm_name Normalized path to the table.
12384 @param[out] commit_count Commit count for the table.
12385
12386 @return 0 on success.
  @return 1 if an error occurred.
12388 */
12389
ndb_get_commitcount(THD * thd,char * norm_name,Uint64 * commit_count)12390 uint ndb_get_commitcount(THD *thd, char *norm_name,
12391 Uint64 *commit_count)
12392 {
12393 char dbname[NAME_LEN + 1];
12394 NDB_SHARE *share;
12395 DBUG_ENTER("ndb_get_commitcount");
12396
12397 DBUG_PRINT("enter", ("name: %s", norm_name));
12398 pthread_mutex_lock(&ndbcluster_mutex);
12399 if (!(share=(NDB_SHARE*) my_hash_search(&ndbcluster_open_tables,
12400 (const uchar*) norm_name,
12401 strlen(norm_name))))
12402 {
12403 pthread_mutex_unlock(&ndbcluster_mutex);
12404 DBUG_PRINT("info", ("Table %s not found in ndbcluster_open_tables",
12405 norm_name));
12406 DBUG_RETURN(1);
12407 }
12408 /* ndb_share reference temporary, free below */
12409 share->use_count++;
12410 DBUG_PRINT("NDB_SHARE", ("%s temporary use_count: %u",
12411 share->key, share->use_count));
12412 pthread_mutex_unlock(&ndbcluster_mutex);
12413
12414 pthread_mutex_lock(&share->mutex);
12415 if (opt_ndb_cache_check_time > 0)
12416 {
12417 if (share->commit_count != 0)
12418 {
12419 *commit_count= share->commit_count;
12420 #ifndef DBUG_OFF
12421 char buff[22];
12422 #endif
12423 DBUG_PRINT("info", ("Getting commit_count: %s from share",
12424 llstr(share->commit_count, buff)));
12425 pthread_mutex_unlock(&share->mutex);
12426 /* ndb_share reference temporary free */
12427 DBUG_PRINT("NDB_SHARE", ("%s temporary free use_count: %u",
12428 share->key, share->use_count));
12429 free_share(&share);
12430 DBUG_RETURN(0);
12431 }
12432 }
12433 DBUG_PRINT("info", ("Get commit_count from NDB"));
12434 Ndb *ndb;
12435 if (!(ndb= check_ndb_in_thd(thd)))
12436 DBUG_RETURN(1);
12437
12438 ha_ndbcluster::set_dbname(norm_name, dbname);
12439 if (ndb->setDatabaseName(dbname))
12440 {
12441 ERR_RETURN(ndb->getNdbError());
12442 }
12443 uint lock= share->commit_count_lock;
12444 pthread_mutex_unlock(&share->mutex);
12445
12446 struct Ndb_statistics stat;
12447 {
12448 char tblname[NAME_LEN + 1];
12449 ha_ndbcluster::set_tabname(norm_name, tblname);
12450 Ndb_table_guard ndbtab_g(ndb->getDictionary(), tblname);
12451 if (ndbtab_g.get_table() == 0
12452 || ndb_get_table_statistics(thd, NULL,
12453 FALSE,
12454 ndb,
12455 ndbtab_g.get_table()->getDefaultRecord(),
12456 &stat))
12457 {
12458 /* ndb_share reference temporary free */
12459 DBUG_PRINT("NDB_SHARE", ("%s temporary free use_count: %u",
12460 share->key, share->use_count));
12461 free_share(&share);
12462 DBUG_RETURN(1);
12463 }
12464 }
12465
12466 pthread_mutex_lock(&share->mutex);
12467 if (share->commit_count_lock == lock)
12468 {
12469 #ifndef DBUG_OFF
12470 char buff[22];
12471 #endif
12472 DBUG_PRINT("info", ("Setting commit_count to %s",
12473 llstr(stat.commit_count, buff)));
12474 share->commit_count= stat.commit_count;
12475 *commit_count= stat.commit_count;
12476 }
12477 else
12478 {
12479 DBUG_PRINT("info", ("Discarding commit_count, comit_count_lock changed"));
12480 *commit_count= 0;
12481 }
12482 pthread_mutex_unlock(&share->mutex);
12483 /* ndb_share reference temporary free */
12484 DBUG_PRINT("NDB_SHARE", ("%s temporary free use_count: %u",
12485 share->key, share->use_count));
12486 free_share(&share);
12487 DBUG_RETURN(0);
12488 }
12489
12490
12491 /**
12492 Check if a cached query can be used.
12493
12494 This is done by comparing the supplied engine_data to commit_count of
12495 the table.
12496
  The commit_count is normally retrieved from the share for the table, where
  it has been cached by the util thread. If the util thread is not started,
  NDB has to be contacted to retrieve the commit_count; this will introduce
  a small delay while waiting for NDB to answer.
12501
12502
12503 @param thd thread handle
12504 @param full_name normalized path to the table in the canonical
12505 format.
12506 @param full_name_len length of the normalized path to the table.
12507 @param engine_data parameter retrieved when query was first inserted into
12508 the cache. If the value of engine_data is changed,
12509 all queries for this table should be invalidated.
12510
12511 @retval
12512 TRUE Yes, use the query from cache
12513 @retval
12514 FALSE No, don't use the cached query, and if engine_data
12515 has changed, all queries for this table should be invalidated
12516
12517 */
12518
12519 static my_bool
ndbcluster_cache_retrieval_allowed(THD * thd,char * full_name,uint full_name_len,ulonglong * engine_data)12520 ndbcluster_cache_retrieval_allowed(THD *thd,
12521 char *full_name, uint full_name_len,
12522 ulonglong *engine_data)
12523 {
12524 Uint64 commit_count;
12525 char dbname[NAME_LEN + 1];
12526 char tabname[NAME_LEN + 1];
12527 #ifndef DBUG_OFF
12528 char buff[22], buff2[22];
12529 #endif
12530
12531 ha_ndbcluster::set_dbname(full_name, dbname);
12532 ha_ndbcluster::set_tabname(full_name, tabname);
12533
12534 DBUG_ENTER("ndbcluster_cache_retrieval_allowed");
12535 DBUG_PRINT("enter", ("dbname: %s, tabname: %s",
12536 dbname, tabname));
12537
12538 if (thd_options(thd) & (OPTION_NOT_AUTOCOMMIT | OPTION_BEGIN))
12539 {
12540 /* Don't allow qc to be used if table has been previously
12541 modified in transaction */
12542 if (!check_ndb_in_thd(thd))
12543 DBUG_RETURN(FALSE);
12544 Thd_ndb *thd_ndb= get_thd_ndb(thd);
12545 if (!thd_ndb->changed_tables.is_empty())
12546 {
12547 NDB_SHARE* share;
12548 List_iterator_fast<NDB_SHARE> it(thd_ndb->changed_tables);
12549 while ((share= it++))
12550 {
12551 if (strcmp(share->table_name, tabname) == 0 &&
12552 strcmp(share->db, dbname) == 0)
12553 {
12554 DBUG_PRINT("exit", ("No, transaction has changed table"));
12555 DBUG_RETURN(FALSE);
12556 }
12557 }
12558 }
12559 }
12560
12561 if (ndb_get_commitcount(thd, full_name, &commit_count))
12562 {
12563 *engine_data= 0; /* invalidate */
12564 DBUG_PRINT("exit", ("No, could not retrieve commit_count"));
12565 DBUG_RETURN(FALSE);
12566 }
12567 DBUG_PRINT("info", ("*engine_data: %s, commit_count: %s",
12568 llstr(*engine_data, buff), llstr(commit_count, buff2)));
12569 if (commit_count == 0)
12570 {
12571 *engine_data= 0; /* invalidate */
12572 DBUG_PRINT("exit", ("No, local commit has been performed"));
12573 DBUG_RETURN(FALSE);
12574 }
12575 else if (*engine_data != commit_count)
12576 {
12577 *engine_data= commit_count; /* invalidate */
12578 DBUG_PRINT("exit", ("No, commit_count has changed"));
12579 DBUG_RETURN(FALSE);
12580 }
12581
12582 DBUG_PRINT("exit", ("OK to use cache, engine_data: %s",
12583 llstr(*engine_data, buff)));
12584 DBUG_RETURN(TRUE);
12585 }
12586
12587
12588 /**
12589 Register a table for use in the query cache.
12590
12591 Fetch the commit_count for the table and return it in engine_data,
12592 this will later be used to check if the table has changed, before
12593 the cached query is reused.
12594
12595 @param thd thread handle
12596 @param full_name normalized path to the table in the
12597 canonical format.
12598 @param full_name_len length of the normalized path to the table.
12599 @param engine_callback function to be called before using cache on
12600 this table
12601 @param[out] engine_data commit_count for this table
12602
12603 @retval
12604 TRUE Yes, it's ok to cahce this query
12605 @retval
12606 FALSE No, don't cach the query
12607 */
12608
12609 my_bool
register_query_cache_table(THD * thd,char * full_name,uint full_name_len,qc_engine_callback * engine_callback,ulonglong * engine_data)12610 ha_ndbcluster::register_query_cache_table(THD *thd,
12611 char *full_name, uint full_name_len,
12612 qc_engine_callback *engine_callback,
12613 ulonglong *engine_data)
12614 {
12615 Uint64 commit_count;
12616 #ifndef DBUG_OFF
12617 char buff[22];
12618 #endif
12619 DBUG_ENTER("ha_ndbcluster::register_query_cache_table");
12620 DBUG_PRINT("enter",("dbname: %s, tabname: %s",
12621 m_dbname, m_tabname));
12622
12623 if (thd_options(thd) & (OPTION_NOT_AUTOCOMMIT | OPTION_BEGIN))
12624 {
12625 /* Don't allow qc to be used if table has been previously
12626 modified in transaction */
12627 Thd_ndb *thd_ndb= get_thd_ndb(thd);
12628 if (!thd_ndb->changed_tables.is_empty())
12629 {
12630 DBUG_ASSERT(m_share);
12631 NDB_SHARE* share;
12632 List_iterator_fast<NDB_SHARE> it(thd_ndb->changed_tables);
12633 while ((share= it++))
12634 {
12635 if (m_share == share)
12636 {
12637 DBUG_PRINT("exit", ("No, transaction has changed table"));
12638 DBUG_RETURN(FALSE);
12639 }
12640 }
12641 }
12642 }
12643
12644 if (ndb_get_commitcount(thd, full_name, &commit_count))
12645 {
12646 *engine_data= 0;
12647 DBUG_PRINT("exit", ("Error, could not get commitcount"));
12648 DBUG_RETURN(FALSE);
12649 }
12650 *engine_data= commit_count;
12651 *engine_callback= ndbcluster_cache_retrieval_allowed;
12652 DBUG_PRINT("exit", ("commit_count: %s", llstr(commit_count, buff)));
12653 DBUG_RETURN(commit_count > 0);
12654 }
12655
12656
12657 /**
12658 Handling the shared NDB_SHARE structure that is needed to
12659 provide table locking.
12660
12661 It's also used for sharing data with other NDB handlers
12662 in the same MySQL Server. There is currently not much
12663 data we want to or can share.
12664 */
12665
ndbcluster_get_key(NDB_SHARE * share,size_t * length,my_bool not_used MY_ATTRIBUTE ((unused)))12666 static uchar *ndbcluster_get_key(NDB_SHARE *share, size_t *length,
12667 my_bool not_used MY_ATTRIBUTE((unused)))
12668 {
12669 *length= share->key_length;
12670 return (uchar*) share->key;
12671 }
12672
12673
12674 #ifndef DBUG_OFF
12675
print_share(const char * where,NDB_SHARE * share)12676 static void print_share(const char* where, NDB_SHARE* share)
12677 {
12678 fprintf(DBUG_FILE,
12679 "%s %s.%s: use_count: %u, commit_count: %lu\n",
12680 where, share->db, share->table_name, share->use_count,
12681 (ulong) share->commit_count);
12682 fprintf(DBUG_FILE,
12683 " - key: %s, key_length: %d\n",
12684 share->key, share->key_length);
12685
12686 Ndb_event_data *event_data= 0;
12687 if (share->event_data)
12688 event_data= share->event_data;
12689 else if (share->op)
12690 event_data= (Ndb_event_data *) share->op->getCustomData();
12691 if (event_data)
12692 {
12693 fprintf(DBUG_FILE,
12694 " - event_data->shadow_table: %p %s.%s\n",
12695 event_data->shadow_table, event_data->shadow_table->s->db.str,
12696 event_data->shadow_table->s->table_name.str);
12697 }
12698 }
12699
12700
print_ndbcluster_open_tables()12701 static void print_ndbcluster_open_tables()
12702 {
12703 DBUG_LOCK_FILE;
12704 fprintf(DBUG_FILE, ">ndbcluster_open_tables\n");
12705 for (uint i= 0; i < ndbcluster_open_tables.records; i++)
12706 print_share("",
12707 (NDB_SHARE*)my_hash_element(&ndbcluster_open_tables, i));
12708 fprintf(DBUG_FILE, "<ndbcluster_open_tables\n");
12709 DBUG_UNLOCK_FILE;
12710 }
12711
12712 #endif
12713
12714
/*
  Dump all open shares through print_ndbcluster_open_tables(). The call
  is wrapped in DBUG_EXECUTE with the "info" keyword. Note that the
  expansion already ends with a ';'.
*/
#define dbug_print_open_tables()                \
  DBUG_EXECUTE("info",                          \
               print_ndbcluster_open_tables(););

/*
  Print a single share through print_share((t), (s)), wrapped in
  DBUG_EXECUTE with the "info" keyword and bracketed by
  DBUG_LOCK_FILE / DBUG_UNLOCK_FILE.

  NOTE: this expands to three separate statements, so it must only be
  used where a statement sequence is valid (i.e. not as the unbraced
  body of an if/else or a loop).
*/
#define dbug_print_share(t, s)                  \
  DBUG_LOCK_FILE;                               \
  DBUG_EXECUTE("info",                          \
               print_share((t), (s)););         \
  DBUG_UNLOCK_FILE;
12724
12725
/*
  For some reason a share is still around, try to salvage the situation
  by closing all cached tables. If the share still exists, there is an
  error somewhere but only report this to the error log. Keep this
  "trailing share" but rename it since there are still references to it
  to avoid segmentation faults. There is a risk that the memory for
  this trailing share leaks.

  Must be called with previous pthread_mutex_lock(&ndbcluster_mutex)

  The mutex is released while close_cached_tables() runs and is taken
  again afterwards, so it is held again when the function returns.
  If the reference count drops to zero along the way the share is
  freed with ndbcluster_real_free_share(); otherwise the share is
  re-registered in ndbcluster_open_tables under a "#leak<N>" key.

  Always returns 0.
*/
int handle_trailing_share(THD *thd, NDB_SHARE *share)
{
  /* Running number used to build unique "#leak<N>" keys */
  static ulong trailing_share_id= 0;
  DBUG_ENTER("handle_trailing_share");

  /* ndb_share reference temporary, free below */
  ++share->use_count;
  if (opt_ndb_extra_logging > 9)
    sql_print_information ("handle_trailing_share: %s use_count: %u", share->key, share->use_count);
  DBUG_PRINT("NDB_SHARE", ("%s temporary use_count: %u",
                           share->key, share->use_count));
  /* Release the mutex while the cached tables are being closed */
  pthread_mutex_unlock(&ndbcluster_mutex);

  /* Close cached instances of exactly this table (db + table_name) */
  TABLE_LIST table_list;
  memset(&table_list, 0, sizeof(table_list));
  table_list.db= share->db;
  table_list.alias= table_list.table_name= share->table_name;
  close_cached_tables(thd, &table_list, TRUE, FALSE, FALSE);

  pthread_mutex_lock(&ndbcluster_mutex);
  /* ndb_share reference temporary free */
  DBUG_PRINT("NDB_SHARE", ("%s temporary free use_count: %u",
                           share->key, share->use_count));
  /* Dropping the temporary reference was the last one: free the share */
  if (!--share->use_count)
  {
    if (opt_ndb_extra_logging > 9)
      sql_print_information ("handle_trailing_share: %s use_count: %u", share->key, share->use_count);
    if (opt_ndb_extra_logging)
      sql_print_information("NDB_SHARE: trailing share "
                            "%s(connect_count: %u) "
                            "released by close_cached_tables at "
                            "connect_count: %u",
                            share->key,
                            share->connect_count,
                            g_ndb_cluster_connection->get_connect_count());
    ndbcluster_real_free_share(&share);
    DBUG_RETURN(0);
  }
  if (opt_ndb_extra_logging > 9)
    sql_print_information ("handle_trailing_share: %s use_count: %u", share->key, share->use_count);

  /*
    share still exists, if share has not been dropped by server
    release that share
  */
  if (share->state != NSS_DROPPED)
  {
    share->state= NSS_DROPPED;
    /* ndb_share reference create free */
    DBUG_PRINT("NDB_SHARE", ("%s create free use_count: %u",
                             share->key, share->use_count));
    --share->use_count;
    if (opt_ndb_extra_logging > 9)
      sql_print_information ("handle_trailing_share: %s use_count: %u", share->key, share->use_count);

    /* Releasing the "create" reference may have been the last one */
    if (share->use_count == 0)
    {
      if (opt_ndb_extra_logging)
        sql_print_information("NDB_SHARE: trailing share "
                              "%s(connect_count: %u) "
                              "released after NSS_DROPPED check "
                              "at connect_count: %u",
                              share->key,
                              share->connect_count,
                              g_ndb_cluster_connection->get_connect_count());
      ndbcluster_real_free_share(&share);
      DBUG_RETURN(0);
    }
  }

  DBUG_PRINT("info", ("NDB_SHARE: %s already exists use_count=%d, op=0x%lx.",
                      share->key, share->use_count, (long) share->op));
  /*
    Ignore table shares only opened by util thread
  */
  if (!((share->use_count == 1) && share->util_thread))
  {
    /* The warning is only compiled in with NDB_LOG_TRAILING_SHARE_ERRORS */
#ifdef NDB_LOG_TRAILING_SHARE_ERRORS
    sql_print_warning("NDB_SHARE: %s already exists use_count=%d."
                      " Moving away for safety, but possible memleak.",
                      share->key, share->use_count);
#endif
  }
  dbug_print_open_tables();

  /*
    Ndb share has not been released as it should
  */
#ifdef NOT_YET
  DBUG_ASSERT(FALSE);
#endif

  /*
    This is probably an error.  We can however save the situation
    at the cost of a possible mem leak, by "renaming" the share
    - First remove from hash
  */
  my_hash_delete(&ndbcluster_open_tables, (uchar*) share);

  /*
    now give it a new name, just a running number
    if space is not enough allocate some more
  */
  {
    const uint min_key_length= 10;
    if (share->key_length < min_key_length)
    {
      /* NOTE(review): the result of alloc_root() is not checked here */
      share->key= (char*) alloc_root(&share->mem_root, min_key_length + 1);
      share->key_length= min_key_length;
    }
    /* key_length becomes the length reported by my_snprintf() */
    share->key_length=
      my_snprintf(share->key, min_key_length + 1, "#leak%lu",
                  trailing_share_id++);
  }
  /* Keep it for possible the future trailing free */
  my_hash_insert(&ndbcluster_open_tables, (uchar*) share);

  DBUG_RETURN(0);
}
12855
12856 /*
12857 Rename share is used during rename table.
12858 */
ndbcluster_prepare_rename_share(NDB_SHARE * share,const char * new_key)12859 int ndbcluster_prepare_rename_share(NDB_SHARE *share, const char *new_key)
12860 {
12861 /*
12862 allocate and set the new key, db etc
12863 enough space for key, db, and table_name
12864 */
12865 uint new_length= (uint) strlen(new_key);
12866 share->new_key= (char*) alloc_root(&share->mem_root, 2 * (new_length + 1));
12867 strmov(share->new_key, new_key);
12868 return 0;
12869 }
12870
ndbcluster_undo_rename_share(THD * thd,NDB_SHARE * share)12871 int ndbcluster_undo_rename_share(THD *thd, NDB_SHARE *share)
12872 {
12873 share->new_key= share->old_names;
12874 ndbcluster_rename_share(thd, share);
12875 return 0;
12876 }
12877
ndbcluster_rename_share(THD * thd,NDB_SHARE * share)12878 int ndbcluster_rename_share(THD *thd, NDB_SHARE *share)
12879 {
12880 NDB_SHARE *tmp;
12881 pthread_mutex_lock(&ndbcluster_mutex);
12882 uint new_length= (uint) strlen(share->new_key);
12883 DBUG_PRINT("ndbcluster_rename_share", ("old_key: %s old__length: %d",
12884 share->key, share->key_length));
12885 if ((tmp= (NDB_SHARE*) my_hash_search(&ndbcluster_open_tables,
12886 (const uchar*) share->new_key,
12887 new_length)))
12888 handle_trailing_share(thd, tmp);
12889
12890 /* remove the share from hash */
12891 my_hash_delete(&ndbcluster_open_tables, (uchar*) share);
12892 dbug_print_open_tables();
12893
12894 /* save old stuff if insert should fail */
12895 uint old_length= share->key_length;
12896 char *old_key= share->key;
12897
12898 share->key= share->new_key;
12899 share->key_length= new_length;
12900
12901 if (my_hash_insert(&ndbcluster_open_tables, (uchar*) share))
12902 {
12903 // ToDo free the allocated stuff above?
12904 DBUG_PRINT("error", ("ndbcluster_rename_share: my_hash_insert %s failed",
12905 share->key));
12906 share->key= old_key;
12907 share->key_length= old_length;
12908 if (my_hash_insert(&ndbcluster_open_tables, (uchar*) share))
12909 {
12910 sql_print_error("ndbcluster_rename_share: failed to recover %s", share->key);
12911 DBUG_PRINT("error", ("ndbcluster_rename_share: my_hash_insert %s failed",
12912 share->key));
12913 }
12914 dbug_print_open_tables();
12915 pthread_mutex_unlock(&ndbcluster_mutex);
12916 return -1;
12917 }
12918 dbug_print_open_tables();
12919
12920 share->db= share->key + new_length + 1;
12921 ha_ndbcluster::set_dbname(share->new_key, share->db);
12922 share->table_name= share->db + strlen(share->db) + 1;
12923 ha_ndbcluster::set_tabname(share->new_key, share->table_name);
12924
12925 dbug_print_share("ndbcluster_rename_share:", share);
12926 Ndb_event_data *event_data= 0;
12927 if (share->event_data)
12928 event_data= share->event_data;
12929 else if (share->op)
12930 event_data= (Ndb_event_data *) share->op->getCustomData();
12931 if (event_data && event_data->shadow_table)
12932 {
12933 if (!IS_TMP_PREFIX(share->table_name))
12934 {
12935 event_data->shadow_table->s->db.str= share->db;
12936 event_data->shadow_table->s->db.length= strlen(share->db);
12937 event_data->shadow_table->s->table_name.str= share->table_name;
12938 event_data->shadow_table->s->table_name.length= strlen(share->table_name);
12939 }
12940 else
12941 {
12942 /**
12943 * we don't rename the table->s here
12944 * that is used by injector
12945 * as we don't know if all events has been processed
12946 * This will be dropped anyway
12947 */
12948 }
12949 }
12950 /* else rename will be handled when the ALTER event comes */
12951 share->old_names= old_key;
12952 // ToDo free old_names after ALTER EVENT
12953
12954 if (opt_ndb_extra_logging > 9)
12955 sql_print_information ("ndbcluster_rename_share: %s-%s use_count: %u", old_key, share->key, share->use_count);
12956
12957 pthread_mutex_unlock(&ndbcluster_mutex);
12958 return 0;
12959 }
12960
12961 /*
12962 Increase refcount on existing share.
12963 Always returns share and cannot fail.
12964 */
ndbcluster_get_share(NDB_SHARE * share)12965 NDB_SHARE *ndbcluster_get_share(NDB_SHARE *share)
12966 {
12967 pthread_mutex_lock(&ndbcluster_mutex);
12968 share->use_count++;
12969
12970 dbug_print_open_tables();
12971 dbug_print_share("ndbcluster_get_share:", share);
12972 if (opt_ndb_extra_logging > 9)
12973 sql_print_information ("ndbcluster_get_share: %s use_count: %u", share->key, share->use_count);
12974 pthread_mutex_unlock(&ndbcluster_mutex);
12975 return share;
12976 }
12977
12978
12979 /*
12980 Get a share object for key
12981
12982 Returns share for key, and increases the refcount on the share.
12983
12984 create_if_not_exists == TRUE:
12985 creates share if it does not alreade exist
12986 returns 0 only due to out of memory, and then sets my_error
12987
12988 create_if_not_exists == FALSE:
12989 returns 0 if share does not exist
12990
12991 have_lock == TRUE, pthread_mutex_lock(&ndbcluster_mutex) already taken
12992 */
12993
ndbcluster_get_share(const char * key,TABLE * table,bool create_if_not_exists,bool have_lock)12994 NDB_SHARE *ndbcluster_get_share(const char *key, TABLE *table,
12995 bool create_if_not_exists,
12996 bool have_lock)
12997 {
12998 NDB_SHARE *share;
12999 uint length= (uint) strlen(key);
13000 DBUG_ENTER("ndbcluster_get_share");
13001 DBUG_PRINT("enter", ("key: '%s'", key));
13002
13003 if (!have_lock)
13004 pthread_mutex_lock(&ndbcluster_mutex);
13005 if (!(share= (NDB_SHARE*) my_hash_search(&ndbcluster_open_tables,
13006 (const uchar*) key,
13007 length)))
13008 {
13009 if (!create_if_not_exists)
13010 {
13011 DBUG_PRINT("error", ("get_share: %s does not exist", key));
13012 if (!have_lock)
13013 pthread_mutex_unlock(&ndbcluster_mutex);
13014 DBUG_RETURN(0);
13015 }
13016 if ((share= (NDB_SHARE*) my_malloc(sizeof(*share),
13017 MYF(MY_WME | MY_ZEROFILL))))
13018 {
13019 MEM_ROOT **root_ptr=
13020 my_pthread_getspecific_ptr(MEM_ROOT**, THR_MALLOC);
13021 MEM_ROOT *old_root= *root_ptr;
13022 init_sql_alloc(&share->mem_root, 1024, 0);
13023 *root_ptr= &share->mem_root; // remember to reset before return
13024 share->flags= 0;
13025 share->state= NSS_INITIAL;
13026 /* enough space for key, db, and table_name */
13027 share->key= (char*) alloc_root(*root_ptr, 2 * (length + 1));
13028 share->key_length= length;
13029 strmov(share->key, key);
13030 if (my_hash_insert(&ndbcluster_open_tables, (uchar*) share))
13031 {
13032 free_root(&share->mem_root, MYF(0));
13033 my_free((uchar*) share, 0);
13034 *root_ptr= old_root;
13035 if (!have_lock)
13036 pthread_mutex_unlock(&ndbcluster_mutex);
13037 DBUG_RETURN(0);
13038 }
13039 thr_lock_init(&share->lock);
13040 pthread_mutex_init(&share->mutex, MY_MUTEX_INIT_FAST);
13041 share->commit_count= 0;
13042 share->commit_count_lock= 0;
13043 share->db= share->key + length + 1;
13044 ha_ndbcluster::set_dbname(key, share->db);
13045 share->table_name= share->db + strlen(share->db) + 1;
13046 ha_ndbcluster::set_tabname(key, share->table_name);
13047 if (ndbcluster_binlog_init_share(current_thd, share, table))
13048 {
13049 DBUG_PRINT("error", ("get_share: %s could not init share", key));
13050 ndbcluster_real_free_share(&share);
13051 *root_ptr= old_root;
13052 if (!have_lock)
13053 pthread_mutex_unlock(&ndbcluster_mutex);
13054 DBUG_RETURN(0);
13055 }
13056 *root_ptr= old_root;
13057 }
13058 else
13059 {
13060 DBUG_PRINT("error", ("get_share: failed to alloc share"));
13061 if (!have_lock)
13062 pthread_mutex_unlock(&ndbcluster_mutex);
13063 my_error(ER_OUTOFMEMORY, MYF(0), static_cast<int>(sizeof(*share)));
13064 DBUG_RETURN(0);
13065 }
13066 }
13067 share->use_count++;
13068 if (opt_ndb_extra_logging > 9)
13069 sql_print_information ("ndbcluster_get_share: %s use_count: %u", share->key, share->use_count);
13070
13071 dbug_print_open_tables();
13072 dbug_print_share("ndbcluster_get_share:", share);
13073 if (!have_lock)
13074 pthread_mutex_unlock(&ndbcluster_mutex);
13075 DBUG_RETURN(share);
13076 }
13077
13078
ndbcluster_real_free_share(NDB_SHARE ** share)13079 void ndbcluster_real_free_share(NDB_SHARE **share)
13080 {
13081 DBUG_ENTER("ndbcluster_real_free_share");
13082 dbug_print_share("ndbcluster_real_free_share:", *share);
13083
13084 if (opt_ndb_extra_logging > 9)
13085 sql_print_information ("ndbcluster_real_free_share: %s use_count: %u", (*share)->key, (*share)->use_count);
13086
13087 ndb_index_stat_free(*share);
13088
13089 my_hash_delete(&ndbcluster_open_tables, (uchar*) *share);
13090 thr_lock_delete(&(*share)->lock);
13091 pthread_mutex_destroy(&(*share)->mutex);
13092
13093 #ifdef HAVE_NDB_BINLOG
13094 if ((*share)->m_cfn_share && (*share)->m_cfn_share->m_ex_tab && g_ndb)
13095 {
13096 NDBDICT *dict= g_ndb->getDictionary();
13097 dict->removeTableGlobal(*(*share)->m_cfn_share->m_ex_tab, 0);
13098 (*share)->m_cfn_share->m_ex_tab= 0;
13099 }
13100 #endif
13101 (*share)->new_op= 0;
13102 if ((*share)->event_data)
13103 {
13104 delete (*share)->event_data;
13105 (*share)->event_data= 0;
13106 }
13107 free_root(&(*share)->mem_root, MYF(0));
13108 my_free((uchar*) *share, MYF(0));
13109 *share= 0;
13110
13111 dbug_print_open_tables();
13112 DBUG_VOID_RETURN;
13113 }
13114
13115
ndbcluster_free_share(NDB_SHARE ** share,bool have_lock)13116 void ndbcluster_free_share(NDB_SHARE **share, bool have_lock)
13117 {
13118 if (!have_lock)
13119 pthread_mutex_lock(&ndbcluster_mutex);
13120 if (!--(*share)->use_count)
13121 {
13122 if (opt_ndb_extra_logging > 9)
13123 sql_print_information ("ndbcluster_free_share: %s use_count: %u", (*share)->key, (*share)->use_count);
13124 ndbcluster_real_free_share(share);
13125 }
13126 else
13127 {
13128 if (opt_ndb_extra_logging > 9)
13129 sql_print_information ("ndbcluster_free_share: %s use_count: %u", (*share)->key, (*share)->use_count);
13130 dbug_print_open_tables();
13131 dbug_print_share("ndbcluster_free_share:", *share);
13132 }
13133 if (!have_lock)
13134 pthread_mutex_unlock(&ndbcluster_mutex);
13135 }
13136
13137
/*
  One row of table statistics.

  NOTE(review): the field names parallel the per-fragment values that
  ndb_get_table_statistics() reads (rows, commits, size, fixed_mem,
  var_mem), so this presumably describes such a result row. No use of
  the struct is visible nearby - confirm before relying on it.
*/
struct ndb_table_statistics_row {
  Uint64 rows;
  Uint64 commits;
  Uint32 size;
  Uint64 fixed_mem;
  Uint64 var_mem;
};
13145
update_stats(THD * thd,bool do_read_stat,bool have_lock,uint part_id)13146 int ha_ndbcluster::update_stats(THD *thd,
13147 bool do_read_stat,
13148 bool have_lock,
13149 uint part_id)
13150 {
13151 struct Ndb_statistics stat;
13152 Thd_ndb *thd_ndb= get_thd_ndb(thd);
13153 DBUG_ENTER("ha_ndbcluster::update_stats");
13154 do
13155 {
13156 if (m_share && !do_read_stat)
13157 {
13158 pthread_mutex_lock(&m_share->mutex);
13159 stat= m_share->stat;
13160 pthread_mutex_unlock(&m_share->mutex);
13161
13162 DBUG_ASSERT(stat.row_count != ~(ha_rows)0); // should never be invalid
13163
13164 /* Accept shared cached statistics if row_count is valid. */
13165 if (stat.row_count != ~(ha_rows)0)
13166 break;
13167 }
13168
13169 /* Request statistics from datanodes */
13170 Ndb *ndb= thd_ndb->ndb;
13171 if (ndb->setDatabaseName(m_dbname))
13172 {
13173 DBUG_RETURN(my_errno= HA_ERR_OUT_OF_MEM);
13174 }
13175 if (int err= ndb_get_table_statistics(thd, this, TRUE, ndb,
13176 m_ndb_record, &stat,
13177 have_lock, part_id))
13178 {
13179 DBUG_RETURN(err);
13180 }
13181
13182 /* Update shared statistics with fresh data */
13183 if (m_share)
13184 {
13185 pthread_mutex_lock(&m_share->mutex);
13186 m_share->stat= stat;
13187 pthread_mutex_unlock(&m_share->mutex);
13188 }
13189 break;
13190 }
13191 while(0);
13192
13193 int no_uncommitted_rows_count= 0;
13194 if (m_table_info && !thd_ndb->m_error)
13195 {
13196 m_table_info->records= stat.row_count;
13197 m_table_info->last_count= thd_ndb->count;
13198 no_uncommitted_rows_count= m_table_info->no_uncommitted_rows_count;
13199 }
13200 stats.mean_rec_length= stat.row_size;
13201 stats.data_file_length= stat.fragment_memory;
13202 stats.records= stat.row_count + no_uncommitted_rows_count;
13203 stats.max_data_file_length= stat.fragment_extent_space;
13204 stats.delete_length= stat.fragment_extent_free_space;
13205
13206 DBUG_PRINT("exit", ("stats.records: %d "
13207 "stat->row_count: %d "
13208 "no_uncommitted_rows_count: %d"
13209 "stat->fragment_extent_space: %u "
13210 "stat->fragment_extent_free_space: %u",
13211 (int)stats.records,
13212 (int)stat.row_count,
13213 (int)no_uncommitted_rows_count,
13214 (uint)stat.fragment_extent_space,
13215 (uint)stat.fragment_extent_free_space));
13216 DBUG_RETURN(0);
13217 }
13218
13219 /**
13220 Update 'row_count' in shared table statistcs if any rows where
13221 inserted/deleted by the local transaction related to specified
13222 'local_stat'.
13223 Should be called when transaction has succesfully commited its changes.
13224 */
13225 static
modify_shared_stats(NDB_SHARE * share,Ndb_local_table_statistics * local_stat)13226 void modify_shared_stats(NDB_SHARE *share,
13227 Ndb_local_table_statistics *local_stat)
13228 {
13229 if (local_stat->no_uncommitted_rows_count)
13230 {
13231 pthread_mutex_lock(&share->mutex);
13232 DBUG_ASSERT(share->stat.row_count != ~(ha_rows)0);// should never be invalid
13233 if (share->stat.row_count != ~(ha_rows)0)
13234 {
13235 DBUG_PRINT("info", ("Update row_count for %s, row_count: %lu, with:%d",
13236 share->table_name, (ulong) share->stat.row_count,
13237 local_stat->no_uncommitted_rows_count));
13238 share->stat.row_count=
13239 ((Int64)share->stat.row_count+local_stat->no_uncommitted_rows_count > 0)
13240 ? share->stat.row_count+local_stat->no_uncommitted_rows_count
13241 : 0;
13242 }
13243 pthread_mutex_unlock(&share->mutex);
13244 local_stat->no_uncommitted_rows_count= 0;
13245 }
13246 }
13247
13248 /* If part_id contains a legal partition id, ndbstat returns the
13249 partition-statistics pertaining to that partition only.
13250 Otherwise, it returns the table-statistics,
13251 which is an aggregate over all partitions of that table.
13252 */
13253 static
13254 int
ndb_get_table_statistics(THD * thd,ha_ndbcluster * file,bool report_error,Ndb * ndb,const NdbRecord * record,struct Ndb_statistics * ndbstat,bool have_lock,uint part_id)13255 ndb_get_table_statistics(THD *thd, ha_ndbcluster* file, bool report_error, Ndb* ndb,
13256 const NdbRecord *record,
13257 struct Ndb_statistics * ndbstat,
13258 bool have_lock,
13259 uint part_id)
13260 {
13261 Thd_ndb *thd_ndb= get_thd_ndb(current_thd);
13262 NdbTransaction* pTrans;
13263 NdbError error;
13264 int retries= 100;
13265 int reterr= 0;
13266 int retry_sleep= 30; /* 30 milliseconds */
13267 const char *dummyRowPtr;
13268 NdbOperation::GetValueSpec extraGets[8];
13269 Uint64 rows, commits, fixed_mem, var_mem, ext_space, free_ext_space;
13270 Uint32 size, fragid;
13271 #ifndef DBUG_OFF
13272 char buff[22], buff2[22], buff3[22], buff4[22], buff5[22], buff6[22];
13273 #endif
13274 DBUG_ENTER("ndb_get_table_statistics");
13275
13276 DBUG_ASSERT(record != 0);
13277
13278 /* We use the passed in NdbRecord just to get access to the
13279 table, we mask out any/all columns it may have and add
13280 our reads as extraGets. This is necessary as they are
13281 all pseudo-columns
13282 */
13283 extraGets[0].column= NdbDictionary::Column::ROW_COUNT;
13284 extraGets[0].appStorage= &rows;
13285 extraGets[1].column= NdbDictionary::Column::COMMIT_COUNT;
13286 extraGets[1].appStorage= &commits;
13287 extraGets[2].column= NdbDictionary::Column::ROW_SIZE;
13288 extraGets[2].appStorage= &size;
13289 extraGets[3].column= NdbDictionary::Column::FRAGMENT_FIXED_MEMORY;
13290 extraGets[3].appStorage= &fixed_mem;
13291 extraGets[4].column= NdbDictionary::Column::FRAGMENT_VARSIZED_MEMORY;
13292 extraGets[4].appStorage= &var_mem;
13293 extraGets[5].column= NdbDictionary::Column::FRAGMENT_EXTENT_SPACE;
13294 extraGets[5].appStorage= &ext_space;
13295 extraGets[6].column= NdbDictionary::Column::FRAGMENT_FREE_EXTENT_SPACE;
13296 extraGets[6].appStorage= &free_ext_space;
13297 extraGets[7].column= NdbDictionary::Column::FRAGMENT;
13298 extraGets[7].appStorage= &fragid;
13299
13300 const Uint32 codeWords= 1;
13301 Uint32 codeSpace[ codeWords ];
13302 NdbInterpretedCode code(NULL, // Table is irrelevant
13303 &codeSpace[0],
13304 codeWords);
13305 if ((code.interpret_exit_last_row() != 0) ||
13306 (code.finalise() != 0))
13307 {
13308 reterr= code.getNdbError().code;
13309 DBUG_PRINT("exit", ("failed, reterr: %u, NdbError %u(%s)", reterr,
13310 error.code, error.message));
13311 DBUG_RETURN(reterr);
13312 }
13313
13314 do
13315 {
13316 Uint32 count= 0;
13317 Uint64 sum_rows= 0;
13318 Uint64 sum_commits= 0;
13319 Uint64 sum_row_size= 0;
13320 Uint64 sum_mem= 0;
13321 Uint64 sum_ext_space= 0;
13322 Uint64 sum_free_ext_space= 0;
13323 NdbScanOperation*pOp;
13324 int check;
13325
13326 if ((pTrans= ndb->startTransaction()) == NULL)
13327 {
13328 error= ndb->getNdbError();
13329 goto retry;
13330 }
13331
13332 NdbScanOperation::ScanOptions options;
13333 options.optionsPresent= NdbScanOperation::ScanOptions::SO_BATCH |
13334 NdbScanOperation::ScanOptions::SO_GETVALUE |
13335 NdbScanOperation::ScanOptions::SO_INTERPRETED;
13336 /* Set batch_size=1, as we need only one row per fragment. */
13337 options.batch= 1;
13338 options.extraGetValues= &extraGets[0];
13339 options.numExtraGetValues= sizeof(extraGets)/sizeof(extraGets[0]);
13340 options.interpretedCode= &code;
13341
13342 if ((pOp= pTrans->scanTable(record, NdbOperation::LM_CommittedRead,
13343 empty_mask,
13344 &options,
13345 sizeof(NdbScanOperation::ScanOptions))) == NULL)
13346 {
13347 error= pTrans->getNdbError();
13348 goto retry;
13349 }
13350 thd_ndb->m_scan_count++;
13351 thd_ndb->m_pruned_scan_count += (pOp->getPruned()? 1 : 0);
13352
13353 thd_ndb->m_execute_count++;
13354 DBUG_PRINT("info", ("execute_count: %u", thd_ndb->m_execute_count));
13355 if (pTrans->execute(NdbTransaction::NoCommit,
13356 NdbOperation::AbortOnError,
13357 TRUE) == -1)
13358 {
13359 error= pTrans->getNdbError();
13360 goto retry;
13361 }
13362
13363 while ((check= pOp->nextResult(&dummyRowPtr, TRUE, TRUE)) == 0)
13364 {
13365 DBUG_PRINT("info", ("nextResult rows: %d commits: %d"
13366 "fixed_mem_size %d var_mem_size %d "
13367 "fragmentid %d extent_space %d free_extent_space %d",
13368 (int)rows, (int)commits, (int)fixed_mem,
13369 (int)var_mem, (int)fragid, (int)ext_space,
13370 (int)free_ext_space));
13371
13372 if ((part_id != ~(uint)0) && fragid != part_id)
13373 {
13374 continue;
13375 }
13376
13377 sum_rows+= rows;
13378 sum_commits+= commits;
13379 if (sum_row_size < size)
13380 sum_row_size= size;
13381 sum_mem+= fixed_mem + var_mem;
13382 count++;
13383 sum_ext_space += ext_space;
13384 sum_free_ext_space += free_ext_space;
13385
13386 if ((part_id != ~(uint)0) && fragid == part_id)
13387 {
13388 break;
13389 }
13390 }
13391
13392 if (check == -1)
13393 {
13394 error= pOp->getNdbError();
13395 goto retry;
13396 }
13397
13398 pOp->close(TRUE);
13399
13400 ndb->closeTransaction(pTrans);
13401
13402 ndbstat->row_count= sum_rows;
13403 ndbstat->commit_count= sum_commits;
13404 ndbstat->row_size= (ulong)sum_row_size;
13405 ndbstat->fragment_memory= sum_mem;
13406 ndbstat->fragment_extent_space= sum_ext_space;
13407 ndbstat->fragment_extent_free_space= sum_free_ext_space;
13408
13409 DBUG_PRINT("exit", ("records: %s commits: %s "
13410 "row_size: %s mem: %s "
13411 "allocated: %s free: %s "
13412 "count: %u",
13413 llstr(sum_rows, buff),
13414 llstr(sum_commits, buff2),
13415 llstr(sum_row_size, buff3),
13416 llstr(sum_mem, buff4),
13417 llstr(sum_ext_space, buff5),
13418 llstr(sum_free_ext_space, buff6),
13419 count));
13420
13421 DBUG_RETURN(0);
13422 retry:
13423 if(report_error)
13424 {
13425 if (file && pTrans)
13426 {
13427 reterr= file->ndb_err(pTrans, have_lock);
13428 }
13429 else
13430 {
13431 const NdbError& tmp= error;
13432 ERR_PRINT(tmp);
13433 reterr= ndb_to_mysql_error(&tmp);
13434 }
13435 }
13436 else
13437 reterr= error.code;
13438
13439 if (pTrans)
13440 {
13441 ndb->closeTransaction(pTrans);
13442 pTrans= NULL;
13443 }
13444 if (error.status == NdbError::TemporaryError &&
13445 retries-- && !thd->killed)
13446 {
13447 do_retry_sleep(retry_sleep);
13448 continue;
13449 }
13450 break;
13451 } while(1);
13452 DBUG_PRINT("exit", ("failed, reterr: %u, NdbError %u(%s)", reterr,
13453 error.code, error.message));
13454 DBUG_RETURN(reterr);
13455 }
13456
13457 /**
13458 Create a .ndb file to serve as a placeholder indicating
13459 that the table with this name is a ndb table.
13460 */
13461
write_ndb_file(const char * name)13462 int ha_ndbcluster::write_ndb_file(const char *name)
13463 {
13464 File file;
13465 bool error=1;
13466 char path[FN_REFLEN];
13467
13468 DBUG_ENTER("write_ndb_file");
13469 DBUG_PRINT("enter", ("name: %s", name));
13470
13471 #ifndef EMBEDDED_LIBRARY
13472 (void)strxnmov(path, FN_REFLEN-1,
13473 mysql_data_home,"/",name,ha_ndb_ext,NullS);
13474 #else
13475 (void)strxnmov(path, FN_REFLEN-1, name,ha_ndb_ext, NullS);
13476 #endif
13477
13478 if ((file=my_create(path, CREATE_MODE,O_RDWR | O_TRUNC,MYF(MY_WME))) >= 0)
13479 {
13480 // It's an empty file
13481 error=0;
13482 my_close(file,MYF(0));
13483 }
13484 DBUG_RETURN(error);
13485 }
13486
13487 #ifndef NDB_WITH_NEW_MRR_INTERFACE
13488 bool
null_value_index_search(KEY_MULTI_RANGE * ranges,KEY_MULTI_RANGE * end_range,HANDLER_BUFFER * buffer)13489 ha_ndbcluster::null_value_index_search(KEY_MULTI_RANGE *ranges,
13490 KEY_MULTI_RANGE *end_range,
13491 HANDLER_BUFFER *buffer)
13492 {
13493 DBUG_ENTER("null_value_index_search");
13494 KEY* key_info= table->key_info + active_index;
13495 KEY_MULTI_RANGE *range= ranges;
13496 ulong reclength= table->s->reclength;
13497 uchar *curr= (uchar*)buffer->buffer;
13498 uchar *end_of_buffer= (uchar*)buffer->buffer_end;
13499
13500 /* All passed ranges whose results could fit into the
13501 * buffer are examined, although some may later be
13502 * marked for skipping, wasting buffer space.
13503 */
13504 assert(!(range->range_flag & SKIP_RANGE));
13505
13506 for (; range<end_range && curr+reclength <= end_of_buffer;
13507 range++)
13508 {
13509 const uchar *key= range->start_key.key;
13510 uint key_len= range->start_key.length;
13511 if (check_null_in_key(key_info, key, key_len))
13512 DBUG_RETURN(TRUE);
13513 curr += reclength;
13514 }
13515 DBUG_RETURN(FALSE);
13516 }
13517 #endif
13518
check_read_before_write_removal()13519 void ha_ndbcluster::check_read_before_write_removal()
13520 {
13521 DBUG_ENTER("check_read_before_write_removal");
13522
13523 /* Must have determined that rbwr is possible */
13524 assert(m_read_before_write_removal_possible);
13525 m_read_before_write_removal_used= true;
13526
13527 /* Can't use on table with hidden primary key */
13528 assert(table_share->primary_key != MAX_KEY);
13529
13530 /* Index must be unique */
13531 DBUG_PRINT("info", ("using index %d", active_index));
13532 const KEY *key= table->key_info + active_index;
13533 assert((key->flags & HA_NOSAME)); NDB_IGNORE_VALUE(key);
13534
13535 DBUG_VOID_RETURN;
13536 }
13537
13538 #ifndef NDB_WITH_NEW_MRR_INTERFACE
13539 /*
13540 This is used to check if an ordered index scan is needed for a range in
13541 a multi range read.
13542 If a scan is not needed, we use a faster primary/unique key operation
13543 instead.
13544 */
13545 static my_bool
read_multi_needs_scan(NDB_INDEX_TYPE cur_index_type,const KEY * key_info,const KEY_MULTI_RANGE * r)13546 read_multi_needs_scan(NDB_INDEX_TYPE cur_index_type, const KEY *key_info,
13547 const KEY_MULTI_RANGE *r)
13548 {
13549 if (cur_index_type == ORDERED_INDEX)
13550 return TRUE;
13551 if (cur_index_type == PRIMARY_KEY_INDEX ||
13552 cur_index_type == UNIQUE_INDEX)
13553 return FALSE;
13554 DBUG_ASSERT(cur_index_type == PRIMARY_KEY_ORDERED_INDEX ||
13555 cur_index_type == UNIQUE_ORDERED_INDEX);
13556 if (r->start_key.length != key_info->key_length ||
13557 r->start_key.flag != HA_READ_KEY_EXACT)
13558 return TRUE; // Not exact match, need scan
13559 if (cur_index_type == UNIQUE_ORDERED_INDEX &&
13560 check_null_in_key(key_info, r->start_key.key,r->start_key.length))
13561 return TRUE; // Can't use for NULL values
13562 return FALSE;
13563 }
13564
/**
  Start a batched multi range read on the active index.

  If the read can not be batched, m_disable_multi_read is set and the
  default handler::read_multi_range_first() is used instead. Otherwise
  each range is either defined as a primary/unique key read into its own
  'reclength' sized slot of 'buffer', or added as a bound to a single
  ordered index scan (or to the pushed join query). The defined
  operations are then executed, the outcome of the key reads is recorded
  in the range flags, and the first row is fetched through
  read_multi_range_next().

  Defining may stop early (row buffer exhausted, or too many ranges for
  one scan); m_multi_range_defined_end then marks how far it got.

  @param found_range_p  Passed on to read_multi_range_next().
  @param ranges         First of the ranges to read.
  @param range_count    Number of ranges in 'ranges'.
  @param sorted         Whether sorted results are requested from the scan.
  @param buffer         Row buffer for the key reads; its end_of_used_area
                        is updated.

  @return Result of read_multi_range_next(), or an error code if defining
          or executing the operations failed.
*/
13565 int
read_multi_range_first(KEY_MULTI_RANGE ** found_range_p,KEY_MULTI_RANGE * ranges,uint range_count,bool sorted,HANDLER_BUFFER * buffer)13566 ha_ndbcluster::read_multi_range_first(KEY_MULTI_RANGE **found_range_p,
13567 KEY_MULTI_RANGE *ranges,
13568 uint range_count,
13569 bool sorted,
13570 HANDLER_BUFFER *buffer)
13571 {
13572 KEY* key_info= table->key_info + active_index;
13573 NDB_INDEX_TYPE cur_index_type= get_index_type(active_index);
13574 ulong reclength= table_share->reclength;
13575 NdbTransaction *trans= m_thd_ndb->trans;
13576 int error;
13577
13578 DBUG_ENTER("ha_ndbcluster::read_multi_range_first");
13579 DBUG_PRINT("info", ("blob fields=%d read_set=0x%x", table_share->blob_fields, table->read_set->bitmap[0]));
13580
13581 /**
13582 * Blobs and unique hash index with NULL can't be batched currently.
13583 * Neither are pushed lookup joins batchable.
13584 */
13585 if (uses_blob_value(table->read_set) ||
13586 (cur_index_type == UNIQUE_INDEX &&
13587 has_null_in_unique_index(active_index) &&
13588 null_value_index_search(ranges, ranges+range_count, buffer))
13589 || (m_pushed_join_operation==PUSHED_ROOT &&
13590 !m_disable_pushed_join &&
13591 !m_pushed_join_member->get_query_def().isScanQuery())
13592 || m_delete_cannot_batch || m_update_cannot_batch)
13593 {
13594 DBUG_PRINT("info", ("read_multi_range not possible, falling back to default handler implementation"));
13595 m_disable_multi_read= TRUE;
13596 DBUG_RETURN(handler::read_multi_range_first(found_range_p,
13597 ranges,
13598 range_count,
13599 sorted,
13600 buffer));
13601 }
13602
13603 /**
13604 * There may still be an open m_multi_cursor from the previous mrr access on this handler.
13605 * Close it now to free up resources for this NdbScanOperation.
13606 */
13607 if (unlikely((error= close_scan())))
13608 DBUG_RETURN(error);
13609
13610 m_disable_multi_read= FALSE;
13611
13612 /*
13613 * Copy arguments into member variables
13614 */
13615 m_multi_ranges= ranges;
13616 multi_range_curr= ranges;
13617 multi_range_end= ranges+range_count;
13618 multi_range_sorted= sorted;
13619 multi_range_buffer= buffer;
13620
13621 /*
13622 * read multi range will read ranges as follows (if not ordered)
13623 *
13624 * input read order
13625 * ====== ==========
13626 * pk-op 1 pk-op 1
13627 * pk-op 2 pk-op 2
13628 * range 3 range (3,5) NOTE result rows will be intermixed
13629 * pk-op 4 pk-op 4
13630 * range 5
13631 * pk-op 6 pk-op 6
13632 */
13633
13634 /*
13635 We first loop over all ranges, converting into primary/unique key
13636 operations if possible, and counting ranges that require an
13637 ordered index scan. If the supplied HANDLER_BUFFER is too small, we
13638 may also need to do only part of the multi read at once.
13639
13640 Afterwards, we create the ordered index scan cursor (if needed).
13641 */
13642
13643 DBUG_ASSERT(cur_index_type != UNDEFINED_INDEX);
13644 DBUG_ASSERT(m_multi_cursor==NULL);
13645 DBUG_ASSERT(m_active_query==NULL);
13646
/*
  Remember the last operation already defined on the transaction (if
  any); after execute it is used to locate the first key operation
  defined by this call.
*/
13647 const NdbOperation* lastOp= trans ? trans->getLastDefinedOperation() : 0;
13648 const NdbOperation::LockMode lm = get_ndb_lock_mode(m_lock.type);
13649 uchar *row_buf= (uchar *)buffer->buffer;
13650 const uchar *end_of_buffer= buffer->buffer_end;
13651 uint num_scan_ranges= 0;
13652 uint i;
13653 bool any_real_read= FALSE;
13654
13655 if (m_read_before_write_removal_possible)
13656 check_read_before_write_removal();
13657 for (i= 0; i < range_count; i++)
13658 {
13659 KEY_MULTI_RANGE *r= &ranges[i];
13660
13661 part_id_range part_spec;
13662 if (m_use_partition_pruning)
13663 {
13664 get_partition_set(table, table->record[0], active_index, &r->start_key,
13665 &part_spec);
13666 DBUG_PRINT("info", ("part_spec.start_part: %u part_spec.end_part: %u",
13667 part_spec.start_part, part_spec.end_part));
13668 /*
13669 If partition pruning has found no partition in set
13670 we can skip this scan
13671 */
13672 if (part_spec.start_part > part_spec.end_part)
13673 {
13674 /*
13675 We can skip this partition since the key won't fit into any
13676 partition
13677 */
13678 r->range_flag|= SKIP_RANGE;
13679 row_buf += reclength;
13680 continue;
13681 }
13682 if (!trans &&
13683 (part_spec.start_part == part_spec.end_part))
13684 if (unlikely(!(trans= start_transaction_part_id(part_spec.start_part,
13685 error))))
13686 DBUG_RETURN(error);
13687 }
13688 r->range_flag&= ~(uint)SKIP_RANGE;
13689
13690 if ((m_pushed_join_operation==PUSHED_ROOT &&
13691 m_pushed_join_member->get_query_def().isScanQuery()) || // Pushed joins restricted to ordered range scan in mrr
13692 read_multi_needs_scan(cur_index_type, key_info, r))
13693 {
13694 if (!trans)
13695 {
13696 // ToDo see if we can use start_transaction_key here instead
13697 if (!m_use_partition_pruning)
13698 {
13699 get_partition_set(table, table->record[0], active_index, &r->start_key,
13700 &part_spec);
13701 if (part_spec.start_part == part_spec.end_part)
13702 {
13703 if (unlikely(!(trans= start_transaction_part_id(part_spec.start_part,
13704 error))))
13705 DBUG_RETURN(error);
13706 }
13707 else if (unlikely(!(trans= start_transaction(error))))
13708 DBUG_RETURN(error);
13709 }
13710 else if (unlikely(!(trans= start_transaction(error))))
13711 DBUG_RETURN(error);
13712 }
13713
13714 any_real_read= TRUE;
13715 DBUG_PRINT("info", ("any_real_read= TRUE"));
13716
13717 /*
13718 If we reach the limit of ranges allowed in a single scan: stop
13719 here, send what we have so far, and continue when done with that.
13720 */
13721 if (i > NdbIndexScanOperation::MaxRangeNo)
13722 {
13723 DBUG_PRINT("info", ("Reached the limit of ranges allowed in a single"
13724 "scan"));
13725 break;
13726 }
13727
13728 #ifndef NDB_WITHOUT_JOIN_PUSHDOWN
13729 /* Create the scan operation for the first scan range. */
13730 if (check_if_pushable(NdbQueryOperationDef::OrderedIndexScan,
13731 active_index,
13732 !m_active_query && sorted))
13733 {
13734 if (!m_active_query)
13735 {
13736 const int error= create_pushed_join();
13737 if (unlikely(error))
13738 DBUG_RETURN(error);
13739
13740 NdbQuery* const query= m_active_query;
13741 if (sorted &&
13742 query->getQueryOperation((uint)PUSHED_ROOT)->setOrdering(NdbQueryOptions::ScanOrdering_ascending))
13743 ERR_RETURN(query->getNdbError());
13744 }
13745 }
13746 else
13747 #endif
13748 if (!m_multi_cursor)
13749 {
13750 if (m_pushed_join_operation == PUSHED_ROOT)
13751 {
13752 m_thd_ndb->m_pushed_queries_dropped++;
13753 }
13754 /* Do a multi-range index scan for ranges not done by primary/unique key. */
13755 NdbScanOperation::ScanOptions options;
13756 NdbInterpretedCode code(m_table);
13757
13758 options.optionsPresent=
13759 NdbScanOperation::ScanOptions::SO_SCANFLAGS |
13760 NdbScanOperation::ScanOptions::SO_PARALLEL;
13761
13762 options.scan_flags=
13763 NdbScanOperation::SF_ReadRangeNo |
13764 NdbScanOperation::SF_MultiRange;
13765
13766 if (lm == NdbOperation::LM_Read)
13767 options.scan_flags|= NdbScanOperation::SF_KeyInfo;
13768 if (sorted)
13769 options.scan_flags|= NdbScanOperation::SF_OrderByFull;
13770
13771 options.parallel= DEFAULT_PARALLELISM;
13772
13773 NdbOperation::GetValueSpec gets[2];
13774 if (table_share->primary_key == MAX_KEY)
13775 get_hidden_fields_scan(&options, gets);
13776
13777 if (m_cond && m_cond->generate_scan_filter(&code, &options))
13778 ERR_RETURN(code.getNdbError());
13779
13780 /* Define scan */
13781 NdbIndexScanOperation *scanOp= trans->scanIndex
13782 (m_index[active_index].ndb_record_key,
13783 m_ndb_record,
13784 lm,
13785 (uchar *)(table->read_set->bitmap),
13786 NULL, /* All bounds specified below */
13787 &options,
13788 sizeof(NdbScanOperation::ScanOptions));
13789
13790 if (!scanOp)
13791 ERR_RETURN(trans->getNdbError());
13792
13793 m_multi_cursor= scanOp;
13794
13795 /*
13796 We do not get_blob_values() here, as when using blobs we always
13797 fallback to non-batched multi range read (see if statement at
13798 top of this function).
13799 */
13800
13801 /* We set m_next_row=0 to say that no row was fetched from the scan yet. */
13802 m_next_row= 0;
13803 }
13804
13805 Ndb::PartitionSpec ndbPartitionSpec;
13806 const Ndb::PartitionSpec* ndbPartSpecPtr= NULL;
13807
13808 /* If this table uses user-defined partitioning, use MySQLD provided
13809 * partition info as pruning info
13810 * Otherwise, scan range pruning is performed automatically by
13811 * NDBAPI based on distribution key values.
13812 */
13813 if (m_use_partition_pruning &&
13814 m_user_defined_partitioning &&
13815 (part_spec.start_part == part_spec.end_part))
13816 {
13817 DBUG_PRINT("info", ("Range on user-def-partitioned table can be pruned to part %u",
13818 part_spec.start_part));
13819 ndbPartitionSpec.type= Ndb::PartitionSpec::PS_USER_DEFINED;
13820 ndbPartitionSpec.UserDefined.partitionId= part_spec.start_part;
13821 ndbPartSpecPtr= &ndbPartitionSpec;
13822 }
13823
13824 /* Include this range in the ordered index scan. */
13825 NdbIndexScanOperation::IndexBound bound;
13826 compute_index_bounds(bound, key_info, &r->start_key, &r->end_key, 0);
13827 bound.range_no= i;
13828
13829 const NdbRecord *key_rec= m_index[active_index].ndb_record_key;
13830 if (m_active_query)
13831 {
13832 DBUG_PRINT("info", ("setBound:%d, for pushed join", bound.range_no));
13833 if (m_active_query->setBound(key_rec, &bound))
13834 {
13835 ERR_RETURN(trans->getNdbError());
13836 }
13837 }
13838 else
13839 {
13840 if (m_multi_cursor->setBound(key_rec,
13841 bound,
13842 ndbPartSpecPtr, // Only for user-def tables
13843 sizeof(Ndb::PartitionSpec)))
13844 {
13845 ERR_RETURN(trans->getNdbError());
13846 }
13847 }
13848
13849 r->range_flag&= ~(uint)UNIQUE_RANGE;
13850 num_scan_ranges++;
13851 }
13852 else // if ((...PUSHED_ROOT && m_pushed_join->get_query_def().isScanQuery()) ||...
13853 {
13854 if (m_pushed_join_operation == PUSHED_ROOT)
13855 {
13856 m_thd_ndb->m_pushed_queries_dropped++;
13857 }
13858 if (!trans)
13859 {
13860 DBUG_ASSERT(active_index != MAX_KEY);
13861 if (unlikely(!(trans= start_transaction_key(active_index,
13862 r->start_key.key,
13863 error))))
13864 DBUG_RETURN(error);
13865 }
13866 /*
13867 Convert to primary/unique key operation.
13868
13869 If there is not enough buffer for reading the row: stop here, send
13870 what we have so far, and continue when done with that.
13871 */
13872 if (row_buf + reclength > end_of_buffer)
13873 break;
13874
/*
  With read before write removal no read is defined for the range and
  no row slot is taken from the buffer; read_multi_range_next()
  restores the row from the range key instead.
*/
13875 if (m_read_before_write_removal_used)
13876 {
13877 r->range_flag|= READ_KEY_FROM_RANGE;
13878 continue;
13879 }
13880 else
13881 {
13882 any_real_read= TRUE;
13883 DBUG_PRINT("info", ("m_read_before_write_removal_used == FALSE, "
13884 "any_real_read= TRUE"));
13885 }
13886 r->range_flag|= UNIQUE_RANGE;
13887
13888 Uint32 partitionId;
13889 Uint32* ppartitionId = NULL;
13890
13891 if (m_user_defined_partitioning &&
13892 (cur_index_type == PRIMARY_KEY_ORDERED_INDEX ||
13893 cur_index_type == PRIMARY_KEY_INDEX))
13894 {
13895 partitionId=part_spec.start_part;
13896 ppartitionId=&partitionId;
13897 }
13898
13899 DBUG_PRINT("info", ("Generating Pk/Unique key read for range %u", i));
13900
13901 // 'Pushable codepath' is incomplete and expected not
13902 // to be produced as make_join_pushed() handle
13903 // AT_MULTI_UNIQUE_KEY as non-pushable
13904 if (m_pushed_join_operation==PUSHED_ROOT &&
13905 !m_disable_pushed_join &&
13906 !m_pushed_join_member->get_query_def().isScanQuery())
13907 {
13908 DBUG_ASSERT(false); // Incomplete code, should not be executed
13909 DBUG_ASSERT(lm == NdbOperation::LM_CommittedRead);
13910 const int error= pk_unique_index_read_key_pushed(active_index,
13911 r->start_key.key,
13912 ppartitionId);
13913 if (unlikely(error))
13914 DBUG_RETURN(error);
13915 }
13916 else
13917 {
13918 if (m_pushed_join_operation == PUSHED_ROOT)
13919 {
13920 DBUG_PRINT("info", ("Cannot push join due to incomplete implementation."));
13921 m_thd_ndb->m_pushed_queries_dropped++;
13922 }
13923 const NdbOperation* op;
13924 if (!(op= pk_unique_index_read_key(active_index,
13925 r->start_key.key,
13926 row_buf, lm,
13927 ppartitionId)))
13928 ERR_RETURN(trans->getNdbError());
13929 }
13930 row_buf+= reclength;
13931 }
13932 }
13933 DBUG_ASSERT(i > 0 || i == range_count); // Require progress
/*
  Ranges from index 'i' and onwards have not been defined by this call;
  read_multi_range_next() calls this function again for them once it
  has reached m_multi_range_defined_end.
*/
13934 m_multi_range_defined_end= ranges + i;
13935
13936 buffer->end_of_used_area= row_buf;
13937
13938 if (m_active_query != NULL &&
13939 m_pushed_join_member->get_query_def().isScanQuery())
13940 {
13941 m_thd_ndb->m_scan_count++;
13942 if (sorted)
13943 {
13944 m_thd_ndb->m_sorted_scan_count++;
13945 }
13946
13947 bool prunable = false;
13948 if (unlikely(m_active_query->isPrunable(prunable) != 0))
13949 ERR_RETURN(m_active_query->getNdbError());
13950 if (prunable)
13951 m_thd_ndb->m_pruned_scan_count++;
13952
13953 DBUG_PRINT("info", ("Is MRR scan-query pruned to 1 partition? :%u", prunable));
13954 DBUG_ASSERT(!m_multi_cursor);
13955 };
13956 if (m_multi_cursor)
13957 {
13958 DBUG_PRINT("info", ("Is MRR scan pruned to 1 partition? :%u",
13959 m_multi_cursor->getPruned()));
13960 m_thd_ndb->m_scan_count++;
13961 m_thd_ndb->m_pruned_scan_count += (m_multi_cursor->getPruned()? 1 : 0);
13962 if (sorted)
13963 {
13964 m_thd_ndb->m_sorted_scan_count++;
13965 }
13966 };
13967
13968 if (any_real_read)
13969 {
13970 /* Get pointer to first range key operation (not scans) */
13971 const NdbOperation* rangeOp= lastOp ? lastOp->next() :
13972 trans->getFirstDefinedOperation();
13973
13974 DBUG_PRINT("info", ("Executing reads"));
13975
13976 if (execute_no_commit_ie(m_thd_ndb, trans) == 0)
13977 {
13978 m_multi_range_result_ptr= buffer->buffer;
13979
13980 /* We must check the result of any primary or unique key
13981 * ranges now, as these operations may be invalidated by
13982 * further execute+releaseOperations calls on this transaction by
13983 * different handler objects.
13984 */
13985 KEY_MULTI_RANGE* rangeInfo= multi_range_curr;
13986
13987 for (;rangeInfo < m_multi_range_defined_end; rangeInfo++)
13988 {
13989 DBUG_PRINT("info", ("range flag is %u", rangeInfo->range_flag));
13990 if (rangeInfo->range_flag & SKIP_RANGE)
13991 continue;
13992
13993 if ((rangeInfo->range_flag & UNIQUE_RANGE) &&
13994 (!(rangeInfo->range_flag & READ_KEY_FROM_RANGE)))
13995 {
13996 assert(rangeOp != NULL);
13997 if (rangeOp->getNdbError().code == 0)
13998 {
13999 /* Successful read, results are in buffer.
14000 */
14001 rangeInfo->range_flag &= ~(uint)EMPTY_RANGE;
14002
14003 DBUG_PRINT("info", ("Unique range op has result"));
14004 }
14005 else
14006 {
14007 NdbError err= rangeOp->getNdbError();
14008
14009 if (err.classification !=
14010 NdbError::NoDataFound)
14011 DBUG_RETURN(ndb_err(trans));
14012
14013 DBUG_PRINT("info", ("Unique range op has no result"));
14014 /* Indicate to read_multi_range_next that this
14015 * result is empty
14016 */
14017 rangeInfo->range_flag |= EMPTY_RANGE;
14018 }
14019
14020 /* Move to next completed operation */
14021 rangeOp= trans->getNextCompletedOperation(rangeOp);
14022 }
14023
14024 /* For scan ranges, do nothing here */
14025 }
14026 }
14027 else
14028 ERR_RETURN(trans->getNdbError());
14029 }
14030
/* Hand over to read_multi_range_next() to return the first row, if any */
14031 DBUG_RETURN(read_multi_range_next(found_range_p));
14032 }
14033
14034 int
read_multi_range_next(KEY_MULTI_RANGE ** multi_range_found_p)14035 ha_ndbcluster::read_multi_range_next(KEY_MULTI_RANGE ** multi_range_found_p)
14036 {
14037 DBUG_ENTER("ha_ndbcluster::read_multi_range_next");
14038 if (m_disable_multi_read)
14039 {
14040 DBUG_RETURN(handler::read_multi_range_next(multi_range_found_p));
14041 }
14042
14043 const ulong reclength= table_share->reclength;
14044
14045 while (multi_range_curr < m_multi_range_defined_end)
14046 {
14047 if (multi_range_curr->range_flag & SKIP_RANGE)
14048 {
14049 /* Nothing in this range, move to next one, skipping a buffer
14050 'slot'
14051 */
14052 m_multi_range_result_ptr += reclength;
14053 multi_range_curr++;
14054 }
14055 else if (multi_range_curr->range_flag & READ_KEY_FROM_RANGE)
14056 {
14057 DBUG_PRINT("info", ("using read before write removal optimisation"));
14058 KEY* key_info= table->key_info + active_index;
14059 key_restore(table->record[0], (uchar*)multi_range_curr->start_key.key,
14060 key_info, key_info->key_length);
14061 table->status= 0;
14062 multi_range_curr++;
14063 DBUG_RETURN(0);
14064 }
14065 else if (multi_range_curr->range_flag & UNIQUE_RANGE)
14066 {
14067 /*
14068 Move to next range; we can have at most one record from a unique range.
14069 */
14070 KEY_MULTI_RANGE *old_multi_range_curr= multi_range_curr;
14071 multi_range_curr= old_multi_range_curr + 1;
14072 /*
14073 Clear m_active_cursor; it is used as a flag in update_row() /
14074 delete_row() to know whether the current tuple is from a scan
14075 or pk operation.
14076 */
14077 m_active_cursor= NULL;
14078 const uchar *src_row= m_multi_range_result_ptr;
14079 m_multi_range_result_ptr= src_row + table_share->reclength;
14080
14081 if (!(old_multi_range_curr->range_flag & EMPTY_RANGE))
14082 {
14083 *multi_range_found_p= old_multi_range_curr;
14084 memcpy(table->record[0], src_row, table_share->reclength);
14085 DBUG_RETURN(0);
14086 }
14087
14088 /* No row found, so fall through to try the next range. */
14089 }
14090 else
14091 {
14092 /* An index scan range. */
14093 {
14094 int res;
14095 if ((res= read_multi_range_fetch_next()) != 0)
14096 DBUG_RETURN(res);
14097 }
14098 if (!m_next_row)
14099 {
14100 /*
14101 The whole scan is done, and the cursor has been closed.
14102 So nothing more for this range. Move to next.
14103 */
14104 multi_range_curr++;
14105 }
14106 else
14107 {
14108 int current_range_no= m_current_range_no;
14109 int expected_range_no;
14110 /*
14111 For a sorted index scan, we will receive rows in increasing range_no
14112 order, so we can return ranges in order, pausing when range_no
14113 indicate that the currently processed range (multi_range_curr) is
14114 done.
14115
14116 But for unsorted scan, we may receive a high range_no from one
14117 fragment followed by a low range_no from another fragment. So we
14118 need to process all index scan ranges together.
14119 */
14120 if (!multi_range_sorted ||
14121 (expected_range_no= multi_range_curr - m_multi_ranges)
14122 == current_range_no)
14123 {
14124 *multi_range_found_p= m_multi_ranges + current_range_no;
14125 /* Copy out data from the new row. */
14126 unpack_record(table->record[0], m_next_row);
14127 table->status= 0;
14128 /*
14129 Mark that we have used this row, so we need to fetch a new
14130 one on the next call.
14131 */
14132 m_next_row= 0;
14133 /*
14134 Set m_active_cursor; it is used as a flag in update_row() /
14135 delete_row() to know whether the current tuple is from a scan or
14136 pk operation.
14137 */
14138 m_active_cursor= m_multi_cursor;
14139
14140 DBUG_RETURN(0);
14141 }
14142 else if (current_range_no > expected_range_no)
14143 {
14144 /* Nothing more in scan for this range. Move to next. */
14145 multi_range_curr++;
14146 }
14147 else
14148 {
14149 /*
14150 Should not happen. Ranges should be returned from NDB API in
14151 the order we requested them.
14152 */
14153 DBUG_ASSERT(0);
14154 multi_range_curr++; // Attempt to carry on
14155 }
14156 }
14157 }
14158 }
14159
14160 if (multi_range_curr == multi_range_end)
14161 {
14162 DBUG_RETURN(HA_ERR_END_OF_FILE);
14163 }
14164
14165 /*
14166 Read remaining ranges
14167 */
14168 DBUG_RETURN(read_multi_range_first(multi_range_found_p,
14169 multi_range_curr,
14170 multi_range_end - multi_range_curr,
14171 multi_range_sorted,
14172 multi_range_buffer));
14173 }
14174
14175 /*
14176 Fetch next row from the ordered index cursor in multi range scan.
14177
14178 We keep the next row in m_next_row, and the range_no of the
14179 next row in m_current_range_no. This is used in sorted index scan
14180 to correctly interleave rows from primary/unique key operations with
14181 rows from the scan.
14182 */
14183 int
read_multi_range_fetch_next()14184 ha_ndbcluster::read_multi_range_fetch_next()
14185 {
14186 DBUG_ENTER("read_multi_range_fetch_next");
14187
14188 if (m_active_query)
14189 {
14190 DBUG_PRINT("info", ("read_multi_range_fetch_next from pushed join, m_next_row:%p", m_next_row));
14191 if (!m_next_row)
14192 {
14193 int res= fetch_next_pushed();
14194 if (res == NdbQuery::NextResult_gotRow)
14195 {
14196 m_current_range_no= 0;
14197 // m_current_range_no= cursor->get_range_no(); // FIXME SPJ, need rangeNo from index scan
14198 }
14199 else if (res == NdbQuery::NextResult_scanComplete)
14200 {
14201 /* We have fetched the last row from the scan. */
14202 m_active_query->close(FALSE);
14203 m_active_query= 0;
14204 m_next_row= 0;
14205 DBUG_RETURN(0);
14206 }
14207 else
14208 {
14209 /* An error. */
14210 DBUG_RETURN(res);
14211 }
14212 }
14213 }
14214 else if (m_multi_cursor)
14215 {
14216 if (!m_next_row)
14217 {
14218 NdbIndexScanOperation *cursor= (NdbIndexScanOperation *)m_multi_cursor;
14219 int res= fetch_next(cursor);
14220 if (res == 0)
14221 {
14222 m_current_range_no= cursor->get_range_no();
14223 }
14224 else if (res == 1)
14225 {
14226 /* We have fetched the last row from the scan. */
14227 cursor->close(FALSE, TRUE);
14228 m_active_cursor= 0;
14229 m_multi_cursor= 0;
14230 m_next_row= 0;
14231 DBUG_RETURN(0);
14232 }
14233 else
14234 {
14235 /* An error. */
14236 DBUG_RETURN(res);
14237 }
14238 }
14239 }
14240 DBUG_RETURN(0);
14241 }
14242 #endif
14243
14244 #ifndef NDB_WITHOUT_JOIN_PUSHDOWN
14245
14246 /**
14247 * Try to find pushable subsets of a join plan.
14248 * @param hton unused (maybe useful for other engines).
14249 * @param thd Thread.
14250 * @param plan The join plan to examine.
14251 * @return Possible error code.
14252 */
14253 static
ndbcluster_make_pushed_join(handlerton * hton,THD * thd,AQP::Join_plan * plan)14254 int ndbcluster_make_pushed_join(handlerton *hton,
14255 THD* thd,
14256 AQP::Join_plan* plan)
14257 {
14258 DBUG_ENTER("ndbcluster_make_pushed_join");
14259 (void)ha_ndb_ext; // prevents compiler warning.
14260
14261 if (THDVAR(thd, join_pushdown))
14262 {
14263 ndb_pushed_builder_ctx pushed_builder(*plan);
14264
14265 for (uint i= 0; i < plan->get_access_count()-1; i++)
14266 {
14267 const AQP::Table_access* const join_root= plan->get_table_access(i);
14268 const ndb_pushed_join* pushed_join= NULL;
14269
14270 // Try to build a ndb_pushed_join starting from 'join_root'
14271 int error= pushed_builder.make_pushed_join(join_root, pushed_join);
14272 if (unlikely(error))
14273 {
14274 if (error < 0) // getNdbError() gives us the error code
14275 {
14276 ERR_SET(pushed_builder.getNdbError(),error);
14277 }
14278 join_root->get_table()->file->print_error(error, MYF(0));
14279 DBUG_RETURN(error);
14280 }
14281
14282 // Assign any produced pushed_join definitions to
14283 // the ha_ndbcluster instance representing its root.
14284 if (pushed_join != NULL)
14285 {
14286 ha_ndbcluster* const handler=
14287 static_cast<ha_ndbcluster*>(join_root->get_table()->file);
14288
14289 error= handler->assign_pushed_join(pushed_join);
14290 if (unlikely(error))
14291 {
14292 delete pushed_join;
14293 handler->print_error(error, MYF(0));
14294 DBUG_RETURN(error);
14295 }
14296 }
14297 }
14298 }
14299 DBUG_RETURN(0);
14300 }
14301 #endif
14302
14303
14304 /**
14305 * In case a pushed join having the table for this handler as its root
14306 * has been produced. ::assign_pushed_join() is responsible for setting
14307 * up this ha_ndbcluster instance such that the prepared NdbQuery
14308 * might be instantiated at execution time.
14309 */
14310 int
assign_pushed_join(const ndb_pushed_join * pushed_join)14311 ha_ndbcluster::assign_pushed_join(const ndb_pushed_join* pushed_join)
14312 {
14313 DBUG_ENTER("assign_pushed_join");
14314 m_thd_ndb->m_pushed_queries_defined++;
14315
14316 for (uint i = 0; i < pushed_join->get_operation_count(); i++)
14317 {
14318 const TABLE* const tab= pushed_join->get_table(i);
14319 DBUG_ASSERT(tab->file->ht == ht);
14320 ha_ndbcluster* child= static_cast<ha_ndbcluster*>(tab->file);
14321 child->m_pushed_join_member= pushed_join;
14322 child->m_pushed_join_operation= i;
14323 }
14324
14325 DBUG_PRINT("info", ("Assigned pushed join with %d child operations",
14326 pushed_join->get_operation_count()-1));
14327
14328 DBUG_RETURN(0);
14329 }
14330
14331
14332 /**
14333 * First level of filtering tables which *maybe* may be part of
14334 * a pushed query: Returning 'false' will eliminate this table
14335 * from being a part of a pushed join.
14336 * A 'reason' for rejecting this table is required if 'false'
14337 * is returned.
14338 */
14339 bool
maybe_pushable_join(const char * & reason) const14340 ha_ndbcluster::maybe_pushable_join(const char*& reason) const
14341 {
14342 reason= "";
14343 if (uses_blob_value(table->read_set))
14344 {
14345 reason= "select list can't contain BLOB columns";
14346 return false;
14347 }
14348 if (m_user_defined_partitioning)
14349 {
14350 reason= "has user defined partioning";
14351 return false;
14352 }
14353
14354 // Pushed operations may not set locks.
14355 const NdbOperation::LockMode lockMode= get_ndb_lock_mode(m_lock.type);
14356 switch (lockMode)
14357 {
14358 case NdbOperation::LM_CommittedRead:
14359 return true;
14360
14361 case NdbOperation::LM_Read:
14362 case NdbOperation::LM_Exclusive:
14363 reason= "lock modes other than 'read committed' not implemented";
14364 return false;
14365
14366 default: // Other lock modes not used by handler.
14367 assert(false);
14368 return false;
14369 }
14370
14371 return true;
14372 }
14373
14374 /**
14375 * Check if this table access operation (and a number of succeeding operations)
14376 * can be pushed to the cluster and executed there. This requires that there
14377 * is an NdbQueryDefinition and that it still corresponds to the
14378 * type of operation that we intend to execute. (The MySQL server will
14379 * sometimes change its mind and replace a scan with a lookup or vice versa
14380 * as it works its way into the nested loop join.)
14381 *
14382 * @param type This is the operation type that the server wants to execute.
14383 * @param idx Index used whenever relevant for operation type
14384 * @param needSorted True if the root operation is an ordered index scan
14385 * with sorted results.
14386 * @return True if the operation may be pushed.
14387 */
14388 #ifndef NDB_WITHOUT_JOIN_PUSHDOWN
14389 bool
check_if_pushable(int type,uint idx,bool needSorted) const14390 ha_ndbcluster::check_if_pushable(int type, //NdbQueryOperationDef::Type,
14391 uint idx,
14392 bool needSorted) const
14393 {
14394 if (m_disable_pushed_join)
14395 {
14396 DBUG_PRINT("info", ("Push disabled (HA_EXTRA_KEYREAD)"));
14397 return false;
14398 }
14399 return m_pushed_join_operation == PUSHED_ROOT
14400 && m_pushed_join_member != NULL
14401 && m_pushed_join_member->match_definition(
14402 type,
14403 (idx<MAX_KEY) ? &m_index[idx] : NULL,
14404 needSorted);
14405 }
14406
14407 int
create_pushed_join(const NdbQueryParamValue * keyFieldParams,uint paramCnt)14408 ha_ndbcluster::create_pushed_join(const NdbQueryParamValue* keyFieldParams, uint paramCnt)
14409 {
14410 DBUG_ENTER("create_pushed_join");
14411 DBUG_ASSERT(m_pushed_join_member && m_pushed_join_operation == PUSHED_ROOT);
14412
14413 NdbQuery* const query=
14414 m_pushed_join_member->make_query_instance(m_thd_ndb->trans, keyFieldParams, paramCnt);
14415
14416 if (unlikely(query==NULL))
14417 ERR_RETURN(m_thd_ndb->trans->getNdbError());
14418
14419 // Bind to instantiated NdbQueryOperations.
14420 for (uint i= 0; i < m_pushed_join_member->get_operation_count(); i++)
14421 {
14422 const TABLE* const tab= m_pushed_join_member->get_table(i);
14423 ha_ndbcluster* handler= static_cast<ha_ndbcluster*>(tab->file);
14424
14425 DBUG_ASSERT(handler->m_pushed_join_operation==(int)i);
14426 NdbQueryOperation* const op= query->getQueryOperation(i);
14427 handler->m_pushed_operation= op;
14428
14429 // Bind to result buffers
14430 const NdbRecord* const resultRec= handler->m_ndb_record;
14431 int res= op->setResultRowRef(
14432 resultRec,
14433 handler->_m_next_row,
14434 (uchar *)(tab->read_set->bitmap));
14435 if (unlikely(res))
14436 ERR_RETURN(query->getNdbError());
14437
14438 // We clear 'm_next_row' to say that no row was fetched from the query yet.
14439 handler->_m_next_row= 0;
14440 }
14441
14442 DBUG_ASSERT(m_active_query==NULL);
14443 m_active_query= query;
14444 m_thd_ndb->m_pushed_queries_executed++;
14445
14446 DBUG_RETURN(0);
14447 }
14448 #endif
14449
14450
14451 /**
14452 * Check if this table access operation is part of a pushed join operation
14453 * which is actively executing.
14454 */
14455 bool
check_is_pushed() const14456 ha_ndbcluster::check_is_pushed() const
14457 {
14458 if (m_pushed_join_member == NULL)
14459 return false;
14460
14461 handler *root= m_pushed_join_member->get_table(PUSHED_ROOT)->file;
14462 return (static_cast<ha_ndbcluster*>(root)->m_active_query);
14463 }
14464
14465 uint
number_of_pushed_joins() const14466 ha_ndbcluster::number_of_pushed_joins() const
14467 {
14468 if (m_pushed_join_member == NULL)
14469 return 0;
14470 else
14471 return m_pushed_join_member->get_operation_count();
14472 }
14473
14474 const TABLE*
root_of_pushed_join() const14475 ha_ndbcluster::root_of_pushed_join() const
14476 {
14477 if (m_pushed_join_member == NULL)
14478 return NULL;
14479 else
14480 return m_pushed_join_member->get_table(PUSHED_ROOT);
14481 }
14482
14483 const TABLE*
parent_of_pushed_join() const14484 ha_ndbcluster::parent_of_pushed_join() const
14485 {
14486 if (m_pushed_join_operation > PUSHED_ROOT)
14487 {
14488 DBUG_ASSERT(m_pushed_join_member!=NULL);
14489 uint parent_ix= m_pushed_join_member
14490 ->get_query_def().getQueryOperation(m_pushed_join_operation)
14491 ->getParentOperation(0)
14492 ->getQueryOperationIx();
14493 return m_pushed_join_member->get_table(parent_ix);
14494 }
14495 return NULL;
14496 }
14497
14498 /**
14499 @param[in] comment table comment defined by user
14500
14501 @return
14502 table comment + additional
14503 */
14504 char*
update_table_comment(const char * comment)14505 ha_ndbcluster::update_table_comment(
14506 /* out: table comment + additional */
14507 const char* comment)/* in: table comment defined by user */
14508 {
14509 THD *thd= current_thd;
14510 uint length= strlen(comment);
14511 if (length > 64000 - 3)
14512 {
14513 return((char*)comment); /* string too long */
14514 }
14515
14516 Ndb* ndb;
14517 if (!(ndb= get_ndb(thd)))
14518 {
14519 return((char*)comment);
14520 }
14521
14522 if (ndb->setDatabaseName(m_dbname))
14523 {
14524 return((char*)comment);
14525 }
14526 const NDBTAB* tab= m_table;
14527 DBUG_ASSERT(tab != NULL);
14528
14529 char *str;
14530 const char *fmt="%s%snumber_of_replicas: %d";
14531 const unsigned fmt_len_plus_extra= length + strlen(fmt);
14532 if ((str= (char*) my_malloc(fmt_len_plus_extra, MYF(0))) == NULL)
14533 {
14534 sql_print_error("ha_ndbcluster::update_table_comment: "
14535 "my_malloc(%u) failed", (unsigned int)fmt_len_plus_extra);
14536 return (char*)comment;
14537 }
14538
14539 my_snprintf(str,fmt_len_plus_extra,fmt,comment,
14540 length > 0 ? " ":"",
14541 tab->getReplicaCount());
14542 return str;
14543 }
14544
14545
14546 /**
14547 Utility thread main loop.
14548 */
ndb_util_thread_func(void * arg MY_ATTRIBUTE ((unused)))14549 pthread_handler_t ndb_util_thread_func(void *arg MY_ATTRIBUTE((unused)))
14550 {
14551 THD *thd; /* needs to be first for thread_stack */
14552 struct timespec abstime;
14553 Thd_ndb *thd_ndb= NULL;
14554 uint share_list_size= 0;
14555 NDB_SHARE **share_list= NULL;
14556
14557 my_thread_init();
14558 DBUG_ENTER("ndb_util_thread");
14559 DBUG_PRINT("enter", ("cache_check_time: %lu", opt_ndb_cache_check_time));
14560
14561 pthread_mutex_lock(&LOCK_ndb_util_thread);
14562
14563 thd= new THD; /* note that contructor of THD uses DBUG_ */
14564 if (thd == NULL)
14565 {
14566 my_errno= HA_ERR_OUT_OF_MEM;
14567 DBUG_RETURN(NULL);
14568 }
14569 THD_CHECK_SENTRY(thd);
14570 pthread_detach_this_thread();
14571 ndb_util_thread= pthread_self();
14572
14573 thd->thread_stack= (char*)&thd; /* remember where our stack is */
14574 if (thd->store_globals())
14575 goto ndb_util_thread_fail;
14576 lex_start(thd);
14577 thd->init_for_queries();
14578 thd_set_command(thd, COM_DAEMON);
14579 #ifndef NDB_THD_HAS_NO_VERSION
14580 thd->version=refresh_version;
14581 #endif
14582 thd->client_capabilities = 0;
14583 thd->security_ctx->skip_grants();
14584 my_net_init(&thd->net, 0);
14585
14586 CHARSET_INFO *charset_connection;
14587 charset_connection= get_charset_by_csname("utf8",
14588 MY_CS_PRIMARY, MYF(MY_WME));
14589 thd->variables.character_set_client= charset_connection;
14590 thd->variables.character_set_results= charset_connection;
14591 thd->variables.collation_connection= charset_connection;
14592 thd->update_charset();
14593
14594 /* Signal successful initialization */
14595 ndb_util_thread_running= 1;
14596 pthread_cond_signal(&COND_ndb_util_ready);
14597 pthread_mutex_unlock(&LOCK_ndb_util_thread);
14598
14599 /*
14600 wait for mysql server to start
14601 */
14602 mysql_mutex_lock(&LOCK_server_started);
14603 while (!mysqld_server_started)
14604 {
14605 set_timespec(abstime, 1);
14606 mysql_cond_timedwait(&COND_server_started, &LOCK_server_started,
14607 &abstime);
14608 if (ndbcluster_terminating)
14609 {
14610 mysql_mutex_unlock(&LOCK_server_started);
14611 pthread_mutex_lock(&LOCK_ndb_util_thread);
14612 goto ndb_util_thread_end;
14613 }
14614 }
14615 mysql_mutex_unlock(&LOCK_server_started);
14616
14617 /*
14618 Wait for cluster to start
14619 */
14620 pthread_mutex_lock(&LOCK_ndb_util_thread);
14621 while (!g_ndb_status.cluster_node_id && (ndbcluster_hton->slot != ~(uint)0))
14622 {
14623 /* ndb not connected yet */
14624 pthread_cond_wait(&COND_ndb_util_thread, &LOCK_ndb_util_thread);
14625 if (ndbcluster_terminating)
14626 goto ndb_util_thread_end;
14627 }
14628 pthread_mutex_unlock(&LOCK_ndb_util_thread);
14629
14630 /* Get thd_ndb for this thread */
14631 if (!(thd_ndb= Thd_ndb::seize(thd)))
14632 {
14633 sql_print_error("Could not allocate Thd_ndb object");
14634 pthread_mutex_lock(&LOCK_ndb_util_thread);
14635 goto ndb_util_thread_end;
14636 }
14637 thd_set_thd_ndb(thd, thd_ndb);
14638 thd_ndb->options|= TNO_NO_LOG_SCHEMA_OP;
14639
14640 if (opt_ndb_extra_logging && ndb_binlog_running)
14641 sql_print_information("NDB Binlog: Ndb tables initially read only.");
14642
14643 set_timespec(abstime, 0);
14644 for (;;)
14645 {
14646 pthread_mutex_lock(&LOCK_ndb_util_thread);
14647 if (!ndbcluster_terminating)
14648 pthread_cond_timedwait(&COND_ndb_util_thread,
14649 &LOCK_ndb_util_thread,
14650 &abstime);
14651 if (ndbcluster_terminating) /* Shutting down server */
14652 goto ndb_util_thread_end;
14653 pthread_mutex_unlock(&LOCK_ndb_util_thread);
14654 #ifdef NDB_EXTRA_DEBUG_UTIL_THREAD
14655 DBUG_PRINT("ndb_util_thread", ("Started, cache_check_time: %lu",
14656 opt_ndb_cache_check_time));
14657 #endif
14658
14659 /*
14660 Check if the Ndb object in thd_ndb is still valid(it will be
14661 invalid if connection to cluster has been lost) and recycle
14662 it if necessary.
14663 */
14664 if (!check_ndb_in_thd(thd, false))
14665 {
14666 set_timespec(abstime, 1);
14667 continue;
14668 }
14669
14670 /*
14671 Regularly give the ndb_binlog component chance to set it self up
14672 i.e at first start it needs to create the ndb_* system tables
14673 and setup event operations on those. In case of lost connection
14674 to cluster, the ndb_* system tables are hopefully still there
14675 but the event operations need to be recreated.
14676 */
14677 if (!ndb_binlog_setup(thd))
14678 {
14679 /* Failed to setup binlog, try again in 1 second */
14680 set_timespec(abstime, 1);
14681 continue;
14682 }
14683
14684 if (opt_ndb_cache_check_time == 0)
14685 {
14686 /* Wake up in 1 second to check if value has changed */
14687 set_timespec(abstime, 1);
14688 continue;
14689 }
14690
14691 /* Lock mutex and fill list with pointers to all open tables */
14692 NDB_SHARE *share;
14693 pthread_mutex_lock(&ndbcluster_mutex);
14694 uint i, open_count, record_count= ndbcluster_open_tables.records;
14695 if (share_list_size < record_count)
14696 {
14697 NDB_SHARE ** new_share_list= new NDB_SHARE * [record_count];
14698 if (!new_share_list)
14699 {
14700 sql_print_warning("ndb util thread: malloc failure, "
14701 "query cache not maintained properly");
14702 pthread_mutex_unlock(&ndbcluster_mutex);
14703 goto next; // At least do not crash
14704 }
14705 delete [] share_list;
14706 share_list_size= record_count;
14707 share_list= new_share_list;
14708 }
14709 for (i= 0, open_count= 0; i < record_count; i++)
14710 {
14711 share= (NDB_SHARE *)my_hash_element(&ndbcluster_open_tables, i);
14712 if ((share->use_count - (int) (share->op != 0) - (int) (share->op != 0))
14713 <= 0)
14714 continue; // injector thread is the only user, skip statistics
14715 /* ndb_share reference temporary, free below */
14716 share->use_count++; /* Make sure the table can't be closed */
14717 share->util_thread= true;
14718 DBUG_PRINT("NDB_SHARE", ("%s temporary use_count: %u",
14719 share->key, share->use_count));
14720 DBUG_PRINT("ndb_util_thread",
14721 ("Found open table[%d]: %s, use_count: %d",
14722 i, share->table_name, share->use_count));
14723
14724 /* Store pointer to table */
14725 share_list[open_count++]= share;
14726 }
14727 pthread_mutex_unlock(&ndbcluster_mutex);
14728
14729 /* Iterate through the open files list */
14730 for (i= 0; i < open_count; i++)
14731 {
14732 share= share_list[i];
14733 if ((share->use_count - (int) (share->op != 0) - (int) (share->op != 0))
14734 <= 1)
14735 {
14736 /*
14737 Util thread and injector thread is the only user, skip statistics
14738 */
14739 /* ndb_share reference temporary free */
14740 DBUG_PRINT("NDB_SHARE", ("%s temporary free use_count: %u",
14741 share->key, share->use_count));
14742
14743 pthread_mutex_lock(&ndbcluster_mutex);
14744 share->util_thread= false;
14745 free_share(&share, true);
14746 pthread_mutex_unlock(&ndbcluster_mutex);
14747 continue;
14748 }
14749 DBUG_PRINT("ndb_util_thread",
14750 ("Fetching commit count for: %s", share->key));
14751
14752 struct Ndb_statistics stat;
14753 uint lock;
14754 pthread_mutex_lock(&share->mutex);
14755 lock= share->commit_count_lock;
14756 pthread_mutex_unlock(&share->mutex);
14757 {
14758 /* Contact NDB to get commit count for table */
14759 Ndb* ndb= thd_ndb->ndb;
14760 if (ndb->setDatabaseName(share->db))
14761 {
14762 goto loop_next;
14763 }
14764 Ndb_table_guard ndbtab_g(ndb->getDictionary(), share->table_name);
14765 if (ndbtab_g.get_table() &&
14766 ndb_get_table_statistics(thd, NULL, FALSE, ndb,
14767 ndbtab_g.get_table()->getDefaultRecord(),
14768 &stat) == 0)
14769 {
14770 #ifndef DBUG_OFF
14771 char buff[22], buff2[22];
14772 #endif
14773 DBUG_PRINT("info",
14774 ("Table: %s commit_count: %s rows: %s",
14775 share->key,
14776 llstr(stat.commit_count, buff),
14777 llstr(stat.row_count, buff2)));
14778 }
14779 else
14780 {
14781 DBUG_PRINT("ndb_util_thread",
14782 ("Error: Could not get commit count for table %s",
14783 share->key));
14784 stat.commit_count= 0;
14785 }
14786 }
14787 loop_next:
14788 pthread_mutex_lock(&share->mutex);
14789 if (share->commit_count_lock == lock)
14790 share->commit_count= stat.commit_count;
14791 pthread_mutex_unlock(&share->mutex);
14792
14793 /* ndb_share reference temporary free */
14794 DBUG_PRINT("NDB_SHARE", ("%s temporary free use_count: %u",
14795 share->key, share->use_count));
14796 pthread_mutex_lock(&ndbcluster_mutex);
14797 share->util_thread= false;
14798 free_share(&share, true);
14799 pthread_mutex_unlock(&ndbcluster_mutex);
14800 }
14801 next:
14802 /* Calculate new time to wake up */
14803 set_timespec_nsec(abstime, opt_ndb_cache_check_time * 1000000ULL);
14804 }
14805
14806 pthread_mutex_lock(&LOCK_ndb_util_thread);
14807
14808 ndb_util_thread_end:
14809 net_end(&thd->net);
14810 ndb_util_thread_fail:
14811 if (share_list)
14812 delete [] share_list;
14813 if (thd_ndb)
14814 {
14815 Thd_ndb::release(thd_ndb);
14816 thd_set_thd_ndb(thd, NULL);
14817 }
14818 thd->cleanup();
14819 delete thd;
14820
14821 /* signal termination */
14822 ndb_util_thread_running= 0;
14823 pthread_cond_signal(&COND_ndb_util_ready);
14824 pthread_mutex_unlock(&LOCK_ndb_util_thread);
14825 DBUG_PRINT("exit", ("ndb_util_thread"));
14826
14827 DBUG_LEAVE; // Must match DBUG_ENTER()
14828 my_thread_end();
14829 pthread_exit(0);
14830 return NULL; // Avoid compiler warnings
14831 }
14832
14833 /*
14834 Condition pushdown
14835 */
14836 /**
14837 Push a condition to ndbcluster storage engine for evaluation
14838 during table and index scans. The conditions will be stored on a stack
14839 for possibly storing several conditions. The stack can be popped
14840 by calling cond_pop, handler::extra(HA_EXTRA_RESET) (handler::reset())
14841 will clear the stack.
14842 The current implementation supports arbitrary AND/OR nested conditions
14843 with comparisons between columns and constants (including constant
14844 expressions and function calls) and the following comparison operators:
14845 =, !=, >, >=, <, <=, "is null", and "is not null".
14846
14847 @retval
14848 NULL The condition was supported and will be evaluated for each
14849 row found during the scan
14850 @retval
14851 cond The condition was not supported and all rows will be returned from
14852 the scan for evaluation (and thus not saved on stack)
14853 */
14854 const
14855 Item*
cond_push(const Item * cond)14856 ha_ndbcluster::cond_push(const Item *cond)
14857 {
14858 DBUG_ENTER("ha_ndbcluster::cond_push");
14859
14860 #if 1
14861 if (cond->used_tables() & ~table->map)
14862 {
14863 /**
14864 * 'cond' refers fields from other tables, or other instances
14865 * of this table, -> reject it.
14866 * (Optimizer need to have a better understanding of what is
14867 * pushable by each handler.)
14868 */
14869 DBUG_EXECUTE("where",print_where((Item *)cond, "Rejected cond_push", QT_ORDINARY););
14870 DBUG_RETURN(cond);
14871 }
14872 #else
14873 /*
14874 Make sure that 'cond' does not refer field(s) from other tables
14875 or other instances of this table.
14876 (This was a legacy bug in optimizer)
14877 */
14878 DBUG_ASSERT(!(cond->used_tables() & ~table->map));
14879 #endif
14880 if (!m_cond)
14881 m_cond= new ha_ndbcluster_cond;
14882 if (!m_cond)
14883 {
14884 my_errno= HA_ERR_OUT_OF_MEM;
14885 DBUG_RETURN(cond);
14886 }
14887 DBUG_EXECUTE("where",print_where((Item *)cond, m_tabname, QT_ORDINARY););
14888 DBUG_RETURN(m_cond->cond_push(cond, table, (NDBTAB *)m_table));
14889 }
14890
14891 /**
14892 Pop the top condition from the condition stack of the handler instance.
14893 */
14894 void
cond_pop()14895 ha_ndbcluster::cond_pop()
14896 {
14897 if (m_cond)
14898 m_cond->cond_pop();
14899 }
14900
14901
14902 /*
14903 Implements the SHOW NDB STATUS command.
14904 */
14905 bool
ndbcluster_show_status(handlerton * hton,THD * thd,stat_print_fn * stat_print,enum ha_stat_type stat_type)14906 ndbcluster_show_status(handlerton *hton, THD* thd, stat_print_fn *stat_print,
14907 enum ha_stat_type stat_type)
14908 {
14909 char name[16];
14910 char buf[IO_SIZE];
14911 uint buflen;
14912 DBUG_ENTER("ndbcluster_show_status");
14913
14914 if (stat_type != HA_ENGINE_STATUS)
14915 {
14916 DBUG_RETURN(FALSE);
14917 }
14918
14919 Ndb* ndb= check_ndb_in_thd(thd);
14920 Thd_ndb *thd_ndb= get_thd_ndb(thd);
14921 struct st_ndb_status ns;
14922 if (ndb)
14923 update_status_variables(thd_ndb, &ns, thd_ndb->connection);
14924 else
14925 update_status_variables(NULL, &ns, g_ndb_cluster_connection);
14926
14927 buflen=
14928 my_snprintf(buf, sizeof(buf),
14929 "cluster_node_id=%ld, "
14930 "connected_host=%s, "
14931 "connected_port=%ld, "
14932 "number_of_data_nodes=%ld, "
14933 "number_of_ready_data_nodes=%ld, "
14934 "connect_count=%ld",
14935 ns.cluster_node_id,
14936 ns.connected_host,
14937 ns.connected_port,
14938 ns.number_of_data_nodes,
14939 ns.number_of_ready_data_nodes,
14940 ns.connect_count);
14941 if (stat_print(thd, ndbcluster_hton_name, ndbcluster_hton_name_length,
14942 STRING_WITH_LEN("connection"), buf, buflen))
14943 DBUG_RETURN(TRUE);
14944
14945 for (int i= 0; i < MAX_NDB_NODES; i++)
14946 {
14947 if (ns.transaction_hint_count[i] > 0 ||
14948 ns.transaction_no_hint_count[i] > 0)
14949 {
14950 uint namelen= my_snprintf(name, sizeof(name), "node[%d]", i);
14951 buflen= my_snprintf(buf, sizeof(buf),
14952 "transaction_hint=%ld, transaction_no_hint=%ld",
14953 ns.transaction_hint_count[i],
14954 ns.transaction_no_hint_count[i]);
14955 if (stat_print(thd, ndbcluster_hton_name, ndbcluster_hton_name_length,
14956 name, namelen, buf, buflen))
14957 DBUG_RETURN(TRUE);
14958 }
14959 }
14960
14961 if (ndb)
14962 {
14963 Ndb::Free_list_usage tmp;
14964 tmp.m_name= 0;
14965 while (ndb->get_free_list_usage(&tmp))
14966 {
14967 buflen=
14968 my_snprintf(buf, sizeof(buf),
14969 "created=%u, free=%u, sizeof=%u",
14970 tmp.m_created, tmp.m_free, tmp.m_sizeof);
14971 if (stat_print(thd, ndbcluster_hton_name, ndbcluster_hton_name_length,
14972 tmp.m_name, strlen(tmp.m_name), buf, buflen))
14973 DBUG_RETURN(TRUE);
14974 }
14975 }
14976 ndbcluster_show_status_binlog(thd, stat_print, stat_type);
14977
14978 DBUG_RETURN(FALSE);
14979 }
14980
14981
get_default_no_partitions(HA_CREATE_INFO * create_info)14982 int ha_ndbcluster::get_default_no_partitions(HA_CREATE_INFO *create_info)
14983 {
14984 if (unlikely(g_ndb_cluster_connection->get_no_ready() <= 0))
14985 {
14986 err:
14987 my_error(HA_ERR_NO_CONNECTION, MYF(0));
14988 return -1;
14989 }
14990
14991 THD* thd = current_thd;
14992 if (thd == 0)
14993 goto err;
14994 Thd_ndb * thd_ndb = get_thd_ndb(thd);
14995 if (thd_ndb == 0)
14996 goto err;
14997
14998 ha_rows max_rows, min_rows;
14999 if (create_info)
15000 {
15001 max_rows= create_info->max_rows;
15002 min_rows= create_info->min_rows;
15003 }
15004 else
15005 {
15006 max_rows= table_share->max_rows;
15007 min_rows= table_share->min_rows;
15008 }
15009 uint no_fragments= get_no_fragments(max_rows >= min_rows ?
15010 max_rows : min_rows);
15011 uint reported_frags;
15012 adjusted_frag_count(thd_ndb->ndb,
15013 no_fragments,
15014 reported_frags);
15015 return reported_frags;
15016 }
15017
calculate_key_hash_value(Field ** field_array)15018 uint32 ha_ndbcluster::calculate_key_hash_value(Field **field_array)
15019 {
15020 Uint32 hash_value;
15021 struct Ndb::Key_part_ptr key_data[MAX_REF_PARTS];
15022 struct Ndb::Key_part_ptr *key_data_ptr= &key_data[0];
15023 Uint32 i= 0;
15024 int ret_val;
15025 Uint64 tmp[(MAX_KEY_SIZE_IN_WORDS*MAX_XFRM_MULTIPLY) >> 1];
15026 void *buf= (void*)&tmp[0];
15027 Ndb *ndb= m_thd_ndb->ndb;
15028 DBUG_ENTER("ha_ndbcluster::calculate_key_hash_value");
15029
15030 do
15031 {
15032 Field *field= *field_array;
15033 uint len= field->data_length();
15034 DBUG_ASSERT(!field->is_real_null());
15035 if (field->real_type() == MYSQL_TYPE_VARCHAR)
15036 len+= ((Field_varstring*)field)->length_bytes;
15037 key_data[i].ptr= field->ptr;
15038 key_data[i++].len= len;
15039 } while (*(++field_array));
15040 key_data[i].ptr= 0;
15041 if ((ret_val= ndb->computeHash(&hash_value, m_table,
15042 key_data_ptr, buf, sizeof(tmp))))
15043 {
15044 DBUG_PRINT("info", ("ret_val = %d", ret_val));
15045 DBUG_ASSERT(FALSE);
15046 abort();
15047 }
15048 DBUG_RETURN(hash_value);
15049 }
15050
15051
15052 /*
15053 Set-up auto-partitioning for NDB Cluster
15054
15055 SYNOPSIS
15056 set_auto_partitions()
15057 part_info Partition info struct to set-up
15058
15059 RETURN VALUE
15060 NONE
15061
15062 DESCRIPTION
15063 Set-up auto partitioning scheme for tables that didn't define any
15064 partitioning. We'll use PARTITION BY KEY() in this case which
15065 translates into partition by primary key if a primary key exists
15066 and partition by hidden key otherwise.
15067 */
15068
/*
  Distribution schemes selectable through the 'distribution' system
  variable. The enumerators are listed in the same order as
  distribution_names[] below.
*/
enum ndb_distribution_enum {
  NDB_DISTRIBUTION_KEYHASH= 0,
  NDB_DISTRIBUTION_LINHASH= 1
};
/* Names of the distribution schemes, terminated by NullS */
static const char* distribution_names[]= { "KEYHASH", "LINHASH", NullS };
/* Storage for the 'distribution' system variable defined below */
static ulong opt_ndb_distribution;
/* Typelib over distribution_names[]; the count excludes the NullS entry */
static TYPELIB distribution_typelib= {
  array_elements(distribution_names) - 1,
  "",
  distribution_names,
  NULL
};
/* Enum system variable backed by opt_ndb_distribution */
static MYSQL_SYSVAR_ENUM(
  distribution,                      /* name */
  opt_ndb_distribution,              /* var */
  PLUGIN_VAR_RQCMDARG,
  "Default distribution for new tables in ndb",
  NULL,                              /* check func. */
  NULL,                              /* update func. */
  NDB_DISTRIBUTION_KEYHASH,          /* default */
  &distribution_typelib              /* typelib */
);
15091
15092
set_auto_partitions(partition_info * part_info)15093 void ha_ndbcluster::set_auto_partitions(partition_info *part_info)
15094 {
15095 DBUG_ENTER("ha_ndbcluster::set_auto_partitions");
15096 part_info->list_of_part_fields= TRUE;
15097 part_info->part_type= HASH_PARTITION;
15098 switch (opt_ndb_distribution)
15099 {
15100 case NDB_DISTRIBUTION_KEYHASH:
15101 part_info->linear_hash_ind= FALSE;
15102 break;
15103 case NDB_DISTRIBUTION_LINHASH:
15104 part_info->linear_hash_ind= TRUE;
15105 break;
15106 default:
15107 DBUG_ASSERT(false);
15108 break;
15109 }
15110 DBUG_VOID_RETURN;
15111 }
15112
15113
15114 int
set_range_data(const partition_info * part_info,NdbDictionary::Table & ndbtab) const15115 ha_ndbcluster::set_range_data(const partition_info *part_info,
15116 NdbDictionary::Table& ndbtab) const
15117 {
15118 const uint num_parts = partition_info_num_parts(part_info);
15119 int error= 0;
15120 bool unsigned_flag= part_info->part_expr->unsigned_flag;
15121 DBUG_ENTER("set_range_data");
15122
15123 int32 *range_data= (int32*)my_malloc(num_parts*sizeof(int32), MYF(0));
15124 if (!range_data)
15125 {
15126 mem_alloc_error(num_parts*sizeof(int32));
15127 DBUG_RETURN(1);
15128 }
15129 for (uint i= 0; i < num_parts; i++)
15130 {
15131 longlong range_val= part_info->range_int_array[i];
15132 if (unsigned_flag)
15133 range_val-= 0x8000000000000000ULL;
15134 if (range_val < INT_MIN32 || range_val >= INT_MAX32)
15135 {
15136 if ((i != num_parts - 1) ||
15137 (range_val != LONGLONG_MAX))
15138 {
15139 my_error(ER_LIMITED_PART_RANGE, MYF(0), "NDB");
15140 error= 1;
15141 goto error;
15142 }
15143 range_val= INT_MAX32;
15144 }
15145 range_data[i]= (int32)range_val;
15146 }
15147 ndbtab.setRangeListData(range_data, num_parts);
15148 error:
15149 my_free((char*)range_data, MYF(0));
15150 DBUG_RETURN(error);
15151 }
15152
15153
15154 int
set_list_data(const partition_info * part_info,NdbDictionary::Table & ndbtab) const15155 ha_ndbcluster::set_list_data(const partition_info *part_info,
15156 NdbDictionary::Table& ndbtab) const
15157 {
15158 const uint num_list_values = partition_info_num_list_values(part_info);
15159 int32 *list_data= (int32*)my_malloc(num_list_values*2*sizeof(int32), MYF(0));
15160 int error= 0;
15161 bool unsigned_flag= part_info->part_expr->unsigned_flag;
15162 DBUG_ENTER("set_list_data");
15163
15164 if (!list_data)
15165 {
15166 mem_alloc_error(num_list_values*2*sizeof(int32));
15167 DBUG_RETURN(1);
15168 }
15169 for (uint i= 0; i < num_list_values; i++)
15170 {
15171 LIST_PART_ENTRY *list_entry= &part_info->list_array[i];
15172 longlong list_val= list_entry->list_value;
15173 if (unsigned_flag)
15174 list_val-= 0x8000000000000000ULL;
15175 if (list_val < INT_MIN32 || list_val > INT_MAX32)
15176 {
15177 my_error(ER_LIMITED_PART_RANGE, MYF(0), "NDB");
15178 error= 1;
15179 goto error;
15180 }
15181 list_data[2*i]= (int32)list_val;
15182 list_data[2*i+1]= list_entry->partition_id;
15183 }
15184 ndbtab.setRangeListData(list_data, 2*num_list_values);
15185 error:
15186 my_free((char*)list_data, MYF(0));
15187 DBUG_RETURN(error);
15188 }
15189
15190 /*
15191 User defined partitioning set-up. We need to check how many fragments the
15192 user wants defined and which node groups to put those into. Later we also
15193 want to attach those partitions to a tablespace.
15194
15195 All the functionality of the partition function, partition limits and so
15196 forth are entirely handled by the MySQL Server. There is one exception to
15197 this rule for PARTITION BY KEY where NDB handles the hash function and
15198 this type can thus be handled transparently also by NDB API program.
15199 For RANGE, HASH and LIST and subpartitioning the NDB API programs must
15200 implement the function to map to a partition.
15201 */
15202
15203 int
set_up_partition_info(partition_info * part_info,NdbDictionary::Table & ndbtab) const15204 ha_ndbcluster::set_up_partition_info(partition_info *part_info,
15205 NdbDictionary::Table& ndbtab) const
15206 {
15207 uint32 frag_data[MAX_PARTITIONS];
15208 char *ts_names[MAX_PARTITIONS];
15209 ulong fd_index= 0, i, j;
15210 NDBTAB::FragmentType ftype= NDBTAB::UserDefined;
15211 partition_element *part_elem;
15212 List_iterator<partition_element> part_it(part_info->partitions);
15213 int error;
15214 DBUG_ENTER("ha_ndbcluster::set_up_partition_info");
15215
15216 if (part_info->part_type == HASH_PARTITION &&
15217 part_info->list_of_part_fields == TRUE)
15218 {
15219 Field **fields= part_info->part_field_array;
15220
15221 ftype= NDBTAB::HashMapPartition;
15222
15223 for (i= 0; i < part_info->part_field_list.elements; i++)
15224 {
15225 NDBCOL *col= ndbtab.getColumn(fields[i]->field_index);
15226 DBUG_PRINT("info",("setting dist key on %s", col->getName()));
15227 col->setPartitionKey(TRUE);
15228 }
15229 }
15230 else
15231 {
15232 if (!current_thd->variables.new_mode)
15233 {
15234 push_warning_printf(current_thd, Sql_condition::WARN_LEVEL_WARN,
15235 ER_ILLEGAL_HA_CREATE_OPTION,
15236 ER(ER_ILLEGAL_HA_CREATE_OPTION),
15237 ndbcluster_hton_name,
15238 "LIST, RANGE and HASH partition disabled by default,"
15239 " use --new option to enable");
15240 DBUG_RETURN(HA_ERR_UNSUPPORTED);
15241 }
15242 /*
15243 Create a shadow field for those tables that have user defined
15244 partitioning. This field stores the value of the partition
15245 function such that NDB can handle reorganisations of the data
15246 even when the MySQL Server isn't available to assist with
15247 calculation of the partition function value.
15248 */
15249 NDBCOL col;
15250 DBUG_PRINT("info", ("Generating partition func value field"));
15251 col.setName("$PART_FUNC_VALUE");
15252 col.setType(NdbDictionary::Column::Int);
15253 col.setLength(1);
15254 col.setNullable(FALSE);
15255 col.setPrimaryKey(FALSE);
15256 col.setAutoIncrement(FALSE);
15257 ndbtab.addColumn(col);
15258 if (part_info->part_type == RANGE_PARTITION)
15259 {
15260 if ((error= set_range_data(part_info, ndbtab)))
15261 {
15262 DBUG_RETURN(error);
15263 }
15264 }
15265 else if (part_info->part_type == LIST_PARTITION)
15266 {
15267 if ((error= set_list_data(part_info, ndbtab)))
15268 {
15269 DBUG_RETURN(error);
15270 }
15271 }
15272 }
15273 ndbtab.setFragmentType(ftype);
15274 i= 0;
15275 do
15276 {
15277 uint ng;
15278 part_elem= part_it++;
15279 if (!part_info->is_sub_partitioned())
15280 {
15281 ng= part_elem->nodegroup_id;
15282 ts_names[fd_index]= part_elem->tablespace_name;
15283 frag_data[fd_index++]= ng;
15284 }
15285 else
15286 {
15287 List_iterator<partition_element> sub_it(part_elem->subpartitions);
15288 j= 0;
15289 do
15290 {
15291 part_elem= sub_it++;
15292 ng= part_elem->nodegroup_id;
15293 ts_names[fd_index]= part_elem->tablespace_name;
15294 frag_data[fd_index++]= ng;
15295 } while (++j < partition_info_num_subparts(part_info));
15296 }
15297 } while (++i < partition_info_num_parts(part_info));
15298
15299 const bool use_default_num_parts =
15300 partition_info_use_default_num_partitions(part_info);
15301 ndbtab.setDefaultNoPartitionsFlag(use_default_num_parts);
15302 ndbtab.setLinearFlag(part_info->linear_hash_ind);
15303 {
15304 ha_rows max_rows= table_share->max_rows;
15305 ha_rows min_rows= table_share->min_rows;
15306 if (max_rows < min_rows)
15307 max_rows= min_rows;
15308 if (max_rows != (ha_rows)0) /* default setting, don't set fragmentation */
15309 {
15310 ndbtab.setMaxRows(max_rows);
15311 ndbtab.setMinRows(min_rows);
15312 }
15313 }
15314 ndbtab.setFragmentCount(fd_index);
15315 ndbtab.setFragmentData(frag_data, fd_index);
15316 DBUG_RETURN(0);
15317 }
15318
15319 #ifndef NDB_WITHOUT_ONLINE_ALTER
15320 static
supported_alter_operations()15321 HA_ALTER_FLAGS supported_alter_operations()
15322 {
15323 HA_ALTER_FLAGS alter_flags;
15324 return alter_flags |
15325 HA_ADD_INDEX |
15326 HA_DROP_INDEX |
15327 HA_ADD_UNIQUE_INDEX |
15328 HA_DROP_UNIQUE_INDEX |
15329 HA_ADD_COLUMN |
15330 HA_COLUMN_STORAGE |
15331 HA_COLUMN_FORMAT |
15332 HA_ADD_PARTITION |
15333 HA_ALTER_TABLE_REORG |
15334 HA_CHANGE_AUTOINCREMENT_VALUE;
15335 }
15336
check_if_supported_alter(TABLE * altered_table,HA_CREATE_INFO * create_info,Alter_info * alter_info,HA_ALTER_FLAGS * alter_flags,uint table_changes)15337 int ha_ndbcluster::check_if_supported_alter(TABLE *altered_table,
15338 HA_CREATE_INFO *create_info,
15339 Alter_info *alter_info,
15340 HA_ALTER_FLAGS *alter_flags,
15341 uint table_changes)
15342 {
15343 THD *thd= current_thd;
15344 HA_ALTER_FLAGS not_supported= ~(supported_alter_operations());
15345 uint i;
15346 const NDBTAB *tab= (const NDBTAB *) m_table;
15347 HA_ALTER_FLAGS add_column;
15348 HA_ALTER_FLAGS adding;
15349 HA_ALTER_FLAGS dropping;
15350
15351 DBUG_ENTER("ha_ndbcluster::check_if_supported_alter");
15352 add_column= add_column | HA_ADD_COLUMN;
15353 adding= adding | HA_ADD_INDEX | HA_ADD_UNIQUE_INDEX;
15354 dropping= dropping | HA_DROP_INDEX | HA_DROP_UNIQUE_INDEX;
15355 partition_info *part_info= altered_table->part_info;
15356 const NDBTAB *old_tab= m_table;
15357
15358 if (THDVAR(thd, use_copying_alter_table))
15359 {
15360 DBUG_PRINT("info", ("On-line alter table disabled"));
15361 DBUG_RETURN(HA_ALTER_NOT_SUPPORTED);
15362 }
15363 #ifndef DBUG_OFF
15364 {
15365 char dbug_string[HA_MAX_ALTER_FLAGS+1];
15366 alter_flags->print(dbug_string);
15367 DBUG_PRINT("info", ("Not supported %s", dbug_string));
15368 }
15369 #endif
15370
15371 if (alter_flags->is_set(HA_ALTER_TABLE_REORG))
15372 {
15373 /*
15374 sql_partition.cc tries to compute what is going on
15375 and sets flags...that we clear
15376 */
15377 if (part_info->use_default_num_partitions)
15378 {
15379 alter_flags->clear_bit(HA_COALESCE_PARTITION);
15380 alter_flags->clear_bit(HA_ADD_PARTITION);
15381 }
15382 }
15383
15384 if ((*alter_flags & not_supported).is_set())
15385 {
15386 #ifndef DBUG_OFF
15387 HA_ALTER_FLAGS tmp = *alter_flags;
15388 tmp&= not_supported;
15389 char dbug_string[HA_MAX_ALTER_FLAGS+1];
15390 tmp.print(dbug_string);
15391 DBUG_PRINT("info", ("Detected unsupported change: %s", dbug_string));
15392 #endif
15393 DBUG_RETURN(HA_ALTER_NOT_SUPPORTED);
15394 }
15395
15396 if (alter_flags->is_set(HA_ADD_COLUMN) ||
15397 alter_flags->is_set(HA_ADD_PARTITION) ||
15398 alter_flags->is_set(HA_ALTER_TABLE_REORG))
15399 {
15400 Ndb *ndb= get_ndb(thd);
15401 NDBDICT *dict= ndb->getDictionary();
15402 ndb->setDatabaseName(m_dbname);
15403 NdbDictionary::Table new_tab= *old_tab;
15404
15405 if (alter_flags->is_set(HA_ADD_COLUMN))
15406 {
15407 NDBCOL col;
15408
15409 /*
15410 Check that we are only adding columns
15411 */
15412 /*
15413 HA_COLUMN_STORAGE & HA_COLUMN_FORMAT
15414 are set if they are specified in an later cmd
15415 even if they're no change. This is probably a bug
15416 conclusion: add them to add_column-mask, so that we silently "accept" them
15417 In case of someone trying to change a column, the HA_CHANGE_COLUMN would be set
15418 which we don't support, so we will still return HA_ALTER_NOT_SUPPORTED in those cases
15419 */
15420 add_column.set_bit(HA_COLUMN_STORAGE);
15421 add_column.set_bit(HA_COLUMN_FORMAT);
15422 if ((*alter_flags & ~add_column).is_set())
15423 {
15424 DBUG_PRINT("info", ("Only add column exclusively can be performed on-line"));
15425 DBUG_RETURN(HA_ALTER_NOT_SUPPORTED);
15426 }
15427 /*
15428 Check for extra fields for hidden primary key
15429 or user defined partitioning
15430 */
15431 if (table_share->primary_key == MAX_KEY ||
15432 part_info->part_type != HASH_PARTITION ||
15433 !part_info->list_of_part_fields)
15434 DBUG_RETURN(HA_ALTER_NOT_SUPPORTED);
15435
15436 /* Find the new fields */
15437 for (uint i= table->s->fields; i < altered_table->s->fields; i++)
15438 {
15439 Field *field= altered_table->field[i];
15440 DBUG_PRINT("info", ("Found new field %s", field->field_name));
15441 DBUG_PRINT("info", ("storage_type %i, column_format %i",
15442 (uint) field->field_storage_type(),
15443 (uint) field->column_format()));
15444 /* Create new field to check if it can be added */
15445 if ((my_errno= create_ndb_column(0, col, field, create_info,
15446 COLUMN_FORMAT_TYPE_DYNAMIC)))
15447 {
15448 DBUG_PRINT("info", ("create_ndb_column returned %u", my_errno));
15449 DBUG_RETURN(my_errno);
15450 }
15451 new_tab.addColumn(col);
15452 }
15453 }
15454
15455 if (alter_flags->is_set(HA_ALTER_TABLE_REORG))
15456 {
15457 new_tab.setFragmentCount(0);
15458 new_tab.setFragmentData(0, 0);
15459 }
15460 else if (alter_flags->is_set(HA_ADD_PARTITION))
15461 {
15462 DBUG_PRINT("info", ("Adding partition (%u)", part_info->num_parts));
15463 new_tab.setFragmentCount(part_info->num_parts);
15464 }
15465
15466 NDB_Modifiers table_modifiers(ndb_table_modifiers);
15467 table_modifiers.parse(thd, "NDB_TABLE=", create_info->comment.str,
15468 create_info->comment.length);
15469 const NDB_Modifier* mod_nologging = table_modifiers.get("NOLOGGING");
15470
15471 if (mod_nologging->m_found)
15472 {
15473 new_tab.setLogging(!mod_nologging->m_val_bool);
15474 }
15475
15476 if (dict->supportedAlterTable(*old_tab, new_tab))
15477 {
15478 DBUG_PRINT("info", ("Adding column(s) supported on-line"));
15479 }
15480 else
15481 {
15482 DBUG_PRINT("info",("Adding column not supported on-line"));
15483 DBUG_RETURN(HA_ALTER_NOT_SUPPORTED);
15484 }
15485 }
15486
15487 /*
15488 Check that we are not adding multiple indexes
15489 */
15490 if ((*alter_flags & adding).is_set())
15491 {
15492 if (((altered_table->s->keys - table->s->keys) != 1) ||
15493 (*alter_flags & dropping).is_set())
15494 {
15495 DBUG_PRINT("info",("Only one index can be added on-line"));
15496 DBUG_RETURN(HA_ALTER_NOT_SUPPORTED);
15497 }
15498 }
15499
15500 /*
15501 Check that we are not dropping multiple indexes
15502 */
15503 if ((*alter_flags & dropping).is_set())
15504 {
15505 if (((table->s->keys - altered_table->s->keys) != 1) ||
15506 (*alter_flags & adding).is_set())
15507 {
15508 DBUG_PRINT("info",("Only one index can be dropped on-line"));
15509 DBUG_RETURN(HA_ALTER_NOT_SUPPORTED);
15510 }
15511 }
15512
15513 for (i= 0; i < table->s->fields; i++)
15514 {
15515 Field *field= table->field[i];
15516 const NDBCOL *col= tab->getColumn(i);
15517
15518 NDBCOL new_col;
15519 create_ndb_column(0, new_col, field, create_info);
15520
15521 bool index_on_column = false;
15522 /**
15523 * Check all indexes to determine if column has index instead of checking
15524 * field->flags (PRI_KEY_FLAG | UNIQUE_KEY_FLAG | MULTIPLE_KEY_FLAG
15525 * since field->flags appears to only be set on first column in
15526 * multi-part index
15527 */
15528 for (uint j= 0; j<table->s->keys; j++)
15529 {
15530 KEY* key_info= table->key_info + j;
15531 KEY_PART_INFO* key_part= key_info->key_part;
15532 KEY_PART_INFO* end= key_part+key_info->user_defined_key_parts;
15533 for (; key_part != end; key_part++)
15534 {
15535 if (key_part->field->field_index == i)
15536 {
15537 index_on_column= true;
15538 j= table->s->keys; // break outer loop
15539 break;
15540 }
15541 }
15542 }
15543
15544 if (index_on_column == false && (*alter_flags & adding).is_set())
15545 {
15546 for (uint j= table->s->keys; j<altered_table->s->keys; j++)
15547 {
15548 KEY* key_info= altered_table->key_info + j;
15549 KEY_PART_INFO* key_part= key_info->key_part;
15550 KEY_PART_INFO* end= key_part+key_info->user_defined_key_parts;
15551 for (; key_part != end; key_part++)
15552 {
15553 if (key_part->field->field_index == i)
15554 {
15555 index_on_column= true;
15556 j= altered_table->s->keys; // break outer loop
15557 break;
15558 }
15559 }
15560 }
15561 }
15562
15563 /**
15564 * This is a "copy" of code in ::create()
15565 * that "auto-converts" columns with keys into memory
15566 * (unless storage disk is explicitly added)
15567 * This is needed to check if getStorageType() == getStorageType()
15568 * further down
15569 */
15570 if (index_on_column)
15571 {
15572 if (field->field_storage_type() == HA_SM_DISK)
15573 {
15574 DBUG_RETURN(HA_ALTER_NOT_SUPPORTED);
15575 }
15576 new_col.setStorageType(NdbDictionary::Column::StorageTypeMemory);
15577 }
15578 else if (field->field_storage_type() == HA_SM_DEFAULT)
15579 {
15580 /**
15581 * If user didn't specify any column format, keep old
15582 * to make as many alter's as possible online
15583 */
15584 new_col.setStorageType(col->getStorageType());
15585 }
15586
15587 if (col->getStorageType() != new_col.getStorageType())
15588 {
15589 DBUG_PRINT("info", ("Column storage media is changed"));
15590 DBUG_RETURN(HA_ALTER_NOT_SUPPORTED);
15591 }
15592
15593 if (field->flags & FIELD_IS_RENAMED)
15594 {
15595 DBUG_PRINT("info", ("Field has been renamed, copy table"));
15596 DBUG_RETURN(HA_ALTER_NOT_SUPPORTED);
15597 }
15598
15599 if ((field->flags & FIELD_IN_ADD_INDEX) &&
15600 (col->getStorageType() == NdbDictionary::Column::StorageTypeDisk))
15601 {
15602 DBUG_PRINT("info", ("add/drop index not supported for disk stored column"));
15603 DBUG_RETURN(HA_ALTER_NOT_SUPPORTED);
15604 }
15605 }
15606
15607 if ((*alter_flags & HA_CHANGE_AUTOINCREMENT_VALUE).is_set())
15608 {
15609 /* Check that only auto_increment value was changed */
15610 HA_ALTER_FLAGS change_auto_flags=
15611 change_auto_flags | HA_CHANGE_AUTOINCREMENT_VALUE;
15612 if ((*alter_flags & ~change_auto_flags).is_set())
15613 {
15614 DBUG_PRINT("info", ("Not only auto_increment value changed"));
15615 DBUG_RETURN(HA_ALTER_NOT_SUPPORTED);
15616 }
15617 }
15618 else
15619 {
15620 /* Check that row format didn't change */
15621 if ((create_info->used_fields & HA_CREATE_USED_AUTO) &&
15622 get_row_type() != create_info->row_type)
15623 {
15624 DBUG_PRINT("info", ("Row format changed"));
15625 DBUG_RETURN(HA_ALTER_NOT_SUPPORTED);
15626 }
15627 }
15628
15629 DBUG_PRINT("info", ("Ndb supports ALTER on-line"));
15630 DBUG_RETURN(HA_ALTER_SUPPORTED_WAIT_LOCK);
15631 }
15632
alter_table_phase1(THD * thd,TABLE * altered_table,HA_CREATE_INFO * create_info,HA_ALTER_INFO * alter_info,HA_ALTER_FLAGS * alter_flags)15633 int ha_ndbcluster::alter_table_phase1(THD *thd,
15634 TABLE *altered_table,
15635 HA_CREATE_INFO *create_info,
15636 HA_ALTER_INFO *alter_info,
15637 HA_ALTER_FLAGS *alter_flags)
15638 {
15639 int error= 0;
15640 uint i;
15641 Thd_ndb *thd_ndb= get_thd_ndb(thd);
15642 Ndb *ndb= get_ndb(thd);
15643 NDBDICT *dict= ndb->getDictionary();
15644 ndb->setDatabaseName(m_dbname);
15645 NDB_ALTER_DATA *alter_data;
15646 const NDBTAB *old_tab;
15647 NdbDictionary::Table *new_tab;
15648 HA_ALTER_FLAGS adding;
15649 HA_ALTER_FLAGS dropping;
15650
15651 DBUG_ENTER("alter_table_phase1");
15652 adding= adding | HA_ADD_INDEX | HA_ADD_UNIQUE_INDEX;
15653 dropping= dropping | HA_DROP_INDEX | HA_DROP_UNIQUE_INDEX;
15654
15655 if (!thd_ndb->has_required_global_schema_lock("ha_ndbcluster::alter_table_phase1"))
15656 DBUG_RETURN(HA_ERR_NO_CONNECTION);
15657
15658 if (!(alter_data= new NDB_ALTER_DATA(dict, m_table)))
15659 DBUG_RETURN(HA_ERR_OUT_OF_MEM);
15660 old_tab= alter_data->old_table;
15661 new_tab= alter_data->new_table;
15662 alter_info->data= alter_data;
15663 #ifndef DBUG_OFF
15664 {
15665 char dbug_string[HA_MAX_ALTER_FLAGS+1];
15666 alter_flags->print(dbug_string);
15667 DBUG_PRINT("info", ("altered_table %s, alter_flags %s",
15668 altered_table->s->table_name.str,
15669 (char *) dbug_string));
15670 }
15671 #endif
15672
15673 prepare_for_alter();
15674
15675 if (dict->beginSchemaTrans() == -1)
15676 {
15677 DBUG_PRINT("info", ("Failed to start schema transaction"));
15678 ERR_PRINT(dict->getNdbError());
15679 error= ndb_to_mysql_error(&dict->getNdbError());
15680 table->file->print_error(error, MYF(0));
15681 goto err;
15682 }
15683
15684 if ((*alter_flags & adding).is_set())
15685 {
15686 KEY *key_info;
15687 KEY *key;
15688 uint *idx_p;
15689 uint *idx_end_p;
15690 KEY_PART_INFO *key_part;
15691 KEY_PART_INFO *part_end;
15692 DBUG_PRINT("info", ("Adding indexes"));
15693 key_info= (KEY*) thd->alloc(sizeof(KEY) * alter_info->index_add_count);
15694 key= key_info;
15695 for (idx_p= alter_info->index_add_buffer,
15696 idx_end_p= idx_p + alter_info->index_add_count;
15697 idx_p < idx_end_p;
15698 idx_p++, key++)
15699 {
15700 /* Copy the KEY struct. */
15701 *key= alter_info->key_info_buffer[*idx_p];
15702 /* Fix the key parts. */
15703 part_end= key->key_part + key->user_defined_key_parts;
15704 for (key_part= key->key_part; key_part < part_end; key_part++)
15705 key_part->field= table->field[key_part->fieldnr];
15706 }
15707 if ((error= add_index_impl(thd, altered_table, key_info,
15708 alter_info->index_add_count)))
15709 {
15710 /*
15711 Exchange the key_info for the error message. If we exchange
15712 key number by key name in the message later, we need correct info.
15713 */
15714 KEY *save_key_info= table->key_info;
15715 table->key_info= key_info;
15716 table->file->print_error(error, MYF(0));
15717 table->key_info= save_key_info;
15718 goto abort;
15719 }
15720 }
15721
15722 if ((*alter_flags & dropping).is_set())
15723 {
15724 uint *key_numbers;
15725 uint *keyno_p;
15726 uint *idx_p;
15727 uint *idx_end_p;
15728 DBUG_PRINT("info", ("Renumbering indexes"));
15729 /* The prepare_drop_index() method takes an array of key numbers. */
15730 key_numbers= (uint*) thd->alloc(sizeof(uint) * alter_info->index_drop_count);
15731 keyno_p= key_numbers;
15732 /* Get the number of each key. */
15733 for (idx_p= alter_info->index_drop_buffer,
15734 idx_end_p= idx_p + alter_info->index_drop_count;
15735 idx_p < idx_end_p;
15736 idx_p++, keyno_p++)
15737 *keyno_p= *idx_p;
15738 /*
15739 Tell the handler to prepare for drop indexes.
15740 This re-numbers the indexes to get rid of gaps.
15741 */
15742 if ((error= prepare_drop_index(table, key_numbers,
15743 alter_info->index_drop_count)))
15744 {
15745 table->file->print_error(error, MYF(0));
15746 goto abort;
15747 }
15748 }
15749
15750 if (alter_flags->is_set(HA_ADD_COLUMN))
15751 {
15752 NDBCOL col;
15753
15754 /* Find the new fields */
15755 for (i= table->s->fields; i < altered_table->s->fields; i++)
15756 {
15757 Field *field= altered_table->field[i];
15758 DBUG_PRINT("info", ("Found new field %s", field->field_name));
15759 if ((my_errno= create_ndb_column(thd, col, field, create_info,
15760 COLUMN_FORMAT_TYPE_DYNAMIC)))
15761 {
15762 error= my_errno;
15763 goto abort;
15764 }
15765 /*
15766 If the user has not specified the field format
15767 make it dynamic to enable on-line add attribute
15768 */
15769 if (field->column_format() == COLUMN_FORMAT_TYPE_DEFAULT &&
15770 create_info->row_type == ROW_TYPE_DEFAULT &&
15771 col.getDynamic())
15772 {
15773 push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN,
15774 ER_ILLEGAL_HA_CREATE_OPTION,
15775 "Converted FIXED field to DYNAMIC "
15776 "to enable on-line ADD COLUMN",
15777 field->field_name);
15778 }
15779 new_tab->addColumn(col);
15780 }
15781 }
15782
15783 if (alter_flags->is_set(HA_ALTER_TABLE_REORG) || alter_flags->is_set(HA_ADD_PARTITION))
15784 {
15785 if (alter_flags->is_set(HA_ALTER_TABLE_REORG))
15786 {
15787 new_tab->setFragmentCount(0);
15788 new_tab->setFragmentData(0, 0);
15789 }
15790 else if (alter_flags->is_set(HA_ADD_PARTITION))
15791 {
15792 partition_info *part_info= altered_table->part_info;
15793 new_tab->setFragmentCount(part_info->num_parts);
15794 }
15795
15796 int res= dict->prepareHashMap(*old_tab, *new_tab);
15797 if (res == -1)
15798 {
15799 const NdbError err= dict->getNdbError();
15800 my_errno= ndb_to_mysql_error(&err);
15801 goto abort;
15802 }
15803 }
15804
15805 DBUG_RETURN(0);
15806 abort:
15807 if (dict->endSchemaTrans(NdbDictionary::Dictionary::SchemaTransAbort)
15808 == -1)
15809 {
15810 DBUG_PRINT("info", ("Failed to abort schema transaction"));
15811 ERR_PRINT(dict->getNdbError());
15812 error= ndb_to_mysql_error(&dict->getNdbError());
15813 }
15814 err:
15815 set_ndb_share_state(m_share, NSS_INITIAL);
15816 /* ndb_share reference schema free */
15817 DBUG_PRINT("NDB_SHARE", ("%s binlog schema free use_count: %u",
15818 m_share->key, m_share->use_count));
15819 free_share(&m_share); // Decrease ref_count
15820 delete alter_data;
15821 DBUG_RETURN(error);
15822 }
15823
alter_frm(THD * thd,const char * file,NDB_ALTER_DATA * alter_data)15824 int ha_ndbcluster::alter_frm(THD *thd, const char *file,
15825 NDB_ALTER_DATA *alter_data)
15826 {
15827 uchar *data= NULL, *pack_data= NULL;
15828 size_t length, pack_length;
15829 int error= 0;
15830
15831 DBUG_ENTER("alter_frm");
15832
15833 DBUG_PRINT("enter", ("file: %s", file));
15834
15835 NDBDICT *dict= alter_data->dictionary;
15836
15837 // TODO handle this
15838 DBUG_ASSERT(m_table != 0);
15839
15840 DBUG_ASSERT(get_ndb_share_state(m_share) == NSS_ALTERED);
15841 if (readfrm(file, &data, &length) ||
15842 packfrm(data, length, &pack_data, &pack_length))
15843 {
15844 DBUG_PRINT("info", ("Missing frm for %s", m_tabname));
15845 my_free((char*)data, MYF(MY_ALLOW_ZERO_PTR));
15846 my_free((char*)pack_data, MYF(MY_ALLOW_ZERO_PTR));
15847 error= 1;
15848 my_error(ER_FILE_NOT_FOUND, MYF(0), file);
15849 }
15850 else
15851 {
15852 DBUG_PRINT("info", ("Table %s has changed, altering frm in ndb",
15853 m_tabname));
15854 const NDBTAB *old_tab= alter_data->old_table;
15855 NdbDictionary::Table *new_tab= alter_data->new_table;
15856
15857 new_tab->setFrm(pack_data, (Uint32)pack_length);
15858 if (dict->alterTableGlobal(*old_tab, *new_tab))
15859 {
15860 DBUG_PRINT("info", ("On-line alter of table %s failed", m_tabname));
15861 error= ndb_to_mysql_error(&dict->getNdbError());
15862 my_error(error, MYF(0));
15863 }
15864 my_free((char*)data, MYF(MY_ALLOW_ZERO_PTR));
15865 my_free((char*)pack_data, MYF(MY_ALLOW_ZERO_PTR));
15866 }
15867
15868 /* ndb_share reference schema(?) free */
15869 DBUG_PRINT("NDB_SHARE", ("%s binlog schema(?) free use_count: %u",
15870 m_share->key, m_share->use_count));
15871
15872 DBUG_RETURN(error);
15873 }
15874
alter_table_phase2(THD * thd,TABLE * altered_table,HA_CREATE_INFO * create_info,HA_ALTER_INFO * alter_info,HA_ALTER_FLAGS * alter_flags)15875 int ha_ndbcluster::alter_table_phase2(THD *thd,
15876 TABLE *altered_table,
15877 HA_CREATE_INFO *create_info,
15878 HA_ALTER_INFO *alter_info,
15879 HA_ALTER_FLAGS *alter_flags)
15880
15881 {
15882 int error= 0;
15883 Thd_ndb *thd_ndb= get_thd_ndb(thd);
15884 NDB_ALTER_DATA *alter_data= (NDB_ALTER_DATA *) alter_info->data;
15885 NDBDICT *dict= alter_data->dictionary;
15886 HA_ALTER_FLAGS dropping;
15887
15888 DBUG_ENTER("alter_table_phase2");
15889 dropping= dropping | HA_DROP_INDEX | HA_DROP_UNIQUE_INDEX;
15890
15891 if (!thd_ndb->has_required_global_schema_lock("ha_ndbcluster::alter_table_phase2"))
15892 {
15893 error= HA_ERR_NO_CONNECTION;
15894 goto err;
15895 }
15896
15897 if ((*alter_flags & dropping).is_set())
15898 {
15899 /* Tell the handler to finally drop the indexes. */
15900 if ((error= final_drop_index(table)))
15901 {
15902 print_error(error, MYF(0));
15903 goto abort;
15904 }
15905 }
15906
15907 DBUG_PRINT("info", ("getting frm file %s", altered_table->s->path.str));
15908
15909 DBUG_ASSERT(alter_data);
15910 error= alter_frm(thd, altered_table->s->path.str, alter_data);
15911 if (!error)
15912 {
15913 /*
15914 * Alter succesful, commit schema transaction
15915 */
15916 if (dict->endSchemaTrans() == -1)
15917 {
15918 error= ndb_to_mysql_error(&dict->getNdbError());
15919 DBUG_PRINT("info", ("Failed to commit schema transaction, error %u",
15920 error));
15921 table->file->print_error(error, MYF(0));
15922 goto err;
15923 }
15924 if ((*alter_flags & HA_CHANGE_AUTOINCREMENT_VALUE).is_set())
15925 error= set_auto_inc_val(thd, create_info->auto_increment_value);
15926 if (error)
15927 {
15928 DBUG_PRINT("info", ("Failed to set auto_increment value"));
15929 goto err;
15930 }
15931 }
15932 else // if (error)
15933 {
15934 abort:
15935 if (dict->endSchemaTrans(NdbDictionary::Dictionary::SchemaTransAbort)
15936 == -1)
15937 {
15938 DBUG_PRINT("info", ("Failed to abort schema transaction"));
15939 ERR_PRINT(dict->getNdbError());
15940 }
15941 err:
15942 /* ndb_share reference schema free */
15943 DBUG_PRINT("NDB_SHARE", ("%s binlog schema free use_count: %u",
15944 m_share->key, m_share->use_count));
15945 delete alter_data;
15946 alter_info->data= 0;
15947 }
15948 set_ndb_share_state(m_share, NSS_INITIAL);
15949 free_share(&m_share); // Decrease ref_count
15950 DBUG_RETURN(error);
15951 }
15952
alter_table_phase3(THD * thd,TABLE * table,HA_CREATE_INFO * create_info,HA_ALTER_INFO * alter_info,HA_ALTER_FLAGS * alter_flags)15953 int ha_ndbcluster::alter_table_phase3(THD *thd, TABLE *table,
15954 HA_CREATE_INFO *create_info,
15955 HA_ALTER_INFO *alter_info,
15956 HA_ALTER_FLAGS *alter_flags)
15957 {
15958 Thd_ndb *thd_ndb= get_thd_ndb(thd);
15959 DBUG_ENTER("alter_table_phase3");
15960
15961 NDB_ALTER_DATA *alter_data= (NDB_ALTER_DATA *) alter_info->data;
15962 if (!thd_ndb->has_required_global_schema_lock("ha_ndbcluster::alter_table_phase3"))
15963 {
15964 delete alter_data;
15965 alter_info->data= 0;
15966 DBUG_RETURN(HA_ERR_NO_CONNECTION);
15967 }
15968
15969 const char *db= table->s->db.str;
15970 const char *name= table->s->table_name.str;
15971
15972 /*
15973 all mysqld's will read frms from disk and setup new
15974 event operation for the table (new_op)
15975 */
15976 uint32 table_id= 0, table_version= 0;
15977 DBUG_ASSERT(alter_data != 0);
15978 if (alter_data)
15979 {
15980 table_id= alter_data->table_id;
15981 table_version= alter_data->old_table_version;
15982 }
15983 ndbcluster_log_schema_op(thd, thd->query(), thd->query_length(),
15984 db, name,
15985 table_id, table_version,
15986 SOT_ONLINE_ALTER_TABLE_PREPARE,
15987 NULL, NULL);
15988
15989 /*
15990 Get table id/version for new table
15991 */
15992 table_id= 0;
15993 table_version= 0;
15994 {
15995 Ndb* ndb= get_ndb(thd);
15996 DBUG_ASSERT(ndb != 0);
15997 if (ndb)
15998 {
15999 ndb->setDatabaseName(db);
16000 Ndb_table_guard ndbtab(ndb->getDictionary(), name);
16001 const NDBTAB *new_tab= ndbtab.get_table();
16002 DBUG_ASSERT(new_tab != 0);
16003 if (new_tab)
16004 {
16005 table_id= new_tab->getObjectId();
16006 table_version= new_tab->getObjectVersion();
16007 }
16008 }
16009 }
16010
16011 /*
16012 all mysqld's will switch to using the new_op, and delete the old
16013 event operation
16014 */
16015 ndbcluster_log_schema_op(thd, thd->query(), thd->query_length(),
16016 db, name,
16017 table_id, table_version,
16018 SOT_ONLINE_ALTER_TABLE_COMMIT,
16019 NULL, NULL);
16020
16021 delete alter_data;
16022 alter_info->data= 0;
16023 DBUG_RETURN(0);
16024 }
16025 #endif
16026
set_up_tablespace(st_alter_tablespace * alter_info,NdbDictionary::Tablespace * ndb_ts)16027 bool set_up_tablespace(st_alter_tablespace *alter_info,
16028 NdbDictionary::Tablespace *ndb_ts)
16029 {
16030 if (alter_info->extent_size >= (Uint64(1) << 32))
16031 {
16032 // TODO set correct error
16033 return TRUE;
16034 }
16035 ndb_ts->setName(alter_info->tablespace_name);
16036 ndb_ts->setExtentSize(Uint32(alter_info->extent_size));
16037 ndb_ts->setDefaultLogfileGroup(alter_info->logfile_group_name);
16038 return FALSE;
16039 }
16040
set_up_datafile(st_alter_tablespace * alter_info,NdbDictionary::Datafile * ndb_df)16041 bool set_up_datafile(st_alter_tablespace *alter_info,
16042 NdbDictionary::Datafile *ndb_df)
16043 {
16044 if (alter_info->max_size > 0)
16045 {
16046 my_error(ER_TABLESPACE_AUTO_EXTEND_ERROR, MYF(0));
16047 return TRUE;
16048 }
16049 ndb_df->setPath(alter_info->data_file_name);
16050 ndb_df->setSize(alter_info->initial_size);
16051 ndb_df->setTablespace(alter_info->tablespace_name);
16052 return FALSE;
16053 }
16054
set_up_logfile_group(st_alter_tablespace * alter_info,NdbDictionary::LogfileGroup * ndb_lg)16055 bool set_up_logfile_group(st_alter_tablespace *alter_info,
16056 NdbDictionary::LogfileGroup *ndb_lg)
16057 {
16058 if (alter_info->undo_buffer_size >= (Uint64(1) << 32))
16059 {
16060 // TODO set correct error
16061 return TRUE;
16062 }
16063
16064 ndb_lg->setName(alter_info->logfile_group_name);
16065 ndb_lg->setUndoBufferSize(Uint32(alter_info->undo_buffer_size));
16066 return FALSE;
16067 }
16068
set_up_undofile(st_alter_tablespace * alter_info,NdbDictionary::Undofile * ndb_uf)16069 bool set_up_undofile(st_alter_tablespace *alter_info,
16070 NdbDictionary::Undofile *ndb_uf)
16071 {
16072 ndb_uf->setPath(alter_info->undo_file_name);
16073 ndb_uf->setSize(alter_info->initial_size);
16074 ndb_uf->setLogfileGroup(alter_info->logfile_group_name);
16075 return FALSE;
16076 }
16077
ndbcluster_alter_tablespace(handlerton * hton,THD * thd,st_alter_tablespace * alter_info)16078 int ndbcluster_alter_tablespace(handlerton *hton,
16079 THD* thd, st_alter_tablespace *alter_info)
16080 {
16081 int is_tablespace= 0;
16082 NdbError err;
16083 NDBDICT *dict;
16084 int error;
16085 const char *errmsg;
16086 Ndb *ndb;
16087 DBUG_ENTER("ndbcluster_alter_tablespace");
16088 LINT_INIT(errmsg);
16089
16090 ndb= check_ndb_in_thd(thd);
16091 if (ndb == NULL)
16092 {
16093 DBUG_RETURN(HA_ERR_NO_CONNECTION);
16094 }
16095 dict= ndb->getDictionary();
16096
16097 uint32 table_id= 0, table_version= 0;
16098 switch (alter_info->ts_cmd_type){
16099 case (CREATE_TABLESPACE):
16100 {
16101 error= ER_CREATE_FILEGROUP_FAILED;
16102
16103 NdbDictionary::Tablespace ndb_ts;
16104 NdbDictionary::Datafile ndb_df;
16105 NdbDictionary::ObjectId objid;
16106 if (set_up_tablespace(alter_info, &ndb_ts))
16107 {
16108 DBUG_RETURN(1);
16109 }
16110 if (set_up_datafile(alter_info, &ndb_df))
16111 {
16112 DBUG_RETURN(1);
16113 }
16114 errmsg= "TABLESPACE";
16115 if (dict->createTablespace(ndb_ts, &objid))
16116 {
16117 DBUG_PRINT("error", ("createTablespace returned %d", error));
16118 goto ndberror;
16119 }
16120 table_id = objid.getObjectId();
16121 table_version = objid.getObjectVersion();
16122 if (dict->getWarningFlags() &
16123 NdbDictionary::Dictionary::WarnExtentRoundUp)
16124 {
16125 push_warning_printf(current_thd, Sql_condition::WARN_LEVEL_WARN,
16126 dict->getWarningFlags(),
16127 "Extent size rounded up to kernel page size");
16128 }
16129 DBUG_PRINT("alter_info", ("Successfully created Tablespace"));
16130 errmsg= "DATAFILE";
16131 if (dict->createDatafile(ndb_df))
16132 {
16133 err= dict->getNdbError();
16134 NdbDictionary::Tablespace tmp= dict->getTablespace(ndb_ts.getName());
16135 if (dict->getNdbError().code == 0 &&
16136 tmp.getObjectId() == objid.getObjectId() &&
16137 tmp.getObjectVersion() == objid.getObjectVersion())
16138 {
16139 dict->dropTablespace(tmp);
16140 }
16141
16142 DBUG_PRINT("error", ("createDatafile returned %d", error));
16143 goto ndberror2;
16144 }
16145 if (dict->getWarningFlags() &
16146 NdbDictionary::Dictionary::WarnDatafileRoundUp)
16147 {
16148 push_warning_printf(current_thd, Sql_condition::WARN_LEVEL_WARN,
16149 dict->getWarningFlags(),
16150 "Datafile size rounded up to extent size");
16151 }
16152 else /* produce only 1 message */
16153 if (dict->getWarningFlags() &
16154 NdbDictionary::Dictionary::WarnDatafileRoundDown)
16155 {
16156 push_warning_printf(current_thd, Sql_condition::WARN_LEVEL_WARN,
16157 dict->getWarningFlags(),
16158 "Datafile size rounded down to extent size");
16159 }
16160 is_tablespace= 1;
16161 break;
16162 }
16163 case (ALTER_TABLESPACE):
16164 {
16165 error= ER_ALTER_FILEGROUP_FAILED;
16166 if (alter_info->ts_alter_tablespace_type == ALTER_TABLESPACE_ADD_FILE)
16167 {
16168 NdbDictionary::Datafile ndb_df;
16169 if (set_up_datafile(alter_info, &ndb_df))
16170 {
16171 DBUG_RETURN(1);
16172 }
16173 errmsg= " CREATE DATAFILE";
16174 NdbDictionary::ObjectId objid;
16175 if (dict->createDatafile(ndb_df, false, &objid))
16176 {
16177 goto ndberror;
16178 }
16179 table_id= objid.getObjectId();
16180 table_version= objid.getObjectVersion();
16181 if (dict->getWarningFlags() &
16182 NdbDictionary::Dictionary::WarnDatafileRoundUp)
16183 {
16184 push_warning_printf(current_thd, Sql_condition::WARN_LEVEL_WARN,
16185 dict->getWarningFlags(),
16186 "Datafile size rounded up to extent size");
16187 }
16188 else /* produce only 1 message */
16189 if (dict->getWarningFlags() &
16190 NdbDictionary::Dictionary::WarnDatafileRoundDown)
16191 {
16192 push_warning_printf(current_thd, Sql_condition::WARN_LEVEL_WARN,
16193 dict->getWarningFlags(),
16194 "Datafile size rounded down to extent size");
16195 }
16196 }
16197 else if(alter_info->ts_alter_tablespace_type == ALTER_TABLESPACE_DROP_FILE)
16198 {
16199 NdbDictionary::Tablespace ts= dict->getTablespace(alter_info->tablespace_name);
16200 NdbDictionary::Datafile df= dict->getDatafile(0, alter_info->data_file_name);
16201 NdbDictionary::ObjectId objid;
16202 df.getTablespaceId(&objid);
16203 table_id = df.getObjectId();
16204 table_version = df.getObjectVersion();
16205 if (ts.getObjectId() == objid.getObjectId() &&
16206 strcmp(df.getPath(), alter_info->data_file_name) == 0)
16207 {
16208 errmsg= " DROP DATAFILE";
16209 if (dict->dropDatafile(df))
16210 {
16211 goto ndberror;
16212 }
16213 }
16214 else
16215 {
16216 DBUG_PRINT("error", ("No such datafile"));
16217 my_error(ER_ALTER_FILEGROUP_FAILED, MYF(0), " NO SUCH FILE");
16218 DBUG_RETURN(1);
16219 }
16220 }
16221 else
16222 {
16223 DBUG_PRINT("error", ("Unsupported alter tablespace: %d",
16224 alter_info->ts_alter_tablespace_type));
16225 DBUG_RETURN(HA_ADMIN_NOT_IMPLEMENTED);
16226 }
16227 is_tablespace= 1;
16228 break;
16229 }
16230 case (CREATE_LOGFILE_GROUP):
16231 {
16232 error= ER_CREATE_FILEGROUP_FAILED;
16233 NdbDictionary::LogfileGroup ndb_lg;
16234 NdbDictionary::Undofile ndb_uf;
16235 NdbDictionary::ObjectId objid;
16236 if (alter_info->undo_file_name == NULL)
16237 {
16238 /*
16239 REDO files in LOGFILE GROUP not supported yet
16240 */
16241 DBUG_RETURN(HA_ADMIN_NOT_IMPLEMENTED);
16242 }
16243 if (set_up_logfile_group(alter_info, &ndb_lg))
16244 {
16245 DBUG_RETURN(1);
16246 }
16247 errmsg= "LOGFILE GROUP";
16248 if (dict->createLogfileGroup(ndb_lg, &objid))
16249 {
16250 goto ndberror;
16251 }
16252 table_id = objid.getObjectId();
16253 table_version = objid.getObjectVersion();
16254 if (dict->getWarningFlags() &
16255 NdbDictionary::Dictionary::WarnUndobufferRoundUp)
16256 {
16257 push_warning_printf(current_thd, Sql_condition::WARN_LEVEL_WARN,
16258 dict->getWarningFlags(),
16259 "Undo buffer size rounded up to kernel page size");
16260 }
16261 DBUG_PRINT("alter_info", ("Successfully created Logfile Group"));
16262 if (set_up_undofile(alter_info, &ndb_uf))
16263 {
16264 DBUG_RETURN(1);
16265 }
16266 errmsg= "UNDOFILE";
16267 if (dict->createUndofile(ndb_uf))
16268 {
16269 err= dict->getNdbError();
16270 NdbDictionary::LogfileGroup tmp= dict->getLogfileGroup(ndb_lg.getName());
16271 if (dict->getNdbError().code == 0 &&
16272 tmp.getObjectId() == objid.getObjectId() &&
16273 tmp.getObjectVersion() == objid.getObjectVersion())
16274 {
16275 dict->dropLogfileGroup(tmp);
16276 }
16277 goto ndberror2;
16278 }
16279 if (dict->getWarningFlags() &
16280 NdbDictionary::Dictionary::WarnUndofileRoundDown)
16281 {
16282 push_warning_printf(current_thd, Sql_condition::WARN_LEVEL_WARN,
16283 dict->getWarningFlags(),
16284 "Undofile size rounded down to kernel page size");
16285 }
16286 break;
16287 }
16288 case (ALTER_LOGFILE_GROUP):
16289 {
16290 error= ER_ALTER_FILEGROUP_FAILED;
16291 if (alter_info->undo_file_name == NULL)
16292 {
16293 /*
16294 REDO files in LOGFILE GROUP not supported yet
16295 */
16296 DBUG_RETURN(HA_ADMIN_NOT_IMPLEMENTED);
16297 }
16298 NdbDictionary::Undofile ndb_uf;
16299 if (set_up_undofile(alter_info, &ndb_uf))
16300 {
16301 DBUG_RETURN(1);
16302 }
16303 errmsg= "CREATE UNDOFILE";
16304 NdbDictionary::ObjectId objid;
16305 if (dict->createUndofile(ndb_uf, false, &objid))
16306 {
16307 goto ndberror;
16308 }
16309 table_id = objid.getObjectId();
16310 table_version = objid.getObjectVersion();
16311 if (dict->getWarningFlags() &
16312 NdbDictionary::Dictionary::WarnUndofileRoundDown)
16313 {
16314 push_warning_printf(current_thd, Sql_condition::WARN_LEVEL_WARN,
16315 dict->getWarningFlags(),
16316 "Undofile size rounded down to kernel page size");
16317 }
16318 break;
16319 }
16320 case (DROP_TABLESPACE):
16321 {
16322 error= ER_DROP_FILEGROUP_FAILED;
16323 errmsg= "TABLESPACE";
16324 NdbDictionary::Tablespace ts=
16325 dict->getTablespace(alter_info->tablespace_name);
16326 table_id= ts.getObjectId();
16327 table_version= ts.getObjectVersion();
16328 if (dict->dropTablespace(ts))
16329 {
16330 goto ndberror;
16331 }
16332 is_tablespace= 1;
16333 break;
16334 }
16335 case (DROP_LOGFILE_GROUP):
16336 {
16337 error= ER_DROP_FILEGROUP_FAILED;
16338 errmsg= "LOGFILE GROUP";
16339 NdbDictionary::LogfileGroup lg=
16340 dict->getLogfileGroup(alter_info->logfile_group_name);
16341 table_id= lg.getObjectId();
16342 table_version= lg.getObjectVersion();
16343 if (dict->dropLogfileGroup(lg))
16344 {
16345 goto ndberror;
16346 }
16347 break;
16348 }
16349 case (CHANGE_FILE_TABLESPACE):
16350 {
16351 DBUG_RETURN(HA_ADMIN_NOT_IMPLEMENTED);
16352 }
16353 case (ALTER_ACCESS_MODE_TABLESPACE):
16354 {
16355 DBUG_RETURN(HA_ADMIN_NOT_IMPLEMENTED);
16356 }
16357 default:
16358 {
16359 DBUG_RETURN(HA_ADMIN_NOT_IMPLEMENTED);
16360 }
16361 }
16362 if (is_tablespace)
16363 ndbcluster_log_schema_op(thd,
16364 thd->query(), thd->query_length(),
16365 "", alter_info->tablespace_name,
16366 table_id, table_version,
16367 SOT_TABLESPACE, NULL, NULL);
16368 else
16369 ndbcluster_log_schema_op(thd,
16370 thd->query(), thd->query_length(),
16371 "", alter_info->logfile_group_name,
16372 table_id, table_version,
16373 SOT_LOGFILE_GROUP, NULL, NULL);
16374 DBUG_RETURN(FALSE);
16375
16376 ndberror:
16377 err= dict->getNdbError();
16378 ndberror2:
16379 ndb_to_mysql_error(&err);
16380
16381 my_error(error, MYF(0), errmsg);
16382 DBUG_RETURN(1);
16383 }
16384
16385
get_no_parts(const char * name,uint * no_parts)16386 bool ha_ndbcluster::get_no_parts(const char *name, uint *no_parts)
16387 {
16388 THD *thd= current_thd;
16389 Ndb *ndb;
16390 NDBDICT *dict;
16391 int err;
16392 DBUG_ENTER("ha_ndbcluster::get_no_parts");
16393 LINT_INIT(err);
16394
16395 set_dbname(name);
16396 set_tabname(name);
16397 for (;;)
16398 {
16399 if (check_ndb_connection(thd))
16400 {
16401 err= HA_ERR_NO_CONNECTION;
16402 break;
16403 }
16404 ndb= get_ndb(thd);
16405 ndb->setDatabaseName(m_dbname);
16406 Ndb_table_guard ndbtab_g(dict= ndb->getDictionary(), m_tabname);
16407 if (!ndbtab_g.get_table())
16408 ERR_BREAK(dict->getNdbError(), err);
16409 *no_parts= ndbtab_g.get_table()->getFragmentCount();
16410 DBUG_RETURN(FALSE);
16411 }
16412
16413 print_error(err, MYF(0));
16414 DBUG_RETURN(TRUE);
16415 }
16416
ndbcluster_fill_files_table(handlerton * hton,THD * thd,TABLE_LIST * tables,Item * cond)16417 static int ndbcluster_fill_files_table(handlerton *hton,
16418 THD *thd,
16419 TABLE_LIST *tables,
16420 Item *cond)
16421 {
16422 TABLE* table= tables->table;
16423 Ndb *ndb= check_ndb_in_thd(thd);
16424 NdbDictionary::Dictionary* dict= ndb->getDictionary();
16425 NdbDictionary::Dictionary::List dflist;
16426 NdbError ndberr;
16427 uint i;
16428 DBUG_ENTER("ndbcluster_fill_files_table");
16429
16430 dict->listObjects(dflist, NdbDictionary::Object::Datafile);
16431 ndberr= dict->getNdbError();
16432 if (ndberr.classification != NdbError::NoError)
16433 ERR_RETURN(ndberr);
16434
16435 for (i= 0; i < dflist.count; i++)
16436 {
16437 NdbDictionary::Dictionary::List::Element& elt = dflist.elements[i];
16438 Ndb_cluster_connection_node_iter iter;
16439 uint id;
16440
16441 g_ndb_cluster_connection->init_get_next_node(iter);
16442
16443 while ((id= g_ndb_cluster_connection->get_next_alive_node(iter)))
16444 {
16445 init_fill_schema_files_row(table);
16446 NdbDictionary::Datafile df= dict->getDatafile(id, elt.name);
16447 ndberr= dict->getNdbError();
16448 if(ndberr.classification != NdbError::NoError)
16449 {
16450 if (ndberr.classification == NdbError::SchemaError)
16451 continue;
16452
16453 if (ndberr.classification == NdbError::UnknownResultError)
16454 continue;
16455
16456 ERR_RETURN(ndberr);
16457 }
16458 NdbDictionary::Tablespace ts= dict->getTablespace(df.getTablespace());
16459 ndberr= dict->getNdbError();
16460 if (ndberr.classification != NdbError::NoError)
16461 {
16462 if (ndberr.classification == NdbError::SchemaError)
16463 continue;
16464 ERR_RETURN(ndberr);
16465 }
16466
16467 table->field[IS_FILES_FILE_NAME]->set_notnull();
16468 table->field[IS_FILES_FILE_NAME]->store(elt.name, strlen(elt.name),
16469 system_charset_info);
16470 table->field[IS_FILES_FILE_TYPE]->set_notnull();
16471 table->field[IS_FILES_FILE_TYPE]->store("DATAFILE",8,
16472 system_charset_info);
16473 table->field[IS_FILES_TABLESPACE_NAME]->set_notnull();
16474 table->field[IS_FILES_TABLESPACE_NAME]->store(df.getTablespace(),
16475 strlen(df.getTablespace()),
16476 system_charset_info);
16477 table->field[IS_FILES_LOGFILE_GROUP_NAME]->set_notnull();
16478 table->field[IS_FILES_LOGFILE_GROUP_NAME]->
16479 store(ts.getDefaultLogfileGroup(),
16480 strlen(ts.getDefaultLogfileGroup()),
16481 system_charset_info);
16482 table->field[IS_FILES_ENGINE]->set_notnull();
16483 table->field[IS_FILES_ENGINE]->store(ndbcluster_hton_name,
16484 ndbcluster_hton_name_length,
16485 system_charset_info);
16486
16487 table->field[IS_FILES_FREE_EXTENTS]->set_notnull();
16488 table->field[IS_FILES_FREE_EXTENTS]->store(df.getFree()
16489 / ts.getExtentSize(), true);
16490 table->field[IS_FILES_TOTAL_EXTENTS]->set_notnull();
16491 table->field[IS_FILES_TOTAL_EXTENTS]->store(df.getSize()
16492 / ts.getExtentSize(), true);
16493 table->field[IS_FILES_EXTENT_SIZE]->set_notnull();
16494 table->field[IS_FILES_EXTENT_SIZE]->store(ts.getExtentSize(), true);
16495 table->field[IS_FILES_INITIAL_SIZE]->set_notnull();
16496 table->field[IS_FILES_INITIAL_SIZE]->store(df.getSize(), true);
16497 table->field[IS_FILES_MAXIMUM_SIZE]->set_notnull();
16498 table->field[IS_FILES_MAXIMUM_SIZE]->store(df.getSize(), true);
16499 table->field[IS_FILES_VERSION]->set_notnull();
16500 table->field[IS_FILES_VERSION]->store(df.getObjectVersion(), true);
16501
16502 table->field[IS_FILES_ROW_FORMAT]->set_notnull();
16503 table->field[IS_FILES_ROW_FORMAT]->store("FIXED", 5, system_charset_info);
16504
16505 char extra[30];
16506 int len= my_snprintf(extra, sizeof(extra), "CLUSTER_NODE=%u", id);
16507 table->field[IS_FILES_EXTRA]->set_notnull();
16508 table->field[IS_FILES_EXTRA]->store(extra, len, system_charset_info);
16509 schema_table_store_record(thd, table);
16510 }
16511 }
16512
16513 NdbDictionary::Dictionary::List tslist;
16514 dict->listObjects(tslist, NdbDictionary::Object::Tablespace);
16515 ndberr= dict->getNdbError();
16516 if (ndberr.classification != NdbError::NoError)
16517 ERR_RETURN(ndberr);
16518
16519 for (i= 0; i < tslist.count; i++)
16520 {
16521 NdbDictionary::Dictionary::List::Element&elt= tslist.elements[i];
16522
16523 NdbDictionary::Tablespace ts= dict->getTablespace(elt.name);
16524 ndberr= dict->getNdbError();
16525 if (ndberr.classification != NdbError::NoError)
16526 {
16527 if (ndberr.classification == NdbError::SchemaError)
16528 continue;
16529 ERR_RETURN(ndberr);
16530 }
16531
16532 init_fill_schema_files_row(table);
16533 table->field[IS_FILES_FILE_TYPE]->set_notnull();
16534 table->field[IS_FILES_FILE_TYPE]->store("TABLESPACE", 10,
16535 system_charset_info);
16536
16537 table->field[IS_FILES_TABLESPACE_NAME]->set_notnull();
16538 table->field[IS_FILES_TABLESPACE_NAME]->store(elt.name,
16539 strlen(elt.name),
16540 system_charset_info);
16541 table->field[IS_FILES_LOGFILE_GROUP_NAME]->set_notnull();
16542 table->field[IS_FILES_LOGFILE_GROUP_NAME]->
16543 store(ts.getDefaultLogfileGroup(),
16544 strlen(ts.getDefaultLogfileGroup()),
16545 system_charset_info);
16546
16547 table->field[IS_FILES_ENGINE]->set_notnull();
16548 table->field[IS_FILES_ENGINE]->store(ndbcluster_hton_name,
16549 ndbcluster_hton_name_length,
16550 system_charset_info);
16551
16552 table->field[IS_FILES_EXTENT_SIZE]->set_notnull();
16553 table->field[IS_FILES_EXTENT_SIZE]->store(ts.getExtentSize(), true);
16554
16555 table->field[IS_FILES_VERSION]->set_notnull();
16556 table->field[IS_FILES_VERSION]->store(ts.getObjectVersion(), true);
16557
16558 schema_table_store_record(thd, table);
16559 }
16560
16561 NdbDictionary::Dictionary::List uflist;
16562 dict->listObjects(uflist, NdbDictionary::Object::Undofile);
16563 ndberr= dict->getNdbError();
16564 if (ndberr.classification != NdbError::NoError)
16565 ERR_RETURN(ndberr);
16566
16567 for (i= 0; i < uflist.count; i++)
16568 {
16569 NdbDictionary::Dictionary::List::Element& elt= uflist.elements[i];
16570 Ndb_cluster_connection_node_iter iter;
16571 unsigned id;
16572
16573 g_ndb_cluster_connection->init_get_next_node(iter);
16574
16575 while ((id= g_ndb_cluster_connection->get_next_alive_node(iter)))
16576 {
16577 NdbDictionary::Undofile uf= dict->getUndofile(id, elt.name);
16578 ndberr= dict->getNdbError();
16579 if (ndberr.classification != NdbError::NoError)
16580 {
16581 if (ndberr.classification == NdbError::SchemaError)
16582 continue;
16583 if (ndberr.classification == NdbError::UnknownResultError)
16584 continue;
16585 ERR_RETURN(ndberr);
16586 }
16587 NdbDictionary::LogfileGroup lfg=
16588 dict->getLogfileGroup(uf.getLogfileGroup());
16589 ndberr= dict->getNdbError();
16590 if (ndberr.classification != NdbError::NoError)
16591 {
16592 if (ndberr.classification == NdbError::SchemaError)
16593 continue;
16594 ERR_RETURN(ndberr);
16595 }
16596
16597 init_fill_schema_files_row(table);
16598 table->field[IS_FILES_FILE_NAME]->set_notnull();
16599 table->field[IS_FILES_FILE_NAME]->store(elt.name, strlen(elt.name),
16600 system_charset_info);
16601 table->field[IS_FILES_FILE_TYPE]->set_notnull();
16602 table->field[IS_FILES_FILE_TYPE]->store("UNDO LOG", 8,
16603 system_charset_info);
16604 NdbDictionary::ObjectId objid;
16605 uf.getLogfileGroupId(&objid);
16606 table->field[IS_FILES_LOGFILE_GROUP_NAME]->set_notnull();
16607 table->field[IS_FILES_LOGFILE_GROUP_NAME]->store(uf.getLogfileGroup(),
16608 strlen(uf.getLogfileGroup()),
16609 system_charset_info);
16610 table->field[IS_FILES_LOGFILE_GROUP_NUMBER]->set_notnull();
16611 table->field[IS_FILES_LOGFILE_GROUP_NUMBER]->store(objid.getObjectId(), true);
16612 table->field[IS_FILES_ENGINE]->set_notnull();
16613 table->field[IS_FILES_ENGINE]->store(ndbcluster_hton_name,
16614 ndbcluster_hton_name_length,
16615 system_charset_info);
16616
16617 table->field[IS_FILES_TOTAL_EXTENTS]->set_notnull();
16618 table->field[IS_FILES_TOTAL_EXTENTS]->store(uf.getSize()/4, true);
16619 table->field[IS_FILES_EXTENT_SIZE]->set_notnull();
16620 table->field[IS_FILES_EXTENT_SIZE]->store(4, true);
16621
16622 table->field[IS_FILES_INITIAL_SIZE]->set_notnull();
16623 table->field[IS_FILES_INITIAL_SIZE]->store(uf.getSize(), true);
16624 table->field[IS_FILES_MAXIMUM_SIZE]->set_notnull();
16625 table->field[IS_FILES_MAXIMUM_SIZE]->store(uf.getSize(), true);
16626
16627 table->field[IS_FILES_VERSION]->set_notnull();
16628 table->field[IS_FILES_VERSION]->store(uf.getObjectVersion(), true);
16629
16630 char extra[100];
16631 int len= my_snprintf(extra,sizeof(extra),"CLUSTER_NODE=%u;UNDO_BUFFER_SIZE=%lu",
16632 id, (ulong) lfg.getUndoBufferSize());
16633 table->field[IS_FILES_EXTRA]->set_notnull();
16634 table->field[IS_FILES_EXTRA]->store(extra, len, system_charset_info);
16635 schema_table_store_record(thd, table);
16636 }
16637 }
16638
16639 // now for LFGs
16640 NdbDictionary::Dictionary::List lfglist;
16641 dict->listObjects(lfglist, NdbDictionary::Object::LogfileGroup);
16642 ndberr= dict->getNdbError();
16643 if (ndberr.classification != NdbError::NoError)
16644 ERR_RETURN(ndberr);
16645
16646 for (i= 0; i < lfglist.count; i++)
16647 {
16648 NdbDictionary::Dictionary::List::Element& elt= lfglist.elements[i];
16649
16650 NdbDictionary::LogfileGroup lfg= dict->getLogfileGroup(elt.name);
16651 ndberr= dict->getNdbError();
16652 if (ndberr.classification != NdbError::NoError)
16653 {
16654 if (ndberr.classification == NdbError::SchemaError)
16655 continue;
16656 ERR_RETURN(ndberr);
16657 }
16658
16659 init_fill_schema_files_row(table);
16660 table->field[IS_FILES_FILE_TYPE]->set_notnull();
16661 table->field[IS_FILES_FILE_TYPE]->store("UNDO LOG", 8,
16662 system_charset_info);
16663
16664 table->field[IS_FILES_LOGFILE_GROUP_NAME]->set_notnull();
16665 table->field[IS_FILES_LOGFILE_GROUP_NAME]->store(elt.name,
16666 strlen(elt.name),
16667 system_charset_info);
16668 table->field[IS_FILES_LOGFILE_GROUP_NUMBER]->set_notnull();
16669 table->field[IS_FILES_LOGFILE_GROUP_NUMBER]->store(lfg.getObjectId(), true);
16670 table->field[IS_FILES_ENGINE]->set_notnull();
16671 table->field[IS_FILES_ENGINE]->store(ndbcluster_hton_name,
16672 ndbcluster_hton_name_length,
16673 system_charset_info);
16674
16675 table->field[IS_FILES_FREE_EXTENTS]->set_notnull();
16676 table->field[IS_FILES_FREE_EXTENTS]->store(lfg.getUndoFreeWords(), true);
16677 table->field[IS_FILES_EXTENT_SIZE]->set_notnull();
16678 table->field[IS_FILES_EXTENT_SIZE]->store(4, true);
16679
16680 table->field[IS_FILES_VERSION]->set_notnull();
16681 table->field[IS_FILES_VERSION]->store(lfg.getObjectVersion(), true);
16682
16683 char extra[100];
16684 int len= my_snprintf(extra,sizeof(extra),
16685 "UNDO_BUFFER_SIZE=%lu",
16686 (ulong) lfg.getUndoBufferSize());
16687 table->field[IS_FILES_EXTRA]->set_notnull();
16688 table->field[IS_FILES_EXTRA]->store(extra, len, system_charset_info);
16689 schema_table_store_record(thd, table);
16690 }
16691 DBUG_RETURN(0);
16692 }
16693
show_ndb_vars(THD * thd,SHOW_VAR * var,char * buff)16694 static int show_ndb_vars(THD *thd, SHOW_VAR *var, char *buff)
16695 {
16696 if (!check_ndb_in_thd(thd))
16697 return -1;
16698 struct st_ndb_status *st;
16699 SHOW_VAR *st_var;
16700 {
16701 char *mem= (char*)sql_alloc(sizeof(struct st_ndb_status) +
16702 sizeof(ndb_status_variables_dynamic));
16703 st= new (mem) st_ndb_status;
16704 st_var= (SHOW_VAR*)(mem + sizeof(struct st_ndb_status));
16705 memcpy(st_var, &ndb_status_variables_dynamic, sizeof(ndb_status_variables_dynamic));
16706 int i= 0;
16707 SHOW_VAR *tmp= &(ndb_status_variables_dynamic[0]);
16708 for (; tmp->value; tmp++, i++)
16709 st_var[i].value= mem + (tmp->value - (char*)&g_ndb_status);
16710 }
16711 {
16712 Thd_ndb *thd_ndb= get_thd_ndb(thd);
16713 Ndb_cluster_connection *c= thd_ndb->connection;
16714 update_status_variables(thd_ndb, st, c);
16715 }
16716 var->type= SHOW_ARRAY;
16717 var->value= (char *) st_var;
16718 return 0;
16719 }
16720
16721 SHOW_VAR ndb_status_variables_export[]= {
16722 {"Ndb", (char*) &show_ndb_vars, SHOW_FUNC},
16723 {"Ndb_conflict", (char*) &ndb_status_conflict_variables, SHOW_ARRAY},
16724 {"Ndb", (char*) &ndb_status_injector_variables, SHOW_ARRAY},
16725 {"Ndb", (char*) &ndb_status_slave_variables, SHOW_ARRAY},
16726 {"Ndb", (char*) &show_ndb_server_api_stats, SHOW_FUNC},
16727 {"Ndb_index_stat", (char*) &ndb_status_index_stat_variables, SHOW_ARRAY},
16728 {NullS, NullS, SHOW_LONG}
16729 };
16730
16731 static MYSQL_SYSVAR_ULONG(
16732 cache_check_time, /* name */
16733 opt_ndb_cache_check_time, /* var */
16734 PLUGIN_VAR_RQCMDARG,
16735 "A dedicated thread is created to, at the given "
16736 "millisecond interval, invalidate the query cache "
16737 "if another MySQL server in the cluster has changed "
16738 "the data in the database.",
16739 NULL, /* check func. */
16740 NULL, /* update func. */
16741 0, /* default */
16742 0, /* min */
16743 ONE_YEAR_IN_SECONDS, /* max */
16744 0 /* block */
16745 );
16746
16747
16748 static MYSQL_SYSVAR_ULONG(
16749 extra_logging, /* name */
16750 opt_ndb_extra_logging, /* var */
16751 PLUGIN_VAR_OPCMDARG,
16752 "Turn on more logging in the error log.",
16753 NULL, /* check func. */
16754 NULL, /* update func. */
16755 1, /* default */
16756 0, /* min */
16757 0, /* max */
16758 0 /* block */
16759 );
16760
16761
16762 static MYSQL_SYSVAR_ULONG(
16763 wait_connected, /* name */
16764 opt_ndb_wait_connected, /* var */
16765 PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY,
16766 "Time (in seconds) for mysqld to wait for connection "
16767 "to cluster management and data nodes.",
16768 NULL, /* check func. */
16769 NULL, /* update func. */
16770 0, /* default */
16771 0, /* min */
16772 ONE_YEAR_IN_SECONDS, /* max */
16773 0 /* block */
16774 );
16775
16776
16777 static MYSQL_SYSVAR_ULONG(
16778 wait_setup, /* name */
16779 opt_ndb_wait_setup, /* var */
16780 PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY,
16781 "Time (in seconds) for mysqld to wait for setup to "
16782 "complete (0 = no wait)",
16783 NULL, /* check func. */
16784 NULL, /* update func. */
16785 15, /* default */
16786 0, /* min */
16787 ONE_YEAR_IN_SECONDS, /* max */
16788 0 /* block */
16789 );
16790
16791
16792 static MYSQL_SYSVAR_UINT(
16793 cluster_connection_pool, /* name */
16794 opt_ndb_cluster_connection_pool, /* var */
16795 PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY,
16796 "Pool of cluster connections to be used by mysql server.",
16797 NULL, /* check func. */
16798 NULL, /* update func. */
16799 1, /* default */
16800 1, /* min */
16801 63, /* max */
16802 0 /* block */
16803 );
16804
16805 /* should be in index_stat.h */
16806
16807 extern int
16808 ndb_index_stat_option_check(MYSQL_THD,
16809 struct st_mysql_sys_var *var,
16810 void *save,
16811 struct st_mysql_value *value);
16812 extern void
16813 ndb_index_stat_option_update(MYSQL_THD,
16814 struct st_mysql_sys_var *var,
16815 void *var_ptr,
16816 const void *save);
16817
16818 extern char ndb_index_stat_option_buf[];
16819
16820 static MYSQL_SYSVAR_STR(
16821 index_stat_option, /* name */
16822 opt_ndb_index_stat_option, /* var */
16823 PLUGIN_VAR_RQCMDARG,
16824 "Comma-separated tunable options for ndb index statistics",
16825 ndb_index_stat_option_check, /* check func. */
16826 ndb_index_stat_option_update, /* update func. */
16827 ndb_index_stat_option_buf
16828 );
16829
16830
16831 ulong opt_ndb_report_thresh_binlog_epoch_slip;
16832 static MYSQL_SYSVAR_ULONG(
16833 report_thresh_binlog_epoch_slip, /* name */
16834 opt_ndb_report_thresh_binlog_epoch_slip,/* var */
16835 PLUGIN_VAR_RQCMDARG,
16836 "Threshold on number of epochs to be behind before reporting binlog "
16837 "status. E.g. 3 means that if the difference between what epoch has "
16838 "been received from the storage nodes and what has been applied to "
16839 "the binlog is 3 or more, a status message will be sent to the cluster "
16840 "log.",
16841 NULL, /* check func. */
16842 NULL, /* update func. */
16843 3, /* default */
16844 0, /* min */
16845 256, /* max */
16846 0 /* block */
16847 );
16848
16849
16850 ulong opt_ndb_report_thresh_binlog_mem_usage;
16851 static MYSQL_SYSVAR_ULONG(
16852 report_thresh_binlog_mem_usage, /* name */
16853 opt_ndb_report_thresh_binlog_mem_usage,/* var */
16854 PLUGIN_VAR_RQCMDARG,
16855 "Threshold on percentage of free memory before reporting binlog "
16856 "status. E.g. 10 means that if amount of available memory for "
16857 "receiving binlog data from the storage nodes goes below 10%, "
16858 "a status message will be sent to the cluster log.",
16859 NULL, /* check func. */
16860 NULL, /* update func. */
16861 10, /* default */
16862 0, /* min */
16863 100, /* max */
16864 0 /* block */
16865 );
16866
16867
16868 my_bool opt_ndb_log_update_as_write;
16869 static MYSQL_SYSVAR_BOOL(
16870 log_update_as_write, /* name */
16871 opt_ndb_log_update_as_write, /* var */
16872 PLUGIN_VAR_OPCMDARG,
16873 "For efficiency log only after image as a write event. "
16874 "Ignore before image. This may cause compatability problems if "
16875 "replicating to other storage engines than ndbcluster.",
16876 NULL, /* check func. */
16877 NULL, /* update func. */
16878 1 /* default */
16879 );
16880
16881
16882 my_bool opt_ndb_log_updated_only;
16883 static MYSQL_SYSVAR_BOOL(
16884 log_updated_only, /* name */
16885 opt_ndb_log_updated_only, /* var */
16886 PLUGIN_VAR_OPCMDARG,
16887 "For efficiency log only updated columns. Columns are considered "
16888 "as \"updated\" even if they are updated with the same value. "
16889 "This may cause compatability problems if "
16890 "replicating to other storage engines than ndbcluster.",
16891 NULL, /* check func. */
16892 NULL, /* update func. */
16893 1 /* default */
16894 );
16895
16896
16897 my_bool opt_ndb_log_orig;
16898 static MYSQL_SYSVAR_BOOL(
16899 log_orig, /* name */
16900 opt_ndb_log_orig, /* var */
16901 PLUGIN_VAR_OPCMDARG,
16902 "Log originating server id and epoch in ndb_binlog_index. Each epoch "
16903 "may in this case have multiple rows in ndb_binlog_index, one for "
16904 "each originating epoch.",
16905 NULL, /* check func. */
16906 NULL, /* update func. */
16907 0 /* default */
16908 );
16909
16910
16911 my_bool opt_ndb_log_bin;
16912 static MYSQL_SYSVAR_BOOL(
16913 log_bin, /* name */
16914 opt_ndb_log_bin, /* var */
16915 PLUGIN_VAR_OPCMDARG,
16916 "Log ndb tables in the binary log. Option only has meaning if "
16917 "the binary log has been turned on for the server.",
16918 NULL, /* check func. */
16919 NULL, /* update func. */
16920 1 /* default */
16921 );
16922
16923
16924 my_bool opt_ndb_log_binlog_index;
16925 static MYSQL_SYSVAR_BOOL(
16926 log_binlog_index, /* name */
16927 opt_ndb_log_binlog_index, /* var */
16928 PLUGIN_VAR_OPCMDARG,
16929 "Insert mapping between epochs and binlog positions into the "
16930 "ndb_binlog_index table.",
16931 NULL, /* check func. */
16932 NULL, /* update func. */
16933 1 /* default */
16934 );
16935
16936
16937 static my_bool opt_ndb_log_empty_epochs;
16938 static MYSQL_SYSVAR_BOOL(
16939 log_empty_epochs, /* name */
16940 opt_ndb_log_empty_epochs, /* var */
16941 PLUGIN_VAR_OPCMDARG,
16942 "",
16943 NULL, /* check func. */
16944 NULL, /* update func. */
16945 0 /* default */
16946 );
16947
ndb_log_empty_epochs(void)16948 bool ndb_log_empty_epochs(void)
16949 {
16950 return opt_ndb_log_empty_epochs;
16951 }
16952
16953 my_bool opt_ndb_log_apply_status;
16954 static MYSQL_SYSVAR_BOOL(
16955 log_apply_status, /* name */
16956 opt_ndb_log_apply_status, /* var */
16957 PLUGIN_VAR_OPCMDARG,
16958 "Log ndb_apply_status updates from Master in the Binlog",
16959 NULL, /* check func. */
16960 NULL, /* update func. */
16961 0 /* default */
16962 );
16963
16964
16965 static MYSQL_SYSVAR_STR(
16966 connectstring, /* name */
16967 opt_ndb_connectstring, /* var */
16968 PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY,
16969 "Connect string for ndbcluster.",
16970 NULL, /* check func. */
16971 NULL, /* update func. */
16972 NULL /* default */
16973 );
16974
16975
16976 static MYSQL_SYSVAR_STR(
16977 mgmd_host, /* name */
16978 opt_ndb_connectstring, /* var */
16979 PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY,
16980 "Same as --ndb-connectstring",
16981 NULL, /* check func. */
16982 NULL, /* update func. */
16983 NULL /* default */
16984 );
16985
16986
16987 static MYSQL_SYSVAR_UINT(
16988 nodeid, /* name */
16989 opt_ndb_nodeid, /* var */
16990 PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY,
16991 "Set nodeid for this node. Overrides node id specified "
16992 "in --ndb-connectstring.",
16993 NULL, /* check func. */
16994 NULL, /* update func. */
16995 0, /* default */
16996 0, /* min */
16997 MAX_NODES_ID, /* max */
16998 0 /* block */
16999 );
17000
17001 #ifndef DBUG_OFF
17002
17003 static
17004 void
dbug_check_shares(THD *,st_mysql_sys_var *,void *,const void *)17005 dbug_check_shares(THD*, st_mysql_sys_var*, void*, const void*)
17006 {
17007 sql_print_information("dbug_check_shares");
17008 for (uint i= 0; i < ndbcluster_open_tables.records; i++)
17009 {
17010 NDB_SHARE * share = (NDB_SHARE*)my_hash_element(&ndbcluster_open_tables, i);
17011 sql_print_information(" %s.%s: state: %s(%u) use_count: %u",
17012 share->db, share->table_name,
17013 get_share_state_string(share->state),
17014 (unsigned)share->state,
17015 share->use_count);
17016 }
17017
17018 /**
17019 * Only shares in mysql database may be open...
17020 */
17021 for (uint i= 0; i < ndbcluster_open_tables.records; i++)
17022 {
17023 NDB_SHARE * share = (NDB_SHARE*)my_hash_element(&ndbcluster_open_tables, i);
17024 DBUG_ASSERT(strcmp(share->db, "mysql") == 0);
17025 }
17026 }
17027
17028 static MYSQL_THDVAR_UINT(
17029 check_shares, /* name */
17030 PLUGIN_VAR_RQCMDARG,
17031 "Debug, only...check that no shares are lingering...",
17032 NULL, /* check func */
17033 dbug_check_shares, /* update func */
17034 0, /* default */
17035 0, /* min */
17036 1, /* max */
17037 0 /* block */
17038 );
17039
17040 #endif
17041
17042 static struct st_mysql_sys_var* system_variables[]= {
17043 MYSQL_SYSVAR(cache_check_time),
17044 MYSQL_SYSVAR(extra_logging),
17045 MYSQL_SYSVAR(wait_connected),
17046 MYSQL_SYSVAR(wait_setup),
17047 MYSQL_SYSVAR(cluster_connection_pool),
17048 MYSQL_SYSVAR(report_thresh_binlog_mem_usage),
17049 MYSQL_SYSVAR(report_thresh_binlog_epoch_slip),
17050 MYSQL_SYSVAR(log_update_as_write),
17051 MYSQL_SYSVAR(log_updated_only),
17052 MYSQL_SYSVAR(log_orig),
17053 MYSQL_SYSVAR(distribution),
17054 MYSQL_SYSVAR(autoincrement_prefetch_sz),
17055 MYSQL_SYSVAR(force_send),
17056 MYSQL_SYSVAR(use_exact_count),
17057 MYSQL_SYSVAR(use_transactions),
17058 MYSQL_SYSVAR(use_copying_alter_table),
17059 MYSQL_SYSVAR(optimized_node_selection),
17060 MYSQL_SYSVAR(batch_size),
17061 MYSQL_SYSVAR(optimization_delay),
17062 MYSQL_SYSVAR(index_stat_enable),
17063 MYSQL_SYSVAR(index_stat_option),
17064 MYSQL_SYSVAR(index_stat_cache_entries),
17065 MYSQL_SYSVAR(index_stat_update_freq),
17066 MYSQL_SYSVAR(table_no_logging),
17067 MYSQL_SYSVAR(table_temporary),
17068 MYSQL_SYSVAR(log_bin),
17069 MYSQL_SYSVAR(log_binlog_index),
17070 MYSQL_SYSVAR(log_empty_epochs),
17071 MYSQL_SYSVAR(log_apply_status),
17072 MYSQL_SYSVAR(connectstring),
17073 MYSQL_SYSVAR(mgmd_host),
17074 MYSQL_SYSVAR(nodeid),
17075 MYSQL_SYSVAR(blob_read_batch_bytes),
17076 MYSQL_SYSVAR(blob_write_batch_bytes),
17077 MYSQL_SYSVAR(deferred_constraints),
17078 MYSQL_SYSVAR(join_pushdown),
17079 #ifndef DBUG_OFF
17080 MYSQL_SYSVAR(check_shares),
17081 #endif
17082 NULL
17083 };
17084
17085 struct st_mysql_storage_engine ndbcluster_storage_engine=
17086 { MYSQL_HANDLERTON_INTERFACE_VERSION };
17087
17088
17089 #include "ha_ndbinfo.h"
17090
17091 extern struct st_mysql_sys_var* ndbinfo_system_variables[];
17092
17093 struct st_mysql_storage_engine ndbinfo_storage_engine=
17094 { MYSQL_HANDLERTON_INTERFACE_VERSION };
17095
mysql_declare_plugin(ndbcluster)17096 mysql_declare_plugin(ndbcluster)
17097 {
17098 MYSQL_STORAGE_ENGINE_PLUGIN,
17099 &ndbcluster_storage_engine,
17100 ndbcluster_hton_name,
17101 "MySQL AB",
17102 "Clustered, fault-tolerant tables",
17103 PLUGIN_LICENSE_GPL,
17104 ndbcluster_init, /* plugin init */
17105 NULL, /* plugin deinit */
17106 0x0100, /* plugin version */
17107 ndb_status_variables_export,/* status variables */
17108 system_variables, /* system variables */
17109 NULL, /* config options */
17110 0 /* flags */
17111 },
17112 {
17113 MYSQL_STORAGE_ENGINE_PLUGIN,
17114 &ndbinfo_storage_engine,
17115 "ndbinfo",
17116 "Sun Microsystems Inc.",
17117 "MySQL Cluster system information storage engine",
17118 PLUGIN_LICENSE_GPL,
17119 ndbinfo_init, /* plugin init */
17120 ndbinfo_deinit, /* plugin deinit */
17121 0x0001, /* plugin version */
17122 NULL, /* status variables */
17123 ndbinfo_system_variables, /* system variables */
17124 NULL, /* config options */
17125 0 /* flags */
17126 }
17127 mysql_declare_plugin_end;
17128
17129 #endif
17130