/* Copyright (c) 2004, 2021, Oracle and/or its affiliates.

  This program is free software; you can redistribute it and/or modify
  it under the terms of the GNU General Public License, version 2.0,
  as published by the Free Software Foundation.

  This program is also distributed with certain software (including
  but not limited to OpenSSL) that is licensed under separate terms,
  as designated in a particular file or component or in included license
  documentation.  The authors of MySQL hereby grant you an additional
  permission to link the program and your derivative works with the
  separately licensed software that they have included with MySQL.

  This program is distributed in the hope that it will be useful,
  but WITHOUT ANY WARRANTY; without even the implied warranty of
  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  GNU General Public License, version 2.0, for more details.

  You should have received a copy of the GNU General Public License
  along with this program; if not, write to the Free Software
  Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301  USA */

/**
  @file

  @brief
  This file defines the NDB Cluster handler: the interface between
  MySQL and NDB Cluster
*/

#include "ha_ndbcluster_glue.h"
#include "ha_ndbcluster.h"
#include <ndbapi/NdbApi.hpp>
#include <ndbapi/NdbIndexStat.hpp>
#include <ndbapi/NdbInterpretedCode.hpp>
#include "../storage/ndb/src/ndbapi/NdbQueryBuilder.hpp"
#include "../storage/ndb/src/ndbapi/NdbQueryOperation.hpp"

#include "ha_ndbcluster_binlog.h"
#include "ha_ndbcluster_push.h"
#include "ha_ndbcluster_cond.h"
#include "ha_ndbcluster_tables.h"
#include "ha_ndbcluster_connection.h"
#include "ndb_thd.h"
#include "ndb_table_guard.h"
#include "ndb_global_schema_lock.h"
#include "ndb_global_schema_lock_guard.h"
#include "abstract_query_plan.h"
#include "partition_info.h"
#include "ndb_dist_priv_util.h"
#include "ha_ndb_index_stat.h"

#include <mysql/plugin.h>
#include <ndb_version.h>
#include <ndb_global.h>
#include "ndb_mi.h"
#include "ndb_conflict.h"
#include "ndb_anyvalue.h"
#include "ndb_binlog_extra_row_info.h"
#include "ndb_event_data.h"
#include "ndb_schema_dist.h"
#include "ndb_component.h"
#include "ndb_util_thread.h"
#include "ndb_local_connection.h"
#include "ndb_local_schema.h"
#include "ndb_tdc.h"
#include "ndb_log.h"
#include "ndb_name_util.h"
#include "../storage/ndb/src/common/util/parse_mask.hpp"
#include "../storage/ndb/include/util/SparseBitmask.hpp"
#include "m_ctype.h"

using std::min;
using std::max;

// ndb interface initialization/cleanup
extern "C" void ndb_init_internal();
extern "C" void ndb_end_internal();

static const int DEFAULT_PARALLELISM= 0;
static const ha_rows DEFAULT_AUTO_PREFETCH= 32;
static const ulong ONE_YEAR_IN_SECONDS= (ulong) 3600L*24L*365L;

ulong opt_ndb_extra_logging;
static ulong opt_ndb_wait_connected;
ulong opt_ndb_wait_setup;
static ulong opt_ndb_cache_check_time;
static uint opt_ndb_cluster_connection_pool;
static uint opt_ndb_recv_thread_activation_threshold;
static char* opt_ndb_recv_thread_cpu_mask;
static char* opt_ndb_index_stat_option;
static char* opt_ndb_connectstring;
static uint opt_ndb_nodeid;

static MYSQL_THDVAR_UINT(
  autoincrement_prefetch_sz,         /* name */
  PLUGIN_VAR_RQCMDARG,
  "Specify number of autoincrement values that are prefetched.",
  NULL,                              /* check func. */
  NULL,                              /* update func. */
  1,                                 /* default */
  1,                                 /* min */
  65535,                             /* max */
  0                                  /* block */
);


static MYSQL_THDVAR_BOOL(
  force_send,                        /* name */
  PLUGIN_VAR_OPCMDARG,
  "Force send of buffers to ndb immediately without waiting for "
  "other threads.",
  NULL,                              /* check func. */
  NULL,                              /* update func. */
  1                                  /* default */
);


static MYSQL_THDVAR_BOOL(
  use_exact_count,                   /* name */
  PLUGIN_VAR_OPCMDARG,
  "Use exact records count during query planning and for fast "
  "select count(*), disable for faster queries.",
  NULL,                              /* check func. */
  NULL,                              /* update func. */
  0                                  /* default */
);


static MYSQL_THDVAR_BOOL(
  use_transactions,                  /* name */
  PLUGIN_VAR_OPCMDARG,
  "Use transactions for large inserts; if enabled, large "
  "inserts will be split into several smaller transactions.",
  NULL,                              /* check func. */
  NULL,                              /* update func. */
  1                                  /* default */
);


static MYSQL_THDVAR_BOOL(
  use_copying_alter_table,           /* name */
  PLUGIN_VAR_OPCMDARG,
  "Force ndbcluster to always copy tables at alter table (should "
  "only be used if on-line alter table fails).",
  NULL,                              /* check func. */
  NULL,                              /* update func. */
  0                                  /* default */
);


static MYSQL_THDVAR_UINT(
  optimized_node_selection,          /* name */
  PLUGIN_VAR_OPCMDARG,
  "Select nodes for transactions in a more optimal way.",
  NULL,                              /* check func. */
  NULL,                              /* update func. */
  3,                                 /* default */
  0,                                 /* min */
  3,                                 /* max */
  0                                  /* block */
);


static MYSQL_THDVAR_ULONG(
  batch_size,                        /* name */
  PLUGIN_VAR_RQCMDARG,
  "Batch size in bytes.",
  NULL,                              /* check func. */
  NULL,                              /* update func. */
  32768,                             /* default */
  0,                                 /* min */
  ONE_YEAR_IN_SECONDS,               /* max */
  0                                  /* block */
);


static MYSQL_THDVAR_ULONG(
  optimization_delay,                /* name */
  PLUGIN_VAR_RQCMDARG,
  "For optimize table, specifies the delay in milliseconds "
  "for each batch of rows sent.",
  NULL,                              /* check func. */
  NULL,                              /* update func. */
  10,                                /* default */
  0,                                 /* min */
  100000,                            /* max */
  0                                  /* block */
);

#if NDB_VERSION_D < NDB_MAKE_VERSION(7,2,0)
#define DEFAULT_NDB_INDEX_STAT_ENABLE FALSE
#else
#define DEFAULT_NDB_INDEX_STAT_ENABLE TRUE
#endif

static MYSQL_THDVAR_BOOL(
  index_stat_enable,                 /* name */
  PLUGIN_VAR_OPCMDARG,
  "Use ndb index statistics in query optimization.",
  NULL,                              /* check func. */
  NULL,                              /* update func. */
  DEFAULT_NDB_INDEX_STAT_ENABLE      /* default */
);


static MYSQL_THDVAR_BOOL(
  table_no_logging,                  /* name */
  PLUGIN_VAR_NOCMDARG,
  "",
  NULL,                              /* check func. */
  NULL,                              /* update func. */
  FALSE                              /* default */
);


static MYSQL_THDVAR_BOOL(
  table_temporary,                   /* name */
  PLUGIN_VAR_NOCMDARG,
  "",
  NULL,                              /* check func. */
  NULL,                              /* update func. */
  FALSE                              /* default */
);

static MYSQL_THDVAR_UINT(
  blob_read_batch_bytes,             /* name */
  PLUGIN_VAR_RQCMDARG,
  "Specifies the byte size that large Blob reads "
  "should be batched into.  0 == No limit.",
  NULL,                              /* check func */
  NULL,                              /* update func */
  65536,                             /* default */
  0,                                 /* min */
  UINT_MAX,                          /* max */
  0                                  /* block */
);

static MYSQL_THDVAR_UINT(
  blob_write_batch_bytes,            /* name */
  PLUGIN_VAR_RQCMDARG,
  "Specifies the byte size that large Blob writes "
  "should be batched into.  0 == No limit.",
  NULL,                              /* check func */
  NULL,                              /* update func */
  65536,                             /* default */
  0,                                 /* min */
  UINT_MAX,                          /* max */
  0                                  /* block */
);

static MYSQL_THDVAR_UINT(
  deferred_constraints,              /* name */
  PLUGIN_VAR_RQCMDARG,
  "Specifies that constraints should be checked deferred (when supported)",
  NULL,                              /* check func */
  NULL,                              /* update func */
  0,                                 /* default */
  0,                                 /* min */
  1,                                 /* max */
  0                                  /* block */
);

static MYSQL_THDVAR_BOOL(
  show_foreign_key_mock_tables,          /* name */
  PLUGIN_VAR_OPCMDARG,
  "Show the mock tables which are used to support foreign_key_checks= 0. "
  "Extra info warnings are shown when creating and dropping the tables. "
  "The real table name is shown in SHOW CREATE TABLE",
  NULL,                              /* check func. */
  NULL,                              /* update func. */
  0                                  /* default */
);

#if NDB_VERSION_D < NDB_MAKE_VERSION(7,2,0)
#define DEFAULT_NDB_JOIN_PUSHDOWN FALSE
#else
#define DEFAULT_NDB_JOIN_PUSHDOWN TRUE
#endif

static MYSQL_THDVAR_BOOL(
  join_pushdown,                     /* name */
  PLUGIN_VAR_OPCMDARG,
  "Enable pushing down of join to datanodes",
  NULL,                              /* check func. */
  NULL,                              /* update func. */
  DEFAULT_NDB_JOIN_PUSHDOWN          /* default */
);

static MYSQL_THDVAR_BOOL(
  log_exclusive_reads,               /* name */
  PLUGIN_VAR_OPCMDARG,
  "Log primary key reads with exclusive locks "
  "to allow conflict resolution based on read conflicts",
  NULL,                              /* check func. */
  NULL,                              /* update func. */
  0                                  /* default */
);


/*
  Required in index_stat.cc but available only from here
  thanks to use of top level anonymous structs.
*/
bool ndb_index_stat_get_enable(THD *thd)
{
  const bool value = THDVAR(thd, index_stat_enable);
  return value;
}

bool ndb_show_foreign_key_mock_tables(THD* thd)
{
  const bool value = THDVAR(thd, show_foreign_key_mock_tables);
  return value;
}

bool ndb_log_exclusive_reads(THD *thd)
{
  const bool value = THDVAR(thd, log_exclusive_reads);
  return value;
}

static int ndbcluster_end(handlerton *hton, ha_panic_function flag);
static bool ndbcluster_show_status(handlerton *hton, THD*,
                                   stat_print_fn *,
                                   enum ha_stat_type);

static int ndbcluster_get_tablespace(THD* thd,
                                     LEX_CSTRING db_name,
                                     LEX_CSTRING table_name,
                                     LEX_CSTRING *tablespace_name);
static int ndbcluster_alter_tablespace(handlerton *hton,
                                       THD* thd,
                                       st_alter_tablespace *info);
static int ndbcluster_fill_files_table(handlerton *hton,
                                       THD *thd,
                                       TABLE_LIST *tables,
                                       Item *cond);

#if MYSQL_VERSION_ID >= 50501
/**
   Used to fill in INFORMATION_SCHEMA* tables.

   @param hton handle to the handlerton structure
   @param thd the thread/connection descriptor
   @param[in,out] tables the information schema table that is filled up
   @param cond used for conditional pushdown to storage engine
   @param schema_table_idx the table id that distinguishes the type of table

   @return Operation status
 */
static int
ndbcluster_fill_is_table(handlerton *hton, THD *thd, TABLE_LIST *tables,
                         Item *cond, enum enum_schema_tables schema_table_idx)
{
  if (schema_table_idx == SCH_FILES)
    return  ndbcluster_fill_files_table(hton, thd, tables, cond);
  return 0;
}
#endif

static handler *ndbcluster_create_handler(handlerton *hton,
                                          TABLE_SHARE *table,
                                          MEM_ROOT *mem_root)
{
  return new (mem_root) ha_ndbcluster(hton, table);
}

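/*
  Partitioning capabilities advertised by the ndbcluster handler:
  the partition key can be updated, unique keys are supported on
  partitioned tables, and tables are auto-partitioned by the engine.
*/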
static uint
ndbcluster_partition_flags()
{
  return (HA_CAN_UPDATE_PARTITION_KEY |
          HA_CAN_PARTITION_UNIQUE | HA_USE_AUTO_PARTITION);
}

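/*
  ALTER TABLE capabilities: partition functions are supported, but
  ALTER_DROP_PARTITION returns no flags and therefore falls back to a
  copying alter.
*/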
uint ha_ndbcluster::alter_flags(uint flags) const
{
  const uint f=
    HA_PARTITION_FUNCTION_SUPPORTED |
    0;

  if (flags & Alter_info::ALTER_DROP_PARTITION)
    return 0;

  return f;
}

#define NDB_AUTO_INCREMENT_RETRIES 100
#define BATCH_FLUSH_SIZE (32768)

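/*
  Error handling macros: ERR_PRINT traces an NdbError, while
  ERR_RETURN/ERR_BREAK/ERR_SET map the NdbError to the corresponding
  MySQL error code via ndb_to_mysql_error() and then return, break or
  assign as appropriate.
*/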
#define ERR_PRINT(err) \
  DBUG_PRINT("error", ("%d  message: %s", err.code, err.message))

#define ERR_RETURN(err)                  \
{                                        \
  const NdbError& tmp= err;              \
  DBUG_RETURN(ndb_to_mysql_error(&tmp)); \
}

#define ERR_BREAK(err, code)             \
{                                        \
  const NdbError& tmp= err;              \
  code= ndb_to_mysql_error(&tmp);        \
  break;                                 \
}

#define ERR_SET(err, code)               \
{                                        \
  const NdbError& tmp= err;              \
  code= ndb_to_mysql_error(&tmp);        \
}

static int ndbcluster_inited= 0;

/*
   Indicator and CONDVAR used to delay client and slave
   connections until Ndb has Binlog setup
   (bug#46955)
*/
int ndb_setup_complete= 0;
native_cond_t COND_ndb_setup_complete; // Signal with ndbcluster_mutex

extern Ndb* g_ndb;

/// Handler synchronization
native_mutex_t ndbcluster_mutex;

/// Table lock handling
HASH ndbcluster_open_tables;
HASH ndbcluster_dropped_tables;

static uchar *ndbcluster_get_key(NDB_SHARE *share, size_t *length,
                                my_bool);

static void modify_shared_stats(NDB_SHARE *share,
                                Ndb_local_table_statistics *local_stat);

static int ndb_get_table_statistics(THD *thd, ha_ndbcluster*, bool, Ndb*,
                                    const NdbRecord *, struct Ndb_statistics *,
                                    uint part_id= ~(uint)0);

static ulong multi_range_fixed_size(int num_ranges);

static ulong multi_range_max_entry(NDB_INDEX_TYPE keytype, ulong reclength);

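/* THD of the ndb binlog injector thread, if any */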
THD *injector_thd= 0;

/* Status variables shown with 'show status like 'Ndb%' */

struct st_ndb_status g_ndb_status;

const char *g_ndb_status_index_stat_status = "";
long g_ndb_status_index_stat_cache_query = 0;
long g_ndb_status_index_stat_cache_clean = 0;

long long g_event_data_count = 0;
long long g_event_nondata_count = 0;
long long g_event_bytes_count = 0;

static long long g_slave_api_client_stats[Ndb::NumClientStatistics];

static long long g_server_api_client_stats[Ndb::NumClientStatistics];

void
update_slave_api_stats(Ndb* ndb)
{
  for (Uint32 i=0; i < Ndb::NumClientStatistics; i++)
    g_slave_api_client_stats[i] = ndb->getClientStat(i);
}

st_ndb_slave_state g_ndb_slave_state;

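/*
  Called for slave threads: if the slave SQL thread has restarted
  (detected via a changed run id), reset the ndb slave state and reload
  the highest applied epoch from NDB_REP_DB.NDB_APPLY_TABLE into
  g_ndb_slave_state.max_rep_epoch.
*/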
static int check_slave_state(THD* thd)
{
  DBUG_ENTER("check_slave_state");

#ifdef HAVE_NDB_BINLOG
  if (!thd->slave_thread)
    DBUG_RETURN(0);

  const Uint32 runId = ndb_mi_get_slave_run_id();
  DBUG_PRINT("info", ("Slave SQL thread run id is %u",
                      runId));
  if (unlikely(runId != g_ndb_slave_state.sql_run_id))
  {
    DBUG_PRINT("info", ("Slave run id changed from %u, "
                        "treating as Slave restart",
                        g_ndb_slave_state.sql_run_id));
    g_ndb_slave_state.sql_run_id = runId;

    g_ndb_slave_state.atStartSlave();

    /* Always try to load the Max Replicated Epoch info
     * first.
     * Could be made optional if it's a problem
     */
    {
      /*
         Load highest replicated epoch from a local
         MySQLD from the cluster.
      */
      DBUG_PRINT("info", ("Loading applied epoch information from %s",
                          NDB_APPLY_TABLE));
      NdbError ndb_error;
      Uint64 highestAppliedEpoch = 0;
      do
      {
        Ndb* ndb= check_ndb_in_thd(thd);
        NDBDICT* dict= ndb->getDictionary();
        NdbTransaction* trans= NULL;
        ndb->setDatabaseName(NDB_REP_DB);
        Ndb_table_guard ndbtab_g(dict, NDB_APPLY_TABLE);

        const NDBTAB* ndbtab= ndbtab_g.get_table();
        if (unlikely(ndbtab == NULL))
        {
          ndb_error = dict->getNdbError();
          break;
        }

        trans= ndb->startTransaction();
        if (unlikely(trans == NULL))
        {
          ndb_error = ndb->getNdbError();
          break;
        }

        do
        {
          NdbScanOperation* sop = trans->getNdbScanOperation(ndbtab);
          if (unlikely(sop == NULL))
          {
            ndb_error = trans->getNdbError();
            break;
          }

          const Uint32 server_id_col_num = 0;
          const Uint32 epoch_col_num = 1;
          NdbRecAttr* server_id_ra = 0;
          NdbRecAttr* epoch_ra = 0;

          if (unlikely((sop->readTuples(NdbOperation::LM_CommittedRead) != 0)   ||
                       ((server_id_ra = sop->getValue(server_id_col_num)) == NULL)  ||
                       ((epoch_ra = sop->getValue(epoch_col_num)) == NULL)))
          {
            ndb_error = sop->getNdbError();
            break;
          }

          if (trans->execute(NdbTransaction::Commit))
          {
            ndb_error = trans->getNdbError();
            break;
          }

          int rc = 0;
          while (0 == (rc= sop->nextResult(true)))
          {
            Uint32 serverid = server_id_ra->u_32_value();
            Uint64 epoch = epoch_ra->u_64_value();

            if ((serverid == ::server_id) ||
                (ndb_mi_get_ignore_server_id(serverid)))
            {
              highestAppliedEpoch = MAX(epoch, highestAppliedEpoch);
            }
          }

          if (rc != 1)
          {
            ndb_error = sop->getNdbError();
            break;
          }
        } while (0);

        trans->close();
      } while(0);

      if (ndb_error.code != 0)
      {
        sql_print_warning("NDB Slave : Could not determine maximum replicated epoch from %s.%s "
                          "at Slave start, error %u %s",
                          NDB_REP_DB,
                          NDB_APPLY_TABLE,
                          ndb_error.code, ndb_error.message);
      }

      /*
        Set Global status variable to the Highest Applied Epoch from
        the Cluster DB.
        If none was found, this will be zero.
      */
      g_ndb_slave_state.max_rep_epoch = highestAppliedEpoch;
      sql_print_information("NDB Slave : MaxReplicatedEpoch set to %llu (%u/%u) at Slave start",
                            g_ndb_slave_state.max_rep_epoch,
                            (Uint32)(g_ndb_slave_state.max_rep_epoch >> 32),
                            (Uint32)(g_ndb_slave_state.max_rep_epoch & 0xffffffff));
    } // Load highest replicated epoch
  } // New Slave SQL thread run id
#endif

  DBUG_RETURN(0);
}


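/*
  Copy the current counters from the cluster connection and (if given)
  the Thd_ndb into the st_ndb_status struct used by SHOW STATUS.
*/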
static int update_status_variables(Thd_ndb *thd_ndb,
                                   st_ndb_status *ns,
                                   Ndb_cluster_connection *c)
{
  ns->connected_port= c->get_connected_port();
  ns->connected_host= c->get_connected_host();
  if (ns->cluster_node_id != (int) c->node_id())
  {
    ns->cluster_node_id= c->node_id();
    if (&g_ndb_status == ns && g_ndb_cluster_connection == c)
      sql_print_information("NDB: NodeID is %lu, management server '%s:%lu'",
                            ns->cluster_node_id, ns->connected_host,
                            ns->connected_port);
  }
  ns->number_of_replicas= 0;
  {
    int n= c->get_no_ready();
    ns->number_of_ready_data_nodes= n > 0 ?  n : 0;
  }
  ns->number_of_data_nodes= c->no_db_nodes();
  ns->connect_count= c->get_connect_count();
  ns->last_commit_epoch_server= ndb_get_latest_trans_gci();
  if (thd_ndb)
  {
    ns->execute_count= thd_ndb->m_execute_count;
    ns->scan_count= thd_ndb->m_scan_count;
    ns->pruned_scan_count= thd_ndb->m_pruned_scan_count;
    ns->sorted_scan_count= thd_ndb->m_sorted_scan_count;
    ns->pushed_queries_defined= thd_ndb->m_pushed_queries_defined;
    ns->pushed_queries_dropped= thd_ndb->m_pushed_queries_dropped;
    ns->pushed_queries_executed= thd_ndb->m_pushed_queries_executed;
    ns->pushed_reads= thd_ndb->m_pushed_reads;
    ns->last_commit_epoch_session = thd_ndb->m_last_commit_epoch_session;
    for (int i= 0; i < MAX_NDB_NODES; i++)
    {
      ns->transaction_no_hint_count[i]= thd_ndb->m_transaction_no_hint_count[i];
      ns->transaction_hint_count[i]= thd_ndb->m_transaction_hint_count[i];
    }
    for (int i=0; i < Ndb::NumClientStatistics; i++)
    {
      ns->api_client_stats[i] = thd_ndb->ndb->getClientStat(i);
    }
    ns->schema_locks_count= thd_ndb->schema_locks_count;
  }
  return 0;
}

/* Helper macro for definitions of NdbApi status variables */

#define NDBAPI_COUNTERS(NAME_SUFFIX, ARRAY_LOCATION)                    \
  {"api_wait_exec_complete_count" NAME_SUFFIX,                          \
   (char*) ARRAY_LOCATION[ Ndb::WaitExecCompleteCount ],                \
   SHOW_LONGLONG, SHOW_SCOPE_GLOBAL},                                   \
  {"api_wait_scan_result_count" NAME_SUFFIX,                            \
   (char*) ARRAY_LOCATION[ Ndb::WaitScanResultCount ],                  \
   SHOW_LONGLONG, SHOW_SCOPE_GLOBAL},                                   \
  {"api_wait_meta_request_count" NAME_SUFFIX,                           \
   (char*) ARRAY_LOCATION[ Ndb::WaitMetaRequestCount ],                 \
   SHOW_LONGLONG, SHOW_SCOPE_GLOBAL},                                   \
  {"api_wait_nanos_count" NAME_SUFFIX,                                  \
   (char*) ARRAY_LOCATION[ Ndb::WaitNanosCount ],                       \
   SHOW_LONGLONG, SHOW_SCOPE_GLOBAL},                                   \
  {"api_bytes_sent_count" NAME_SUFFIX,                                  \
   (char*) ARRAY_LOCATION[ Ndb::BytesSentCount ],                       \
   SHOW_LONGLONG, SHOW_SCOPE_GLOBAL},                                   \
  {"api_bytes_received_count" NAME_SUFFIX,                              \
   (char*) ARRAY_LOCATION[ Ndb::BytesRecvdCount ],                      \
   SHOW_LONGLONG, SHOW_SCOPE_GLOBAL},                                   \
  {"api_trans_start_count" NAME_SUFFIX,                                 \
   (char*) ARRAY_LOCATION[ Ndb::TransStartCount ],                      \
   SHOW_LONGLONG, SHOW_SCOPE_GLOBAL},                                   \
  {"api_trans_commit_count" NAME_SUFFIX,                                \
   (char*) ARRAY_LOCATION[ Ndb::TransCommitCount ],                     \
   SHOW_LONGLONG, SHOW_SCOPE_GLOBAL},                                   \
  {"api_trans_abort_count" NAME_SUFFIX,                                 \
   (char*) ARRAY_LOCATION[ Ndb::TransAbortCount ],                      \
   SHOW_LONGLONG, SHOW_SCOPE_GLOBAL},                                   \
  {"api_trans_close_count" NAME_SUFFIX,                                 \
   (char*) ARRAY_LOCATION[ Ndb::TransCloseCount ],                      \
   SHOW_LONGLONG, SHOW_SCOPE_GLOBAL},                                   \
  {"api_pk_op_count" NAME_SUFFIX,                                       \
   (char*) ARRAY_LOCATION[ Ndb::PkOpCount ],                            \
   SHOW_LONGLONG, SHOW_SCOPE_GLOBAL},                                   \
  {"api_uk_op_count" NAME_SUFFIX,                                       \
   (char*) ARRAY_LOCATION[ Ndb::UkOpCount ],                            \
   SHOW_LONGLONG, SHOW_SCOPE_GLOBAL},                                   \
  {"api_table_scan_count" NAME_SUFFIX,                                  \
   (char*) ARRAY_LOCATION[ Ndb::TableScanCount ],                       \
   SHOW_LONGLONG, SHOW_SCOPE_GLOBAL},                                   \
  {"api_range_scan_count" NAME_SUFFIX,                                  \
   (char*) ARRAY_LOCATION[ Ndb::RangeScanCount ],                       \
   SHOW_LONGLONG, SHOW_SCOPE_GLOBAL},                                   \
  {"api_pruned_scan_count" NAME_SUFFIX,                                 \
   (char*) ARRAY_LOCATION[ Ndb::PrunedScanCount ],                      \
   SHOW_LONGLONG, SHOW_SCOPE_GLOBAL},                                   \
  {"api_scan_batch_count" NAME_SUFFIX,                                  \
   (char*) ARRAY_LOCATION[ Ndb::ScanBatchCount ],                       \
   SHOW_LONGLONG, SHOW_SCOPE_GLOBAL},                                   \
  {"api_read_row_count" NAME_SUFFIX,                                    \
   (char*) ARRAY_LOCATION[ Ndb::ReadRowCount ],                         \
   SHOW_LONGLONG, SHOW_SCOPE_GLOBAL},                                   \
  {"api_trans_local_read_row_count" NAME_SUFFIX,                        \
   (char*) ARRAY_LOCATION[ Ndb::TransLocalReadRowCount ],               \
   SHOW_LONGLONG, SHOW_SCOPE_GLOBAL},                                   \
  {"api_adaptive_send_forced_count" NAME_SUFFIX,                        \
   (char *) ARRAY_LOCATION[ Ndb::ForcedSendsCount ],                    \
   SHOW_LONGLONG, SHOW_SCOPE_GLOBAL},                                   \
  {"api_adaptive_send_unforced_count" NAME_SUFFIX,                      \
   (char *) ARRAY_LOCATION[ Ndb::UnforcedSendsCount ],                  \
   SHOW_LONGLONG, SHOW_SCOPE_GLOBAL},                                   \
  {"api_adaptive_send_deferred_count" NAME_SUFFIX,                      \
   (char *) ARRAY_LOCATION[ Ndb::DeferredSendsCount ],                  \
   SHOW_LONGLONG, SHOW_SCOPE_GLOBAL}

SHOW_VAR ndb_status_variables_dynamic[]= {
  {"cluster_node_id",     (char*) &g_ndb_status.cluster_node_id,      SHOW_LONG, SHOW_SCOPE_GLOBAL},
  {"config_from_host",    (char*) &g_ndb_status.connected_host,       SHOW_CHAR_PTR, SHOW_SCOPE_GLOBAL},
  {"config_from_port",    (char*) &g_ndb_status.connected_port,       SHOW_LONG, SHOW_SCOPE_GLOBAL},
//{"number_of_replicas",  (char*) &g_ndb_status.number_of_replicas,   SHOW_LONG, SHOW_SCOPE_GLOBAL},
  {"number_of_data_nodes",(char*) &g_ndb_status.number_of_data_nodes, SHOW_LONG, SHOW_SCOPE_GLOBAL},
  {"number_of_ready_data_nodes",
   (char*) &g_ndb_status.number_of_ready_data_nodes,                  SHOW_LONG, SHOW_SCOPE_GLOBAL},
  {"connect_count",      (char*) &g_ndb_status.connect_count,         SHOW_LONG, SHOW_SCOPE_GLOBAL},
  {"execute_count",      (char*) &g_ndb_status.execute_count,         SHOW_LONG, SHOW_SCOPE_GLOBAL},
  {"scan_count",         (char*) &g_ndb_status.scan_count,            SHOW_LONG, SHOW_SCOPE_GLOBAL},
  {"pruned_scan_count",  (char*) &g_ndb_status.pruned_scan_count,     SHOW_LONG, SHOW_SCOPE_GLOBAL},
  {"schema_locks_count", (char*) &g_ndb_status.schema_locks_count,    SHOW_LONG, SHOW_SCOPE_GLOBAL},
  NDBAPI_COUNTERS("_session", &g_ndb_status.api_client_stats),
  {"sorted_scan_count",  (char*) &g_ndb_status.sorted_scan_count,     SHOW_LONG, SHOW_SCOPE_GLOBAL},
  {"pushed_queries_defined", (char*) &g_ndb_status.pushed_queries_defined,
   SHOW_LONG, SHOW_SCOPE_GLOBAL},
  {"pushed_queries_dropped", (char*) &g_ndb_status.pushed_queries_dropped,
   SHOW_LONG, SHOW_SCOPE_GLOBAL},
  {"pushed_queries_executed", (char*) &g_ndb_status.pushed_queries_executed,
   SHOW_LONG, SHOW_SCOPE_GLOBAL},
  {"pushed_reads",       (char*) &g_ndb_status.pushed_reads,          SHOW_LONG, SHOW_SCOPE_GLOBAL},
  {"last_commit_epoch_server",
                         (char*) &g_ndb_status.last_commit_epoch_server,
                                                                      SHOW_LONGLONG, SHOW_SCOPE_GLOBAL},
  {"last_commit_epoch_session",
                         (char*) &g_ndb_status.last_commit_epoch_session,
                                                                      SHOW_LONGLONG, SHOW_SCOPE_GLOBAL},
  {NullS, NullS, SHOW_LONG, SHOW_SCOPE_GLOBAL}
};


SHOW_VAR ndb_status_injector_variables[]= {
  {"api_event_data_count_injector",     (char*) &g_event_data_count, SHOW_LONGLONG, SHOW_SCOPE_GLOBAL},
  {"api_event_nondata_count_injector",  (char*) &g_event_nondata_count, SHOW_LONGLONG, SHOW_SCOPE_GLOBAL},
  {"api_event_bytes_count_injector",    (char*) &g_event_bytes_count, SHOW_LONGLONG, SHOW_SCOPE_GLOBAL},
  {NullS, NullS, SHOW_LONG, SHOW_SCOPE_GLOBAL}
};

SHOW_VAR ndb_status_slave_variables[]= {
  NDBAPI_COUNTERS("_slave", &g_slave_api_client_stats),
  {"slave_max_replicated_epoch", (char*) &g_ndb_slave_state.max_rep_epoch, SHOW_LONGLONG, SHOW_SCOPE_GLOBAL},
  {NullS, NullS, SHOW_LONG, SHOW_SCOPE_GLOBAL}
};

SHOW_VAR ndb_status_server_client_stat_variables[]= {
  NDBAPI_COUNTERS("", &g_server_api_client_stats),
  {"api_event_data_count",
   (char*) &g_server_api_client_stats[ Ndb::DataEventsRecvdCount ],
   SHOW_LONGLONG, SHOW_SCOPE_GLOBAL},
  {"api_event_nondata_count",
   (char*) &g_server_api_client_stats[ Ndb::NonDataEventsRecvdCount ],
   SHOW_LONGLONG, SHOW_SCOPE_GLOBAL},
  {"api_event_bytes_count",
   (char*) &g_server_api_client_stats[ Ndb::EventBytesRecvdCount ],
   SHOW_LONGLONG, SHOW_SCOPE_GLOBAL},
  {NullS, NullS, SHOW_LONG, SHOW_SCOPE_GLOBAL}
};

static int show_ndb_server_api_stats(THD *thd, SHOW_VAR *var, char *buff)
{
  /* This function is called when SHOW STATUS / INFO_SCHEMA wants
   * to see one of our status vars.
   * We use this opportunity to:
   *  1) Update the globals with current values
   *  2) Return an array of var definitions, pointing to
   *     the updated globals
   */
  ndb_get_connection_stats((Uint64*) &g_server_api_client_stats[0]);

  var->type= SHOW_ARRAY;
  var->value= (char*) ndb_status_server_client_stat_variables;
  var->scope= SHOW_SCOPE_GLOBAL;

  return 0;
}

SHOW_VAR ndb_status_index_stat_variables[]= {
  {"status",          (char*) &g_ndb_status_index_stat_status, SHOW_CHAR_PTR, SHOW_SCOPE_GLOBAL},
  {"cache_query",     (char*) &g_ndb_status_index_stat_cache_query, SHOW_LONG, SHOW_SCOPE_GLOBAL},
  {"cache_clean",     (char*) &g_ndb_status_index_stat_cache_clean, SHOW_LONG, SHOW_SCOPE_GLOBAL},
  {NullS, NullS, SHOW_LONG, SHOW_SCOPE_GLOBAL}
};


/*
  Error handling functions
*/

/* Note for merge: old mapping table, moved to storage/ndb/ndberror.c */

int ndb_to_mysql_error(const NdbError *ndberr)
{
  /* read the mysql mapped error code */
  int error= ndberr->mysql_code;

  switch (error)
  {
    /* errors for which we do not add warnings, just return mapped error code
    */
  case HA_ERR_NO_SUCH_TABLE:
  case HA_ERR_KEY_NOT_FOUND:
    return error;

    /* Mapping missing, go with the ndb error code */
  case -1:
  case 0:
    /* Never map to errors below HA_ERR_FIRST */
    if (ndberr->code < HA_ERR_FIRST)
      error= HA_ERR_INTERNAL_ERROR;
    else
      error= ndberr->code;
    break;
    /* Mapping exists, go with the mapped code */
  default:
    break;
  }

  {
    /*
      Push the NDB error message as warning
      - Used to be able to use SHOW WARNINGS to get more info
        on what the error is
      - Used by replication to see if the error was temporary
    */
    if (ndberr->status == NdbError::TemporaryError)
      push_warning_printf(current_thd, Sql_condition::SL_WARNING,
                          ER_GET_TEMPORARY_ERRMSG, ER(ER_GET_TEMPORARY_ERRMSG),
                          ndberr->code, ndberr->message, "NDB");
    else
      push_warning_printf(current_thd, Sql_condition::SL_WARNING,
                          ER_GET_ERRMSG, ER(ER_GET_ERRMSG),
                          ndberr->code, ndberr->message, "NDB");
  }
  return error;
}

ulong opt_ndb_slave_conflict_role;

#ifdef HAVE_NDB_BINLOG

static int
handle_conflict_op_error(NdbTransaction* trans,
                         const NdbError& err,
                         const NdbOperation* op);

static int
handle_row_conflict(NDB_CONFLICT_FN_SHARE* cfn_share,
                    const char* tab_name,
                    bool table_has_blobs,
                    const char* handling_type,
                    const NdbRecord* key_rec,
                    const NdbRecord* data_rec,
                    const uchar* old_row,
                    const uchar* new_row,
                    enum_conflicting_op_type op_type,
                    enum_conflict_cause conflict_cause,
                    const NdbError& conflict_error,
                    NdbTransaction* conflict_trans,
                    const MY_BITMAP *write_set,
                    Uint64 transaction_id);
#endif

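/*
  NDB error code reported for an operation that is defined after a
  refresh operation on the same row; such errors are ignored by the
  conflict handling code below.
*/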
static const Uint32 error_op_after_refresh_op = 920;

static inline
int
check_completed_operations_pre_commit(Thd_ndb *thd_ndb, NdbTransaction *trans,
                                      const NdbOperation *first,
                                      const NdbOperation *last,
                                      uint *ignore_count)
{
  uint ignores= 0;
  DBUG_ENTER("check_completed_operations_pre_commit");

  if (unlikely(first == 0))
  {
    assert(last == 0);
    DBUG_RETURN(0);
  }

  /*
    Check that all errors are "accepted" errors
    or exceptions to report
  */
#ifdef HAVE_NDB_BINLOG
  const NdbOperation* lastUserOp = trans->getLastDefinedOperation();
#endif
  while (true)
  {
    const NdbError &err= first->getNdbError();
    const bool op_has_conflict_detection = (first->getCustomData() != NULL);
    if (!op_has_conflict_detection)
    {
      assert(err.code != (int) error_op_after_refresh_op);

      /* 'Normal path' - ignore key (not) present, others are errors */
      if (err.classification != NdbError::NoError &&
          err.classification != NdbError::ConstraintViolation &&
          err.classification != NdbError::NoDataFound)
      {
        /* Non ignored error, report it */
        DBUG_PRINT("info", ("err.code == %u", err.code));
        DBUG_RETURN(err.code);
      }
    }
#ifdef HAVE_NDB_BINLOG
    else
    {
      /*
         Op with conflict detection, use special error handling method
       */

      if (err.classification != NdbError::NoError)
      {
        int res = handle_conflict_op_error(trans,
                                           err,
                                           first);
        if (res != 0)
          DBUG_RETURN(res);
      }
    } // if (!op_has_conflict_detection)
#endif
    if (err.classification != NdbError::NoError)
      ignores++;

    if (first == last)
      break;

    first= trans->getNextCompletedOperation(first);
  }
  if (ignore_count)
    *ignore_count= ignores;
#ifdef HAVE_NDB_BINLOG
  /*
     Conflict detection related error handling above may have defined
     new operations on the transaction.  If so, execute them now
  */
  if (trans->getLastDefinedOperation() != lastUserOp)
  {
    const NdbOperation* last_conflict_op = trans->getLastDefinedOperation();

    NdbError nonMaskedError;
    assert(nonMaskedError.code == 0);

    if (trans->execute(NdbTransaction::NoCommit,
                       NdbOperation::AO_IgnoreError,
                       thd_ndb->m_force_send))
    {
      /* Transaction execute failed, even with IgnoreError... */
      nonMaskedError = trans->getNdbError();
      assert(nonMaskedError.code != 0);
    }
    else if (trans->getNdbError().code)
    {
      /* Check the result codes of the operations we added */
      const NdbOperation* conflict_op = NULL;
      do
      {
        conflict_op = trans->getNextCompletedOperation(conflict_op);
        assert(conflict_op != NULL);
        /* We will ignore 920 which represents a refreshOp or other op
         * arriving after a refreshOp
         */
        const NdbError& err = conflict_op->getNdbError();
        if ((err.code != 0) &&
            (err.code != (int) error_op_after_refresh_op))
        {
          /* Found a real error, break out and handle it */
          nonMaskedError = err;
          break;
        }
      } while (conflict_op != last_conflict_op);
    }

    /* Handle errors with extra conflict handling operations */
    if (nonMaskedError.code != 0)
    {
      if (nonMaskedError.status == NdbError::TemporaryError)
      {
        /* Slave will roll back and retry entire transaction. */
        ERR_RETURN(nonMaskedError);
      }
      else
      {
        char msg[FN_REFLEN];
        my_snprintf(msg, sizeof(msg), "Executing extra operations for "
                    "conflict handling hit Ndb error %d '%s'",
                    nonMaskedError.code, nonMaskedError.message);
        push_warning_printf(current_thd, Sql_condition::SL_ERROR,
                            ER_EXCEPTIONS_WRITE_ERROR,
                            ER(ER_EXCEPTIONS_WRITE_ERROR), msg);
        /* Slave will stop replication. */
        DBUG_RETURN(ER_EXCEPTIONS_WRITE_ERROR);
      }
    }
  }
#endif
  DBUG_RETURN(0);
}

static inline
int
check_completed_operations(Thd_ndb *thd_ndb, NdbTransaction *trans,
                           const NdbOperation *first,
                           const NdbOperation *last,
                           uint *ignore_count)
{
  uint ignores= 0;
  DBUG_ENTER("check_completed_operations");

  if (unlikely(first == 0))
  {
    assert(last == 0);
    DBUG_RETURN(0);
  }

  /*
    Check that all errors are "accepted" errors
  */
  while (true)
  {
    const NdbError &err= first->getNdbError();
    if (err.classification != NdbError::NoError &&
        err.classification != NdbError::ConstraintViolation &&
        err.classification != NdbError::NoDataFound)
    {
#ifdef HAVE_NDB_BINLOG
      /* All conflict detection etc should be done before commit */
      assert((err.code != (int) error_conflict_fn_violation) &&
             (err.code != (int) error_op_after_refresh_op));
#endif
      DBUG_RETURN(err.code);
    }
    if (err.classification != NdbError::NoError)
      ignores++;

    if (first == last)
      break;

    first= trans->getNextCompletedOperation(first);
  }
  if (ignore_count)
    *ignore_count= ignores;
  DBUG_RETURN(0);
}

void
ha_ndbcluster::release_completed_operations(NdbTransaction *trans)
{
  /**
   * mysqld reads/writes blobs fully,
   *   which means that it does not keep blobs
   *   open/active over execute, which means
   *   that it should be safe to release anything completed here
   *
   *   i.e. don't check for blobs, but just go ahead and release
   */
  trans->releaseCompletedOperations();
  trans->releaseCompletedQueries();
}


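/*
  Flush all defined operations to the data nodes without committing.
  Per-row errors are ignored at execute time and then inspected by
  check_completed_operations_pre_commit() to decide which ones can
  safely be ignored (and to run any conflict handling operations).
*/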
static inline
int
execute_no_commit(Thd_ndb *thd_ndb, NdbTransaction *trans,
                  bool ignore_no_key,
                  uint *ignore_count = 0)
{
  DBUG_ENTER("execute_no_commit");
  ha_ndbcluster::release_completed_operations(trans);
  const NdbOperation *first= trans->getFirstDefinedOperation();
  const NdbOperation *last= trans->getLastDefinedOperation();
  thd_ndb->m_execute_count++;
  thd_ndb->m_unsent_bytes= 0;
  DBUG_PRINT("info", ("execute_count: %u", thd_ndb->m_execute_count));
  int rc= 0;
  do
  {
    if (trans->execute(NdbTransaction::NoCommit,
                       NdbOperation::AO_IgnoreError,
                       thd_ndb->m_force_send))
    {
      rc= -1;
      break;
    }
    if (!ignore_no_key || trans->getNdbError().code == 0)
    {
      rc= trans->getNdbError().code;
      break;
    }

    rc = check_completed_operations_pre_commit(thd_ndb, trans,
                                               first, last,
                                               ignore_count);
  } while (0);

  if (unlikely(thd_ndb->is_slave_thread() &&
               rc != 0))
  {
    g_ndb_slave_state.atTransactionAbort();
  }

  DBUG_PRINT("info", ("execute_no_commit rc is %d", rc));
  DBUG_RETURN(rc);
}


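/*
  Execute and commit the transaction. Uses AbortOnError when there are
  unsent bytes and errors cannot be ignored, otherwise IgnoreError plus
  a post-check of the completed operations. On success the session's
  last committed epoch (and the slave state) is updated.
*/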
static inline
int
execute_commit(Thd_ndb *thd_ndb, NdbTransaction *trans,
               int force_send, int ignore_error, uint *ignore_count = 0)
{
  DBUG_ENTER("execute_commit");
  NdbOperation::AbortOption ao= NdbOperation::AO_IgnoreError;
  if (thd_ndb->m_unsent_bytes && !ignore_error)
  {
    /*
      We have unsent bytes and cannot ignore error.  Calling execute
      with NdbOperation::AO_IgnoreError will result in possible commit
      of a transaction although there is an error.
    */
    ao= NdbOperation::AbortOnError;
  }
  const NdbOperation *first= trans->getFirstDefinedOperation();
  const NdbOperation *last= trans->getLastDefinedOperation();
  thd_ndb->m_execute_count++;
  thd_ndb->m_unsent_bytes= 0;
  DBUG_PRINT("info", ("execute_count: %u", thd_ndb->m_execute_count));
  int rc= 0;
  do
  {
    if (trans->execute(NdbTransaction::Commit, ao, force_send))
    {
      rc= -1;
      break;
    }

    if (!ignore_error || trans->getNdbError().code == 0)
    {
      rc= trans->getNdbError().code;
      break;
    }

    rc= check_completed_operations(thd_ndb, trans, first, last,
                                   ignore_count);
  } while (0);

  if (likely(rc == 0))
  {
    /* Committed ok, update session GCI, if it's available
     * (Not available for reads, empty transactions etc...)
     */
    Uint64 reportedGCI;
    if (trans->getGCI(&reportedGCI) == 0 &&
        reportedGCI != 0)
    {
      assert(reportedGCI >= thd_ndb->m_last_commit_epoch_session);
      thd_ndb->m_last_commit_epoch_session = reportedGCI;
    }
  }

  if (thd_ndb->is_slave_thread())
  {
    if (likely(rc == 0))
    {
      /* Success */
      g_ndb_slave_state.atTransactionCommit(thd_ndb->m_last_commit_epoch_session);
    }
    else
    {
      g_ndb_slave_state.atTransactionAbort();
    }
  }

  DBUG_PRINT("info", ("execute_commit rc is %d", rc));
  DBUG_RETURN(rc);
}

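/*
  "Ignore error" variant of execute_no_commit(): execute with NoCommit
  and AO_IgnoreError, without inspecting the individual operations.
*/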
static inline
int execute_no_commit_ie(Thd_ndb *thd_ndb, NdbTransaction *trans)
{
  DBUG_ENTER("execute_no_commit_ie");
  ha_ndbcluster::release_completed_operations(trans);
  int res= trans->execute(NdbTransaction::NoCommit,
                          NdbOperation::AO_IgnoreError,
                          thd_ndb->m_force_send);
  thd_ndb->m_unsent_bytes= 0;
  thd_ndb->m_execute_count++;
  DBUG_PRINT("info", ("execute_count: %u", thd_ndb->m_execute_count));
  DBUG_RETURN(res);
}

/*
  Placeholder for ha_ndbcluster thread-specific data
*/
typedef struct st_thd_ndb_share {
  const void *key;
  struct Ndb_local_table_statistics stat;
} THD_NDB_SHARE;
static
uchar *thd_ndb_share_get_key(THD_NDB_SHARE *thd_ndb_share, size_t *length,
                            my_bool not_used MY_ATTRIBUTE((unused)))
{
  *length= sizeof(thd_ndb_share->key);
  return (uchar*) &thd_ndb_share->key;
}

Thd_ndb::Thd_ndb(THD* thd) :
  m_thd(thd),
  m_slave_thread(thd->slave_thread),
  m_skip_binlog_setup_in_find_files(false),
  schema_locks_count(0),
  m_last_commit_epoch_session(0)
{
  connection= ndb_get_cluster_connection();
  m_connect_count= connection->get_connect_count();
  ndb= new Ndb(connection, "");
  lock_count= 0;
  start_stmt_count= 0;
  save_point_count= 0;
  count= 0;
  trans= NULL;
  m_handler= NULL;
  m_error= FALSE;
  options= 0;
  (void) my_hash_init(&open_tables, table_alias_charset, 5, 0, 0,
                      (my_hash_get_key)thd_ndb_share_get_key, 0, 0,
                      PSI_INSTRUMENT_ME);
  m_unsent_bytes= 0;
  m_execute_count= 0;
  m_scan_count= 0;
  m_pruned_scan_count= 0;
  m_sorted_scan_count= 0;
  m_pushed_queries_defined= 0;
  m_pushed_queries_dropped= 0;
  m_pushed_queries_executed= 0;
  m_pushed_reads= 0;
  memset(m_transaction_no_hint_count, 0, sizeof(m_transaction_no_hint_count));
  memset(m_transaction_hint_count, 0, sizeof(m_transaction_hint_count));
  global_schema_lock_trans= NULL;
  global_schema_lock_count= 0;
  global_schema_lock_error= 0;
  init_alloc_root(PSI_INSTRUMENT_ME,
                  &m_batch_mem_root, BATCH_FLUSH_SIZE/4, 0);
}

Thd_ndb::~Thd_ndb()
{
  if (opt_ndb_extra_logging > 1)
  {
    /*
      print some stats about the connection at disconnect
    */
    for (int i= 0; i < MAX_NDB_NODES; i++)
    {
      if (m_transaction_hint_count[i] > 0 ||
          m_transaction_no_hint_count[i] > 0)
      {
        sql_print_information("tid %u: node[%u] "
                              "transaction_hint=%u, transaction_no_hint=%u",
                              m_thd->thread_id(), i,
                              m_transaction_hint_count[i],
                              m_transaction_no_hint_count[i]);
      }
    }
  }
  if (ndb)
  {
    delete ndb;
    ndb= NULL;
  }
  changed_tables.empty();
  my_hash_free(&open_tables);
  free_root(&m_batch_mem_root, MYF(0));
}


Ndb *ha_ndbcluster::get_ndb(THD *thd) const
{
  return thd_get_thd_ndb(thd)->ndb;
}

/*
 * manage uncommitted inserts/deletes during a transaction to keep the record count correct
1305  */
1306 
set_rec_per_key()1307 void ha_ndbcluster::set_rec_per_key()
1308 {
1309   DBUG_ENTER("ha_ndbcluster::set_rec_per_key");
1310   /*
1311     Set up the 'rec_per_key[]' for keys which we have good knowledge
1312     about the distribution. 'rec_per_key[]' is init'ed to '0' by
1313     open_binary_frm(), which is interpreted as 'unknown' by optimizer.
1314     -> Not setting 'rec_per_key[]' will force the optimizer to use
1315     its own heuristic to estimate 'records pr. key'.
1316   */
1317   for (uint i=0 ; i < table_share->keys ; i++)
1318   {
1319     bool is_unique_index= false;
1320     KEY* key_info= table->key_info + i;
1321     switch (get_index_type(i))
1322     {
1323     case UNIQUE_INDEX:
1324     case PRIMARY_KEY_INDEX:
1325     {
1326       // Index is unique when all 'key_parts' are specified,
1327       // else distribution is unknown and not specified here.
1328       is_unique_index= true;
1329       break;
1330     }
1331     case UNIQUE_ORDERED_INDEX:
1332     case PRIMARY_KEY_ORDERED_INDEX:
1333       is_unique_index= true;
1334       // intentional fall thru to logic for ordered index
1335     case ORDERED_INDEX:
1336       // 'Records pr. key' are unknown for non-unique indexes.
1337       // (May change when we get better index statistics.)
1338     {
1339       THD *thd= current_thd;
1340       const bool index_stat_enable= THDVAR(NULL, index_stat_enable) &&
1341                                     THDVAR(thd, index_stat_enable);
1342       if (index_stat_enable)
1343       {
1344         int err= ndb_index_stat_set_rpk(i);
1345         if (err != 0 &&
1346             /* no stats is not unexpected error */
1347             err != NdbIndexStat::NoIndexStats &&
1348             /* warning was printed at first error */
1349             err != NdbIndexStat::MyHasError &&
1350             /* stats thread aborted request */
1351             err != NdbIndexStat::MyAbortReq)
1352         {
1353           push_warning_printf(thd, Sql_condition::SL_WARNING,
1354                               ER_CANT_GET_STAT, /* pun? */
1355                               "index stats (RPK) for key %s:"
1356                               " unexpected error %d",
1357                               key_info->name, err);
1358         }
1359       }
1360       // no fallback method...
1361       break;
1362     }
1363     default:
1364       assert(false);
1365     }
1366     // set rows per key to 1 for complete key given for unique/primary index
1367     if (is_unique_index)
1368     {
1369       key_info->set_records_per_key(key_info->user_defined_key_parts-1, 1.0f);
1370     }
1371   }
1372   DBUG_VOID_RETURN;
1373 }
1374 
records(ha_rows * num_rows)1375 int ha_ndbcluster::records(ha_rows* num_rows)
1376 {
1377   DBUG_ENTER("ha_ndbcluster::records");
1378   DBUG_PRINT("info", ("id=%d, no_uncommitted_rows_count=%d",
1379                       m_table->getTableId(),
1380                       m_table_info->no_uncommitted_rows_count));
1381 
1382   int error = update_stats(table->in_use, 1);
1383   if (error != 0)
1384   {
1385     *num_rows = HA_POS_ERROR;
1386     DBUG_RETURN(error);
1387   }
1388 
1389   *num_rows = stats.records;
1390   DBUG_RETURN(0);
1391 }
1392 
no_uncommitted_rows_execute_failure()1393 void ha_ndbcluster::no_uncommitted_rows_execute_failure()
1394 {
1395   DBUG_ENTER("ha_ndbcluster::no_uncommitted_rows_execute_failure");
1396   get_thd_ndb(current_thd)->m_error= TRUE;
1397   DBUG_VOID_RETURN;
1398 }
1399 
no_uncommitted_rows_update(int c)1400 void ha_ndbcluster::no_uncommitted_rows_update(int c)
1401 {
1402   DBUG_ENTER("ha_ndbcluster::no_uncommitted_rows_update");
1403   struct Ndb_local_table_statistics *local_info= m_table_info;
1404   local_info->no_uncommitted_rows_count+= c;
1405   DBUG_PRINT("info", ("id=%d, no_uncommitted_rows_count=%d",
1406                       m_table->getTableId(),
1407                       local_info->no_uncommitted_rows_count));
1408   DBUG_VOID_RETURN;
1409 }
1410 
1411 
ndb_err(NdbTransaction * trans)1412 int ha_ndbcluster::ndb_err(NdbTransaction *trans)
1413 {
1414   THD *thd= current_thd;
1415   int res;
1416   NdbError err= trans->getNdbError();
1417   DBUG_ENTER("ndb_err");
1418 
1419   switch (err.classification) {
1420   case NdbError::SchemaError:
1421   {
1422     // TODO perhaps we need to do more here, invalidate also in the cache
1423     m_table->setStatusInvalid();
1424     /* Close other open handlers not used by any thread */
1425     ndb_tdc_close_cached_table(thd, m_dbname, m_tabname);
1426     break;
1427   }
1428   default:
1429     break;
1430   }
1431   res= ndb_to_mysql_error(&err);
1432   DBUG_PRINT("info", ("transformed ndbcluster error %d to mysql error %d",
1433                       err.code, res));
1434   if (res == HA_ERR_FOUND_DUPP_KEY)
1435   {
1436     char *error_data= err.details;
1437     uint dupkey= MAX_KEY;
1438 
1439     for (uint i= 0; i < MAX_KEY; i++)
1440     {
1441       if (m_index[i].type == UNIQUE_INDEX ||
1442           m_index[i].type == UNIQUE_ORDERED_INDEX)
1443       {
1444         const NDBINDEX *unique_index=
1445           (const NDBINDEX *) m_index[i].unique_index;
1446         if (unique_index && UintPtr(unique_index->getObjectId()) == UintPtr(error_data))
1447         {
1448           dupkey= i;
1449           break;
1450         }
1451       }
1452     }
1453     if (m_rows_to_insert == 1)
1454     {
1455       /*
1456 	We can only distinguish between primary and non-primary
1457 	violations here, so we need to return MAX_KEY for non-primary
1458 	to signal that key is unknown
1459       */
1460       m_dupkey= err.code == 630 ? table_share->primary_key : dupkey;
1461     }
1462     else
1463     {
1464       /* We are batching inserts, offending key is not available */
1465       m_dupkey= (uint) -1;
1466     }
1467   }
1468   DBUG_RETURN(res);
1469 }
1470 
1471 
1472 /**
1473   Override the default get_error_message in order to add the
1474   error message of NDB.
1475 */
1476 
1477 bool ha_ndbcluster::get_error_message(int error,
1478                                       String *buf)
1479 {
1480   DBUG_ENTER("ha_ndbcluster::get_error_message");
1481   DBUG_PRINT("enter", ("error: %d", error));
1482 
1483   Ndb *ndb= check_ndb_in_thd(current_thd);
1484   if (!ndb)
1485     DBUG_RETURN(FALSE);
1486 
1487   const NdbError err= ndb->getNdbError(error);
1488   bool temporary= err.status==NdbError::TemporaryError;
1489   buf->set(err.message, (uint32)strlen(err.message), &my_charset_bin);
1490   DBUG_PRINT("exit", ("message: %s, temporary: %d", buf->ptr(), temporary));
1491   DBUG_RETURN(temporary);
1492 }
1493 
1494 
1495 /*
1496   field_used_length() returns the number of bytes actually used to
1497   store the data of the field. So for a varstring it includes both
1498   length byte(s) and string data, and anything after data_length()
1499   bytes are unused.
1500 */
1501 static
1502 uint32 field_used_length(const Field* field)
1503 {
1504  if (field->type() == MYSQL_TYPE_VARCHAR)
1505  {
1506    const Field_varstring* f = static_cast<const Field_varstring*>(field);
1507    return f->length_bytes + const_cast<Field_varstring*>(f)->data_length();
1508                             // ^ no 'data_length() const'
1509  }
1510  return field->pack_length();
1511 }
1512 
1513 
1514 /**
1515   Check if MySQL field type forces var part in ndb storage
1516 */
1517 static bool field_type_forces_var_part(enum_field_types type)
1518 {
1519   switch (type) {
1520   case MYSQL_TYPE_VAR_STRING:
1521   case MYSQL_TYPE_VARCHAR:
1522     return TRUE;
1523   case MYSQL_TYPE_TINY_BLOB:
1524   case MYSQL_TYPE_BLOB:
1525   case MYSQL_TYPE_MEDIUM_BLOB:
1526   case MYSQL_TYPE_LONG_BLOB:
1527   case MYSQL_TYPE_JSON:
1528   case MYSQL_TYPE_GEOMETRY:
1529     return FALSE;
1530   default:
1531     return FALSE;
1532   }
1533 }
1534 
1535 /*
1536   Return a generic buffer that will remain valid until after next execute.
1537 
1538   The memory is freed by the first call to add_row_check_if_batch_full_size()
1539   following any execute() call. The intention is that the memory is associated
1540   with one batch of operations during batched slave updates.
1541 
1542   Note in particular that using get_buffer() / copy_row_to_buffer() separately
1543   from add_row_check_if_batch_full_size() could make memory usage grow without
1544   limit, and that this sequence:
1545 
1546     execute()
1547     get_buffer() / copy_row_to_buffer()
1548     add_row_check_if_batch_full_size()
1549     ...
1550     execute()
1551 
1552   will free the memory already at add_row_check_if_batch_full_size() time; it
1553   will not remain valid until the second execute().
1554 */
1555 uchar *
1556 ha_ndbcluster::get_buffer(Thd_ndb *thd_ndb, uint size)
1557 {
1558   return (uchar*)alloc_root(&(thd_ndb->m_batch_mem_root), size);
1559 }
1560 
1561 uchar *
1562 ha_ndbcluster::copy_row_to_buffer(Thd_ndb *thd_ndb, const uchar *record)
1563 {
1564   uchar *row= get_buffer(thd_ndb, table->s->reclength);
1565   if (unlikely(!row))
1566     return NULL;
1567   memcpy(row, record, table->s->reclength);
1568   return row;
1569 }
1570 
1571 /**
1572  * findBlobError
1573  * This method attempts to find an error in the hierarchy of runtime
1574  * NDBAPI objects from Blob up to transaction.
1575  * It will return -1 if no error is found, 0 if an error is found.
1576  */
1577 int findBlobError(NdbError& error, NdbBlob* pBlob)
1578 {
1579   error= pBlob->getNdbError();
1580   if (error.code != 0)
1581     return 0;
1582 
1583   const NdbOperation* pOp= pBlob->getNdbOperation();
1584   error= pOp->getNdbError();
1585   if (error.code != 0)
1586     return 0;
1587 
1588   NdbTransaction* pTrans= pOp->getNdbTransaction();
1589   error= pTrans->getNdbError();
1590   if (error.code != 0)
1591     return 0;
1592 
1593   /* No error on any of the objects */
1594   return -1;
1595 }
1596 
1597 
1598 /*
1599  This routine calculates the length of the blob/text after applying mysql limits
1600  on blob/text sizes. If the blob contains multi-byte characters, the length is
1601  reduced till the end of the last well-formed char, so that data is not truncated
1602  in the middle of a multi-byte char.
1603  */
1604 uint64 calc_ndb_blob_len(const CHARSET_INFO *cs, uchar *blob_ptr, uint64 maxlen)
1605 {
1606   int errors = 0;
1607 
1608   const char *begin = (const char*) blob_ptr;
1609   const char *end = (const char*) (blob_ptr+maxlen);
1610 
1611   // avoid truncation in the middle of a multi-byte character by
1612   // stopping at end of last well-formed character before max length
1613   uint32 numchars = cs->cset->numchars(cs, begin, end);
1614   uint64 len64 = cs->cset->well_formed_len(cs, begin, end, numchars, &errors);
1615   assert(len64 <= maxlen);
1616 
1617   return len64;
1618 }
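/*
  Worked example (assumed data, for illustration only): for a utf8
  TEXT column with maxlen = 5 and stored bytes
  0x61 0x62 0xC3 0xA9 0xC3 ... ("ab", "é", then the first byte of a
  second "é"), the byte at offset 4 starts a character that does not
  fit, so well_formed_len() stops after "abé" and

    // uint64 len= calc_ndb_blob_len(field->charset(), ptr, 5);

  would return 4 rather than 5, avoiding a split multi-byte character.
*/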
1619 
1620 int g_get_ndb_blobs_value(NdbBlob *ndb_blob, void *arg)
1621 {
1622   ha_ndbcluster *ha= (ha_ndbcluster *)arg;
1623   DBUG_ENTER("g_get_ndb_blobs_value");
1624   DBUG_PRINT("info", ("destination row: %p", ha->m_blob_destination_record));
1625 
1626   if (ha->m_blob_counter == 0)   /* Reset total size at start of row */
1627     ha->m_blobs_row_total_size= 0;
1628 
1629   /* Count the total length needed for blob data. */
1630   int isNull;
1631   if (ndb_blob->getNull(isNull) != 0)
1632     ERR_RETURN(ndb_blob->getNdbError());
1633   if (isNull == 0) {
1634     Uint64 len64= 0;
1635     if (ndb_blob->getLength(len64) != 0)
1636       ERR_RETURN(ndb_blob->getNdbError());
1637     /* Align to Uint64. */
1638     ha->m_blobs_row_total_size+= (len64 + 7) & ~((Uint64)7);
1639     if (ha->m_blobs_row_total_size > 0xffffffff)
1640     {
1641       assert(FALSE);
1642       DBUG_RETURN(-1);
1643     }
1644     DBUG_PRINT("info", ("Blob number %d needs size %llu, total buffer reqt. now %llu",
1645                         ha->m_blob_counter,
1646                         len64,
1647                         ha->m_blobs_row_total_size));
1648   }
1649   ha->m_blob_counter++;
1650 
1651   /*
1652     Wait until all blobs in this row are active, so we can allocate
1653     and use a common buffer containing all.
1654   */
1655   if (ha->m_blob_counter < ha->m_blob_expected_count_per_row)
1656     DBUG_RETURN(0);
1657 
1658   /* Reset blob counter for next row (scan scenario) */
1659   ha->m_blob_counter= 0;
1660 
1661   /* Re-allocate bigger blob buffer for this row if necessary. */
1662   if (ha->m_blobs_row_total_size > ha->m_blobs_buffer_size)
1663   {
1664     my_free(ha->m_blobs_buffer, MYF(MY_ALLOW_ZERO_PTR));
1665     DBUG_PRINT("info", ("allocate blobs buffer size %u",
1666                         (uint32)(ha->m_blobs_row_total_size)));
1667     /* The Windows compiler complains when my_malloc() gets a non-size_t
1668      * argument, so validate that the Uint64 size fits in size_t
1669      */
1670     if(((size_t)ha->m_blobs_row_total_size) != ha->m_blobs_row_total_size)
1671     {
1672       ha->m_blobs_buffer= NULL;
1673       ha->m_blobs_buffer_size= 0;
1674       DBUG_RETURN(-1);
1675     }
1676 
1677     ha->m_blobs_buffer=
1678       (uchar*) my_malloc(PSI_INSTRUMENT_ME,
1679                          (size_t) ha->m_blobs_row_total_size, MYF(MY_WME));
1680     if (ha->m_blobs_buffer == NULL)
1681     {
1682       ha->m_blobs_buffer_size= 0;
1683       DBUG_RETURN(-1);
1684     }
1685     ha->m_blobs_buffer_size= ha->m_blobs_row_total_size;
1686   }
1687 
1688   /*
1689     Now read all blob data.
1690     If we know the destination mysqld row, we also set the blob null bit and
1691     pointer/length (if not, it will be done instead in unpack_record()).
1692   */
1693   uint32 offset= 0;
1694   for (uint i= 0; i < ha->table->s->fields; i++)
1695   {
1696     Field *field= ha->table->field[i];
1697     if (! (field->flags & BLOB_FLAG))
1698       continue;
1699     NdbValue value= ha->m_value[i];
1700     if (value.blob == NULL)
1701     {
1702       DBUG_PRINT("info",("[%u] skipped", i));
1703       continue;
1704     }
1705     Field_blob *field_blob= (Field_blob *)field;
1706     NdbBlob *ndb_blob= value.blob;
1707     int isNull;
1708     if (ndb_blob->getNull(isNull) != 0)
1709       ERR_RETURN(ndb_blob->getNdbError());
1710     if (isNull == 0) {
1711       Uint64 len64= 0;
1712       if (ndb_blob->getLength(len64) != 0)
1713         ERR_RETURN(ndb_blob->getNdbError());
1714       assert(len64 < 0xffffffff);
1715       uchar *buf= ha->m_blobs_buffer + offset;
1716       uint32 len= (uint32)(ha->m_blobs_buffer_size - offset);
1717       if (ndb_blob->readData(buf, len) != 0)
1718       {
1719         NdbError err;
1720         if (findBlobError(err, ndb_blob) == 0)
1721         {
1722           ERR_RETURN(err);
1723         }
1724         else
1725         {
1726           /* Should always have some error code set */
1727           assert(err.code != 0);
1728           ERR_RETURN(err);
1729         }
1730       }
1731       DBUG_PRINT("info", ("[%u] offset: %u  buf: 0x%lx  len=%u",
1732                           i, offset, (long) buf, len));
1733       assert(len == len64);
1734       if (ha->m_blob_destination_record)
1735       {
1736         my_ptrdiff_t ptrdiff=
1737           ha->m_blob_destination_record - ha->table->record[0];
1738         field_blob->move_field_offset(ptrdiff);
1739 
1740         if(len > field_blob->max_data_length())
1741         {
1742           len = calc_ndb_blob_len(field_blob->charset(),
1743                                   buf, field_blob->max_data_length());
1744 
1745           // push a warning
1746           push_warning_printf(current_thd, Sql_condition::SL_WARNING,
1747                       WARN_DATA_TRUNCATED,
1748                       "Truncated value from TEXT field \'%s\'", field_blob->field_name);
1749         }
1750 
1751         field_blob->set_ptr(len, buf);
1752         field_blob->set_notnull();
1753         field_blob->move_field_offset(-ptrdiff);
1754       }
1755       offset+= Uint32((len64 + 7) & ~((Uint64)7));
1756     }
1757     else if (ha->m_blob_destination_record)
1758     {
1759       /* Have to set length even in this case. */
1760       my_ptrdiff_t ptrdiff=
1761         ha->m_blob_destination_record - ha->table->record[0];
1762       uchar *buf= ha->m_blobs_buffer + offset;
1763       field_blob->move_field_offset(ptrdiff);
1764       field_blob->set_ptr((uint32)0, buf);
1765       field_blob->set_null();
1766       field_blob->move_field_offset(-ptrdiff);
1767       DBUG_PRINT("info", ("[%u] isNull=%d", i, isNull));
1768     }
1769   }
1770 
1771   if (!ha->m_active_cursor)
1772   {
1773     /* Non-scan case: blob reads have been issued;
1774      * execute them and then close the Blob
1775      * handles
1776      */
1777     for (uint i= 0; i < ha->table->s->fields; i++)
1778     {
1779       Field *field= ha->table->field[i];
1780       if (! (field->flags & BLOB_FLAG))
1781         continue;
1782       NdbValue value= ha->m_value[i];
1783       if (value.blob == NULL)
1784       {
1785         DBUG_PRINT("info",("[%u] skipped", i));
1786         continue;
1787       }
1788       NdbBlob *ndb_blob= value.blob;
1789 
1790       assert(ndb_blob->getState() == NdbBlob::Active);
1791 
1792       /* Call close() with execPendingBlobOps == true
1793        * For LM_CommittedRead access, this will enqueue
1794        * an unlock operation, which the Blob framework
1795        * code invoking this callback will execute before
1796        * returning control to the caller of execute()
1797        */
1798       if (ndb_blob->close(true) != 0)
1799       {
1800         ERR_RETURN(ndb_blob->getNdbError());
1801       }
1802     }
1803   }
1804 
1805   DBUG_RETURN(0);
1806 }
1807 
1808 /*
1809   Request reading of blob values.
1810 
1811   If dst_record is specified, the blob null bit, pointer, and length will be
1812   set in that record. Otherwise they must be set later by calling
1813   unpack_record().
1814 */
1815 int
1816 ha_ndbcluster::get_blob_values(const NdbOperation *ndb_op, uchar *dst_record,
1817                                const MY_BITMAP *bitmap)
1818 {
1819   uint i;
1820   DBUG_ENTER("ha_ndbcluster::get_blob_values");
1821 
1822   m_blob_counter= 0;
1823   m_blob_expected_count_per_row= 0;
1824   m_blob_destination_record= dst_record;
1825   m_blobs_row_total_size= 0;
1826   ndb_op->getNdbTransaction()->
1827     setMaxPendingBlobReadBytes(THDVAR(current_thd, blob_read_batch_bytes));
1828 
1829   for (i= 0; i < table_share->fields; i++)
1830   {
1831     Field *field= table->field[i];
1832     if (!(field->flags & BLOB_FLAG))
1833       continue;
1834 
1835     DBUG_PRINT("info", ("fieldnr=%d", i));
1836     NdbBlob *ndb_blob;
1837     if (bitmap_is_set(bitmap, i))
1838     {
1839       if ((ndb_blob= ndb_op->getBlobHandle(i)) == NULL ||
1840           ndb_blob->setActiveHook(g_get_ndb_blobs_value, this) != 0)
1841         DBUG_RETURN(1);
1842       m_blob_expected_count_per_row++;
1843     }
1844     else
1845       ndb_blob= NULL;
1846 
1847     m_value[i].blob= ndb_blob;
1848   }
1849 
1850   DBUG_RETURN(0);
1851 }
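/*
  Call-flow sketch (simplified, names of locals are assumptions): how
  the blob read hook registered above is driven by the NDB API.

    // 1. Request blob handles and register the hook:
    //      get_blob_values(op, dst_record, table->read_set);
    // 2. Execute the transaction:
    //      trans->execute(NdbTransaction::NoCommit);
    // During execute(), g_get_ndb_blobs_value() is invoked once per
    // blob column of the row; on the last one it sizes the shared
    // buffer, reads all blob data and, when dst_record was supplied,
    // fills in each Field_blob's null bit, pointer and length.
*/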
1852 
1853 int
1854 ha_ndbcluster::set_blob_values(const NdbOperation *ndb_op,
1855                                my_ptrdiff_t row_offset, const MY_BITMAP *bitmap,
1856                                uint *set_count, bool batch)
1857 {
1858   uint field_no;
1859   uint *blob_index, *blob_index_end;
1860   int res= 0;
1861   DBUG_ENTER("ha_ndbcluster::set_blob_values");
1862 
1863   *set_count= 0;
1864 
1865   if (table_share->blob_fields == 0)
1866     DBUG_RETURN(0);
1867 
1868   ndb_op->getNdbTransaction()->
1869     setMaxPendingBlobWriteBytes(THDVAR(current_thd, blob_write_batch_bytes));
1870   blob_index= table_share->blob_field;
1871   blob_index_end= blob_index + table_share->blob_fields;
1872   do
1873   {
1874     field_no= *blob_index;
1875     /* A NULL bitmap sets all blobs. */
1876     if (bitmap && !bitmap_is_set(bitmap, field_no))
1877       continue;
1878     Field *field= table->field[field_no];
1879 
1880     NdbBlob *ndb_blob= ndb_op->getBlobHandle(field_no);
1881     if (ndb_blob == NULL)
1882       ERR_RETURN(ndb_op->getNdbError());
1883     if (field->is_real_null(row_offset))
1884     {
1885       DBUG_PRINT("info", ("Setting Blob %d to NULL", field_no));
1886       if (ndb_blob->setNull() != 0)
1887         ERR_RETURN(ndb_op->getNdbError());
1888     }
1889     else
1890     {
1891       Field_blob *field_blob= (Field_blob *)field;
1892 
1893       // Get length and pointer to data
1894       const uchar *field_ptr= field->ptr + row_offset;
1895       uint32 blob_len= field_blob->get_length(field_ptr);
1896       uchar* blob_ptr= NULL;
1897       field_blob->get_ptr(&blob_ptr);
1898 
1899       // Looks like NULL ptr signals length 0 blob
1900       if (blob_ptr == NULL) {
1901         assert(blob_len == 0);
1902         blob_ptr= (uchar*)"";
1903       }
1904 
1905       DBUG_PRINT("value", ("set blob ptr: 0x%lx  len: %u",
1906                            (long) blob_ptr, blob_len));
1907       DBUG_DUMP("value", blob_ptr, MIN(blob_len, 26));
1908 
1909       /*
1910         NdbBlob requires the data pointer to remain valid until execute() time.
1911         So when batching, we need to copy the value to a temporary buffer.
1912       */
1913       if (batch && blob_len > 0)
1914       {
1915         uchar *tmp_buf= get_buffer(m_thd_ndb, blob_len);
1916         if (!tmp_buf)
1917           DBUG_RETURN(HA_ERR_OUT_OF_MEM);
1918         memcpy(tmp_buf, blob_ptr, blob_len);
1919         blob_ptr= tmp_buf;
1920       }
1921       res= ndb_blob->setValue((char*)blob_ptr, blob_len);
1922       if (res != 0)
1923         ERR_RETURN(ndb_op->getNdbError());
1924     }
1925 
1926     ++(*set_count);
1927   } while (++blob_index != blob_index_end);
1928 
1929   DBUG_RETURN(res);
1930 }
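/*
  Note on the batching copy above (sketch, sizes assumed): NdbBlob only
  stores the pointer passed to setValue() until execute() runs, so a
  pointer into the mysqld record buffer could be overwritten before the
  batch is flushed. Copying into the batch MEM_ROOT first, e.g.

    uchar *tmp_buf= get_buffer(m_thd_ndb, blob_len);  // valid until the
    memcpy(tmp_buf, blob_ptr, blob_len);              // next execute()
    ndb_blob->setValue((char*)tmp_buf, blob_len);

  keeps the value stable for the whole batch at the cost of one extra
  copy per blob.
*/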
1931 
1932 
1933 /**
1934   Check if any set or get of blob value in current query.
1935 */
1936 
1937 bool ha_ndbcluster::uses_blob_value(const MY_BITMAP *bitmap) const
1938 {
1939   uint *blob_index, *blob_index_end;
1940   if (table_share->blob_fields == 0)
1941     return FALSE;
1942 
1943   blob_index=     table_share->blob_field;
1944   blob_index_end= blob_index + table_share->blob_fields;
1945   do
1946   {
1947     if (bitmap_is_set(bitmap, table->field[*blob_index]->field_index))
1948       return TRUE;
1949   } while (++blob_index != blob_index_end);
1950   return FALSE;
1951 }
1952 
1953 void ha_ndbcluster::release_blobs_buffer()
1954 {
1955   DBUG_ENTER("releaseBlobsBuffer");
1956   if (m_blobs_buffer_size > 0)
1957   {
1958     DBUG_PRINT("info", ("Deleting blobs buffer, size %llu", m_blobs_buffer_size));
1959     my_free(m_blobs_buffer, MYF(MY_ALLOW_ZERO_PTR));
1960     m_blobs_buffer= 0;
1961     m_blobs_row_total_size= 0;
1962     m_blobs_buffer_size= 0;
1963   }
1964   DBUG_VOID_RETURN;
1965 }
1966 
1967 
1968 /*
1969   Does type support a default value?
1970 */
1971 static bool
1972 type_supports_default_value(enum_field_types mysql_type)
1973 {
1974   bool ret = (mysql_type != MYSQL_TYPE_BLOB &&
1975               mysql_type != MYSQL_TYPE_TINY_BLOB &&
1976               mysql_type != MYSQL_TYPE_MEDIUM_BLOB &&
1977               mysql_type != MYSQL_TYPE_LONG_BLOB &&
1978               mysql_type != MYSQL_TYPE_JSON &&
1979               mysql_type != MYSQL_TYPE_GEOMETRY);
1980 
1981   return ret;
1982 }
1983 
1984 /**
1985    Check that Ndb data dictionary has the same default values
1986    as MySQLD for the current table.
1987    Called from a DBUG check during table open
1988 
1989    Returns
1990      0  - Defaults are ok
1991      -1 - Some default(s) are bad
1992 */
1993 int ha_ndbcluster::check_default_values(const NDBTAB* ndbtab)
1994 {
1995   /* Debug-only method for checking that table defaults are aligned
1996      between MySQLD and Ndb
1997   */
1998   bool defaults_aligned= true;
1999 
2000   if (ndbtab->hasDefaultValues())
2001   {
2002     /* Ndb supports native defaults for non-pk columns */
2003     my_bitmap_map *old_map= tmp_use_all_columns(table, table->read_set);
2004 
2005     for (uint f=0; f < table_share->fields; f++)
2006     {
2007       Field* field= table->field[f]; // Use Field struct from MySQLD table rep
2008       const NdbDictionary::Column* ndbCol= ndbtab->getColumn(field->field_index);
2009 
2010       if ((! (field->flags & (PRI_KEY_FLAG |
2011                               NO_DEFAULT_VALUE_FLAG))) &&
2012           type_supports_default_value(field->real_type()))
2013       {
2014         /* We expect Ndb to have a native default for this
2015          * column
2016          */
2017         my_ptrdiff_t src_offset= table_share->default_values -
2018           field->table->record[0];
2019 
2020         /* Move field by offset to refer to default value */
2021         field->move_field_offset(src_offset);
2022 
2023         const uchar* ndb_default= (const uchar*) ndbCol->getDefaultValue();
2024 
2025         if (ndb_default == NULL)
2026           /* MySQLD default must also be NULL */
2027           defaults_aligned= field->is_null();
2028         else
2029         {
2030           if (field->type() != MYSQL_TYPE_BIT)
2031           {
2032             defaults_aligned= (0 == field->cmp(ndb_default));
2033           }
2034           else
2035           {
2036             longlong value= (static_cast<Field_bit*>(field))->val_int();
2037             /* Map to NdbApi format - two Uint32s */
2038             Uint32 out[2];
2039             out[0] = 0;
2040             out[1] = 0;
2041             for (int b=0; b < 64; b++)
2042             {
2043               out[b >> 5] |= (value & 1) << (b & 31);
2044 
2045               value= value >> 1;
2046             }
2047             Uint32 defaultLen = field_used_length(field);
2048             defaultLen = ((defaultLen + 3) & ~(Uint32)0x7);
2049             defaults_aligned= (0 == memcmp(ndb_default,
2050                                            out,
2051                                            defaultLen));
2052           }
2053         }
2054 
2055         field->move_field_offset(-src_offset);
2056 
2057         if (unlikely(!defaults_aligned))
2058         {
2059           sql_print_error("NDB Internal error: Default values differ "
2060                           "for column %u, ndb_default: %d",
2061                           field->field_index, ndb_default != NULL);
2062         }
2063       }
2064       else
2065       {
2066         /* We don't expect Ndb to have a native default for this column */
2067         if (unlikely(ndbCol->getDefaultValue() != NULL))
2068         {
2069           /* Didn't expect that */
2070           sql_print_error("NDB Internal error: Column %u has native "
2071                           "default, but shouldn't. Flags=%u, type=%u",
2072                           field->field_index, field->flags,
2073                           field->real_type());
2074           defaults_aligned= false;
2075         }
2076       }
2077       if (unlikely(!defaults_aligned))
2078       {
2079         // Dump field
2080         sql_print_error("field[ name: '%s', type: %u, real_type: %u, "
2081                         "flags: 0x%x, is_null: %d]",
2082                         field->field_name, field->type(), field->real_type(),
2083                         field->flags, field->is_null());
2084         // Dump ndbCol
2085         sql_print_error("ndbCol[name: '%s', type: %u, column_no: %d, "
2086                         "nullable: %d]",
2087                         ndbCol->getName(), ndbCol->getType(),
2088                         ndbCol->getColumnNo(), ndbCol->getNullable());
2089         break;
2090       }
2091     }
2092     tmp_restore_column_map(table->read_set, old_map);
2093   }
2094 
2095   return (defaults_aligned? 0: -1);
2096 }
2097 
2098 int ha_ndbcluster::get_metadata(THD *thd, const char *path)
2099 {
2100   Ndb *ndb= get_thd_ndb(thd)->ndb;
2101   NDBDICT *dict= ndb->getDictionary();
2102   const NDBTAB *tab;
2103   int error;
2104   DBUG_ENTER("get_metadata");
2105   DBUG_PRINT("enter", ("m_tabname: %s, path: %s", m_tabname, path));
2106 
2107   assert(m_table == NULL);
2108   assert(m_table_info == NULL);
2109 
2110   uchar *data= NULL, *pack_data= NULL;
2111   size_t length, pack_length;
2112 
2113   /*
2114     Compare FrmData in NDB with frm file from disk.
2115   */
2116   error= 0;
2117   if (readfrm(path, &data, &length) ||
2118       packfrm(data, length, &pack_data, &pack_length))
2119   {
2120     my_free(data, MYF(MY_ALLOW_ZERO_PTR));
2121     my_free(pack_data, MYF(MY_ALLOW_ZERO_PTR));
2122     DBUG_RETURN(1);
2123   }
2124 
2125   ndb->setDatabaseName(m_dbname);
2126   Ndb_table_guard ndbtab_g(dict, m_tabname);
2127   if (!(tab= ndbtab_g.get_table()))
2128     ERR_RETURN(dict->getNdbError());
2129 
2130   if (get_ndb_share_state(m_share) != NSS_ALTERED
2131       && cmp_frm(tab, pack_data, pack_length))
2132   {
2133     DBUG_PRINT("error",
2134                ("metadata, pack_length: %lu  getFrmLength: %d  memcmp: %d",
2135                 (ulong) pack_length, tab->getFrmLength(),
2136                 memcmp(pack_data, tab->getFrmData(), pack_length)));
2137     DBUG_DUMP("pack_data", (uchar*) pack_data, pack_length);
2138     DBUG_DUMP("frm", (uchar*) tab->getFrmData(), tab->getFrmLength());
2139     error= HA_ERR_TABLE_DEF_CHANGED;
2140   }
2141   my_free((char*)data, MYF(0));
2142   my_free((char*)pack_data, MYF(0));
2143 
2144   /* Now check that any Ndb native defaults are aligned
2145      with MySQLD defaults
2146   */
2147   assert(check_default_values(tab) == 0);
2148 
2149   if (error)
2150     goto err;
2151 
2152   DBUG_PRINT("info", ("fetched table %s", tab->getName()));
2153   m_table= tab;
2154 
2155   if (bitmap_init(&m_bitmap, m_bitmap_buf, table_share->fields, 0))
2156   {
2157     error= HA_ERR_OUT_OF_MEM;
2158     goto err;
2159   }
2160   if (table_share->primary_key == MAX_KEY)
2161   {
2162     /* Hidden primary key. */
2163     if ((error= add_hidden_pk_ndb_record(dict)) != 0)
2164       goto err;
2165   }
2166 
2167   if ((error= add_table_ndb_record(dict)) != 0)
2168     goto err;
2169 
2170   /*
2171     Approx. write size in bytes over transporter
2172   */
2173   m_bytes_per_write= 12 + tab->getRowSizeInBytes() + 4 * tab->getNoOfColumns();
2174 
2175   /* Open indexes */
2176   if ((error= open_indexes(thd, ndb, table, FALSE)) != 0)
2177     goto err;
2178 
2179   /* Read foreign keys where this table is child or parent */
2180   if ((error= get_fk_data(thd, ndb)) != 0)
2181     goto err;
2182 
2183   /*
2184     Backward compatibility for tables created without tablespace
2185     in .frm => read tablespace setting from engine
2186   */
2187   if (table_share->mysql_version < 50120 &&
2188       !table_share->tablespace /* safety */)
2189   {
2190     Uint32 id;
2191     if (tab->getTablespace(&id))
2192     {
2193       NdbDictionary::Tablespace ts= dict->getTablespace(id);
2194       NdbError ndberr= dict->getNdbError();
2195       if (ndberr.classification == NdbError::NoError)
2196       {
2197         const char *tablespace= ts.getName();
2198         const size_t tablespace_len= strlen(tablespace);
2199         if (tablespace_len != 0)
2200         {
2201           DBUG_PRINT("info", ("Found tablespace '%s'", tablespace));
2202           table_share->tablespace= strmake_root(&table_share->mem_root,
2203                                                 tablespace,
2204                                                 tablespace_len);
2205         }
2206       }
2207     }
2208   }
2209 
2210   ndbtab_g.release();
2211 
2212   DBUG_RETURN(0);
2213 
2214 err:
2215   ndbtab_g.invalidate();
2216   m_table= NULL;
2217   DBUG_RETURN(error);
2218 }
2219 
2220 static int fix_unique_index_attr_order(NDB_INDEX_DATA &data,
2221                                        const NDBINDEX *index,
2222                                        KEY *key_info)
2223 {
2224   DBUG_ENTER("fix_unique_index_attr_order");
2225   unsigned sz= index->getNoOfIndexColumns();
2226 
2227   if (data.unique_index_attrid_map)
2228     my_free((char*)data.unique_index_attrid_map, MYF(0));
2229   data.unique_index_attrid_map= (uchar*)my_malloc(PSI_INSTRUMENT_ME, sz,MYF(MY_WME));
2230   if (data.unique_index_attrid_map == 0)
2231   {
2232     sql_print_error("fix_unique_index_attr_order: my_malloc(%u) failure",
2233                     (unsigned int)sz);
2234     DBUG_RETURN(HA_ERR_OUT_OF_MEM);
2235   }
2236 
2237   KEY_PART_INFO* key_part= key_info->key_part;
2238   KEY_PART_INFO* end= key_part+key_info->user_defined_key_parts;
2239   assert(key_info->user_defined_key_parts == sz);
2240   for (unsigned i= 0; key_part != end; key_part++, i++)
2241   {
2242     const char *field_name= key_part->field->field_name;
2243 #ifndef NDEBUG
2244    data.unique_index_attrid_map[i]= 255;
2245 #endif
2246     for (unsigned j= 0; j < sz; j++)
2247     {
2248       const NDBCOL *c= index->getColumn(j);
2249       if (strcmp(field_name, c->getName()) == 0)
2250       {
2251         data.unique_index_attrid_map[i]= j;
2252         break;
2253       }
2254     }
2255     assert(data.unique_index_attrid_map[i] != 255);
2256   }
2257   DBUG_RETURN(0);
2258 }
2259 
2260 /*
2261   Create all the indexes for a table.
2262   If any index should fail to be created,
2263   the error is returned immediately
2264 */
2265 int ha_ndbcluster::create_indexes(THD *thd, Ndb *ndb, TABLE *tab) const
2266 {
2267   uint i;
2268   int error= 0;
2269   const char *index_name;
2270   KEY* key_info= tab->key_info;
2271   const char **key_name= tab->s->keynames.type_names;
2272   DBUG_ENTER("ha_ndbcluster::create_indexes");
2273 
2274   for (i= 0; i < tab->s->keys; i++, key_info++, key_name++)
2275   {
2276     index_name= *key_name;
2277     NDB_INDEX_TYPE idx_type= get_index_type_from_table(i);
2278     error= create_index(thd, index_name, key_info, idx_type, i);
2279     if (error)
2280     {
2281       DBUG_PRINT("error", ("Failed to create index %u", i));
2282       break;
2283     }
2284   }
2285 
2286   DBUG_RETURN(error);
2287 }
2288 
2289 static void ndb_init_index(NDB_INDEX_DATA &data)
2290 {
2291   data.type= UNDEFINED_INDEX;
2292   data.status= UNDEFINED;
2293   data.unique_index= NULL;
2294   data.index= NULL;
2295   data.unique_index_attrid_map= NULL;
2296   data.ndb_record_key= NULL;
2297   data.ndb_unique_record_key= NULL;
2298   data.ndb_unique_record_row= NULL;
2299 }
2300 
2301 static void ndb_clear_index(NDBDICT *dict, NDB_INDEX_DATA &data)
2302 {
2303   if (data.unique_index_attrid_map)
2304   {
2305     my_free((char*)data.unique_index_attrid_map, MYF(0));
2306   }
2307   if (data.ndb_unique_record_key)
2308     dict->releaseRecord(data.ndb_unique_record_key);
2309   if (data.ndb_unique_record_row)
2310     dict->releaseRecord(data.ndb_unique_record_row);
2311   if (data.ndb_record_key)
2312     dict->releaseRecord(data.ndb_record_key);
2313   ndb_init_index(data);
2314 }
2315 
2316 static
2317 void ndb_protect_char(const char* from, char* to, uint to_length, char protect)
2318 {
2319   uint fpos= 0, tpos= 0;
2320 
2321   while(from[fpos] != '\0' && tpos < to_length - 1)
2322   {
2323     if (from[fpos] == protect)
2324     {
2325       int len= 0;
2326       to[tpos++]= '@';
2327       if(tpos < to_length - 5)
2328       {
2329         len= sprintf(to+tpos, "00%u", (uint) protect);
2330         tpos+= len;
2331       }
2332     }
2333     else
2334     {
2335       to[tpos++]= from[fpos];
2336     }
2337     fpos++;
2338   }
2339   to[tpos]= '\0';
2340 }
2341 
2342 /*
2343   Associate a direct reference to an index handle
2344   with an index (for faster access)
2345  */
2346 int ha_ndbcluster::add_index_handle(THD *thd, NDBDICT *dict, KEY *key_info,
2347                                     const char *key_name, uint index_no)
2348 {
2349   char index_name[FN_LEN + 1];
2350   int error= 0;
2351 
2352   NDB_INDEX_TYPE idx_type= get_index_type_from_table(index_no);
2353   m_index[index_no].type= idx_type;
2354   DBUG_ENTER("ha_ndbcluster::add_index_handle");
2355   DBUG_PRINT("enter", ("table %s", m_tabname));
2356 
2357   ndb_protect_char(key_name, index_name, sizeof(index_name) - 1, '/');
2358   if (idx_type != PRIMARY_KEY_INDEX && idx_type != UNIQUE_INDEX)
2359   {
2360     DBUG_PRINT("info", ("Get handle to index %s", index_name));
2361     const NDBINDEX *index;
2362     do
2363     {
2364       index= dict->getIndexGlobal(index_name, *m_table);
2365       if (!index)
2366         ERR_RETURN(dict->getNdbError());
2367       DBUG_PRINT("info", ("index: 0x%lx  id: %d  version: %d.%d  status: %d",
2368                           (long) index,
2369                           index->getObjectId(),
2370                           index->getObjectVersion() & 0xFFFFFF,
2371                           index->getObjectVersion() >> 24,
2372                           index->getObjectStatus()));
2373       assert(index->getObjectStatus() ==
2374              NdbDictionary::Object::Retrieved);
2375       break;
2376     } while (1);
2377     m_index[index_no].index= index;
2378   }
2379   if (idx_type == UNIQUE_ORDERED_INDEX || idx_type == UNIQUE_INDEX)
2380   {
2381     char unique_index_name[FN_LEN + 1];
2382     static const char* unique_suffix= "$unique";
2383     m_has_unique_index= TRUE;
2384     strxnmov(unique_index_name, FN_LEN, index_name, unique_suffix, NullS);
2385     DBUG_PRINT("info", ("Get handle to unique_index %s", unique_index_name));
2386     const NDBINDEX *index;
2387     do
2388     {
2389       index= dict->getIndexGlobal(unique_index_name, *m_table);
2390       if (!index)
2391         ERR_RETURN(dict->getNdbError());
2392       DBUG_PRINT("info", ("index: 0x%lx  id: %d  version: %d.%d  status: %d",
2393                           (long) index,
2394                           index->getObjectId(),
2395                           index->getObjectVersion() & 0xFFFFFF,
2396                           index->getObjectVersion() >> 24,
2397                           index->getObjectStatus()));
2398       assert(index->getObjectStatus() ==
2399              NdbDictionary::Object::Retrieved);
2400       break;
2401     } while (1);
2402     m_index[index_no].unique_index= index;
2403     error= fix_unique_index_attr_order(m_index[index_no], index, key_info);
2404   }
2405 
2406   if (!error)
2407     error= add_index_ndb_record(dict, key_info, index_no);
2408 
2409   if (!error)
2410     m_index[index_no].status= ACTIVE;
2411 
2412   DBUG_RETURN(error);
2413 }
2414 
2415 /*
2416   We use this function to convert null bit masks, as found in class Field,
2417   to bit numbers, as used in NdbRecord.
2418 */
2419 static uint
2420 null_bit_mask_to_bit_number(uchar bit_mask)
2421 {
2422   switch (bit_mask)
2423   {
2424     case  0x1: return 0;
2425     case  0x2: return 1;
2426     case  0x4: return 2;
2427     case  0x8: return 3;
2428     case 0x10: return 4;
2429     case 0x20: return 5;
2430     case 0x40: return 6;
2431     case 0x80: return 7;
2432     default:
2433       assert(false);
2434       return 0;
2435   }
2436 }
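/*
  Example (illustrative): a Field whose null_bit mask is 0x20 keeps its
  NULL flag in bit 5 of the null byte, so

    null_bit_mask_to_bit_number(0x20) == 5

  which is the bit-number form NdbRecord expects in nullbit_bit_in_byte.
*/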
2437 
2438 static void
2439 ndb_set_record_specification(uint field_no,
2440                              NdbDictionary::RecordSpecification *spec,
2441                              const TABLE *table,
2442                              const NdbDictionary::Table *ndb_table)
2443 {
2444   spec->column= ndb_table->getColumn(field_no);
2445   spec->offset= Uint32(table->field[field_no]->ptr - table->record[0]);
2446   if (table->field[field_no]->real_maybe_null())
2447   {
2448     spec->nullbit_byte_offset=
2449       Uint32(table->field[field_no]->null_offset());
2450     spec->nullbit_bit_in_byte=
2451       null_bit_mask_to_bit_number(table->field[field_no]->null_bit);
2452   }
2453   else if (table->field[field_no]->type() == MYSQL_TYPE_BIT)
2454   {
2455     /* We need to store the position of the overflow bits. */
2456     const Field_bit* field_bit= static_cast<Field_bit*>(table->field[field_no]);
2457     spec->nullbit_byte_offset=
2458       Uint32(field_bit->bit_ptr - table->record[0]);
2459     spec->nullbit_bit_in_byte= field_bit->bit_ofs;
2460   }
2461   else
2462   {
2463     spec->nullbit_byte_offset= 0;
2464     spec->nullbit_bit_in_byte= 0;
2465   }
2466   spec->column_flags= 0;
2467   if (table->field[field_no]->type() == MYSQL_TYPE_STRING &&
2468       table->field[field_no]->pack_length() == 0)
2469   {
2470     /*
2471       This is CHAR(0), which we represent as
2472       a nullable BIT(1) column where we ignore the data bit
2473     */
2474     spec->column_flags |=
2475         NdbDictionary::RecordSpecification::BitColMapsNullBitOnly;
2476   }
2477 }
2478 
2479 int
2480 ha_ndbcluster::add_table_ndb_record(NDBDICT *dict)
2481 {
2482   DBUG_ENTER("ha_ndbcluster::add_table_ndb_record()");
2483   NdbDictionary::RecordSpecification spec[NDB_MAX_ATTRIBUTES_IN_TABLE + 2];
2484   NdbRecord *rec;
2485   uint i;
2486 
2487   for (i= 0; i < table_share->fields; i++)
2488   {
2489     ndb_set_record_specification(i, &spec[i], table, m_table);
2490   }
2491 
2492   rec= dict->createRecord(m_table, spec, i, sizeof(spec[0]),
2493                           NdbDictionary::RecMysqldBitfield |
2494                           NdbDictionary::RecPerColumnFlags);
2495   if (! rec)
2496     ERR_RETURN(dict->getNdbError());
2497   m_ndb_record= rec;
2498 
2499   DBUG_RETURN(0);
2500 }
2501 
2502 /* Create NdbRecord for setting hidden primary key from Uint64. */
2503 int
2504 ha_ndbcluster::add_hidden_pk_ndb_record(NDBDICT *dict)
2505 {
2506   DBUG_ENTER("ha_ndbcluster::add_hidden_pk_ndb_record");
2507   NdbDictionary::RecordSpecification spec[1];
2508   NdbRecord *rec;
2509 
2510   spec[0].column= m_table->getColumn(table_share->fields);
2511   spec[0].offset= 0;
2512   spec[0].nullbit_byte_offset= 0;
2513   spec[0].nullbit_bit_in_byte= 0;
2514 
2515   rec= dict->createRecord(m_table, spec, 1, sizeof(spec[0]));
2516   if (! rec)
2517     ERR_RETURN(dict->getNdbError());
2518   m_ndb_hidden_key_record= rec;
2519 
2520   DBUG_RETURN(0);
2521 }
2522 
2523 int
2524 ha_ndbcluster::add_index_ndb_record(NDBDICT *dict, KEY *key_info, uint index_no)
2525 {
2526   DBUG_ENTER("ha_ndbcluster::add_index_ndb_record");
2527   NdbDictionary::RecordSpecification spec[NDB_MAX_ATTRIBUTES_IN_TABLE + 2];
2528   NdbRecord *rec;
2529 
2530   Uint32 offset= 0;
2531   for (uint i= 0; i < key_info->user_defined_key_parts; i++)
2532   {
2533     KEY_PART_INFO *kp= &key_info->key_part[i];
2534 
2535     spec[i].column= m_table->getColumn(kp->fieldnr - 1);
2536     if (! spec[i].column)
2537       ERR_RETURN(dict->getNdbError());
2538     if (kp->null_bit)
2539     {
2540       /* Nullable column. */
2541       spec[i].offset= offset + 1;           // First byte is NULL flag
2542       spec[i].nullbit_byte_offset= offset;
2543       spec[i].nullbit_bit_in_byte= 0;
2544     }
2545     else
2546     {
2547       /* Not nullable column. */
2548       spec[i].offset= offset;
2549       spec[i].nullbit_byte_offset= 0;
2550       spec[i].nullbit_bit_in_byte= 0;
2551     }
2552     offset+= kp->store_length;
2553   }
2554 
2555   if (m_index[index_no].index)
2556   {
2557     /*
2558       Enable MysqldShrinkVarchar flag so that the two-byte length used by
2559       mysqld for short varchar keys is correctly converted into a one-byte
2560       length used by Ndb kernel.
2561     */
2562     rec= dict->createRecord(m_index[index_no].index, m_table,
2563                             spec, key_info->user_defined_key_parts, sizeof(spec[0]),
2564                             ( NdbDictionary::RecMysqldShrinkVarchar |
2565                               NdbDictionary::RecMysqldBitfield ));
2566     if (! rec)
2567       ERR_RETURN(dict->getNdbError());
2568     m_index[index_no].ndb_record_key= rec;
2569   }
2570   else
2571     m_index[index_no].ndb_record_key= NULL;
2572 
2573   if (m_index[index_no].unique_index)
2574   {
2575     rec= dict->createRecord(m_index[index_no].unique_index, m_table,
2576                             spec, key_info->user_defined_key_parts, sizeof(spec[0]),
2577                             ( NdbDictionary::RecMysqldShrinkVarchar |
2578                               NdbDictionary::RecMysqldBitfield ));
2579     if (! rec)
2580       ERR_RETURN(dict->getNdbError());
2581     m_index[index_no].ndb_unique_record_key= rec;
2582   }
2583   else if (index_no == table_share->primary_key)
2584   {
2585     /* The primary key is special, there is no explicit NDB index associated. */
2586     rec= dict->createRecord(m_table,
2587                             spec, key_info->user_defined_key_parts, sizeof(spec[0]),
2588                             ( NdbDictionary::RecMysqldShrinkVarchar |
2589                               NdbDictionary::RecMysqldBitfield ));
2590     if (! rec)
2591       ERR_RETURN(dict->getNdbError());
2592     m_index[index_no].ndb_unique_record_key= rec;
2593   }
2594   else
2595     m_index[index_no].ndb_unique_record_key= NULL;
2596 
2597   /* Now do the same, but this time with offsets from Field, for row access. */
2598   for (uint i= 0; i < key_info->user_defined_key_parts; i++)
2599   {
2600     const KEY_PART_INFO *kp= &key_info->key_part[i];
2601 
2602     spec[i].offset= kp->offset;
2603     if (kp->null_bit)
2604     {
2605       /* Nullable column. */
2606       spec[i].nullbit_byte_offset= kp->null_offset;
2607       spec[i].nullbit_bit_in_byte= null_bit_mask_to_bit_number(kp->null_bit);
2608     }
2609     else
2610     {
2611       /* Not nullable column. */
2612       spec[i].nullbit_byte_offset= 0;
2613       spec[i].nullbit_bit_in_byte= 0;
2614     }
2615   }
2616 
2617   if (m_index[index_no].unique_index)
2618   {
2619     rec= dict->createRecord(m_index[index_no].unique_index, m_table,
2620                             spec, key_info->user_defined_key_parts, sizeof(spec[0]),
2621                             NdbDictionary::RecMysqldBitfield);
2622     if (! rec)
2623       ERR_RETURN(dict->getNdbError());
2624     m_index[index_no].ndb_unique_record_row= rec;
2625   }
2626   else if (index_no == table_share->primary_key)
2627   {
2628     rec= dict->createRecord(m_table,
2629                             spec, key_info->user_defined_key_parts, sizeof(spec[0]),
2630                             NdbDictionary::RecMysqldBitfield);
2631     if (! rec)
2632       ERR_RETURN(dict->getNdbError());
2633     m_index[index_no].ndb_unique_record_row= rec;
2634   }
2635   else
2636     m_index[index_no].ndb_unique_record_row= NULL;
2637 
2638   DBUG_RETURN(0);
2639 }
2640 
2641 /*
2642   Associate index handles for each index of a table
2643 */
2644 int ha_ndbcluster::open_indexes(THD *thd, Ndb *ndb, TABLE *tab,
2645                                 bool ignore_error)
2646 {
2647   uint i;
2648   int error= 0;
2649   NDBDICT *dict= ndb->getDictionary();
2650   KEY* key_info= tab->key_info;
2651   const char **key_name= tab->s->keynames.type_names;
2652   DBUG_ENTER("ha_ndbcluster::open_indexes");
2653   m_has_unique_index= FALSE;
2654   btree_keys.clear_all();
2655   for (i= 0; i < tab->s->keys; i++, key_info++, key_name++)
2656   {
2657     if ((error= add_index_handle(thd, dict, key_info, *key_name, i)))
2658     {
2659       if (ignore_error)
2660         m_index[i].index= m_index[i].unique_index= NULL;
2661       else
2662         break;
2663     }
2664     m_index[i].null_in_unique_index= FALSE;
2665     if (check_index_fields_not_null(key_info))
2666       m_index[i].null_in_unique_index= TRUE;
2667 
2668     if (error == 0 && MY_TEST(index_flags(i, 0, 0) & HA_READ_RANGE))
2669       btree_keys.set_bit(i);
2670   }
2671 
2672   if (error && !ignore_error)
2673   {
2674     while (i > 0)
2675     {
2676       i--;
2677       if (m_index[i].index)
2678       {
2679          dict->removeIndexGlobal(*m_index[i].index, 1);
2680          m_index[i].index= NULL;
2681       }
2682       if (m_index[i].unique_index)
2683       {
2684          dict->removeIndexGlobal(*m_index[i].unique_index, 1);
2685          m_index[i].unique_index= NULL;
2686       }
2687     }
2688   }
2689 
2690   assert(error == 0 || error == 4243);
2691 
2692   DBUG_RETURN(error);
2693 }
2694 
2695 /*
2696   Renumber indexes in index list by shifting out
2697   indexes that are to be dropped
2698  */
2699 void ha_ndbcluster::renumber_indexes(Ndb *ndb, TABLE *tab)
2700 {
2701   uint i;
2702   const char *index_name;
2703   KEY* key_info= tab->key_info;
2704   const char **key_name= tab->s->keynames.type_names;
2705   DBUG_ENTER("ha_ndbcluster::renumber_indexes");
2706 
2707   for (i= 0; i < tab->s->keys; i++, key_info++, key_name++)
2708   {
2709     index_name= *key_name;
2710     NDB_INDEX_TYPE idx_type= get_index_type_from_table(i);
2711     m_index[i].type= idx_type;
2712     if (m_index[i].status == TO_BE_DROPPED)
2713     {
2714       DBUG_PRINT("info", ("Shifting index %s(%i) out of the list",
2715                           index_name, i));
2716       NDB_INDEX_DATA tmp;
2717       uint j= i + 1;
2718       // Shift index out of list
2719       while(j != MAX_KEY && m_index[j].status != UNDEFINED)
2720       {
2721         tmp=  m_index[j - 1];
2722         m_index[j - 1]= m_index[j];
2723         m_index[j]= tmp;
2724         j++;
2725       }
2726     }
2727   }
2728 
2729   DBUG_VOID_RETURN;
2730 }
2731 
2732 /*
2733   Drop all indexes that are marked for deletion
2734 */
2735 int ha_ndbcluster::drop_indexes(Ndb *ndb, TABLE *tab)
2736 {
2737   uint i;
2738   int error= 0;
2739   const char *index_name;
2740   KEY* key_info= tab->key_info;
2741   NDBDICT *dict= ndb->getDictionary();
2742   DBUG_ENTER("ha_ndbcluster::drop_indexes");
2743 
2744   for (i= 0; i < tab->s->keys; i++, key_info++)
2745   {
2746     NDB_INDEX_TYPE idx_type= get_index_type_from_table(i);
2747     m_index[i].type= idx_type;
2748     if (m_index[i].status == TO_BE_DROPPED)
2749     {
2750       const NdbDictionary::Index *index= m_index[i].index;
2751       const NdbDictionary::Index *unique_index= m_index[i].unique_index;
2752 
2753       if (index)
2754       {
2755         index_name= index->getName();
2756         DBUG_PRINT("info", ("Dropping index %u: %s", i, index_name));
2757         // Drop ordered index from ndb
2758         if (dict->dropIndexGlobal(*index) == 0)
2759         {
2760           dict->removeIndexGlobal(*index, 1);
2761           m_index[i].index= NULL;
2762         }
2763         else
2764         {
2765           error= ndb_to_mysql_error(&dict->getNdbError());
2766           m_dupkey= i; // for HA_ERR_DROP_INDEX_FK
2767         }
2768       }
2769       if (!error && unique_index)
2770       {
2771         index_name= unique_index->getName();
2772         DBUG_PRINT("info", ("Dropping unique index %u: %s", i, index_name));
2773         // Drop unique index from ndb
2774         if (dict->dropIndexGlobal(*unique_index) == 0)
2775         {
2776           dict->removeIndexGlobal(*unique_index, 1);
2777           m_index[i].unique_index= NULL;
2778         }
2779         else
2780         {
2781           error=ndb_to_mysql_error(&dict->getNdbError());
2782           m_dupkey= i; // for HA_ERR_DROP_INDEX_FK
2783         }
2784       }
2785       if (error)
2786         DBUG_RETURN(error);
2787       ndb_clear_index(dict, m_index[i]);
2788       continue;
2789     }
2790   }
2791 
2792   DBUG_RETURN(error);
2793 }
2794 
2795 /**
2796   Decode the type of an index from information
2797   provided in table object.
2798 */
2799 NDB_INDEX_TYPE ha_ndbcluster::get_index_type_from_table(uint inx) const
2800 {
2801   return get_index_type_from_key(inx, table_share->key_info,
2802                                  inx == table_share->primary_key);
2803 }
2804 
2805 NDB_INDEX_TYPE ha_ndbcluster::get_index_type_from_key(uint inx,
2806                                                       KEY *key_info,
2807                                                       bool primary) const
2808 {
2809   bool is_hash_index=  (key_info[inx].algorithm ==
2810                         HA_KEY_ALG_HASH);
2811   if (primary)
2812     return is_hash_index ? PRIMARY_KEY_INDEX : PRIMARY_KEY_ORDERED_INDEX;
2813 
2814   return ((key_info[inx].flags & HA_NOSAME) ?
2815           (is_hash_index ? UNIQUE_INDEX : UNIQUE_ORDERED_INDEX) :
2816           ORDERED_INDEX);
2817 }
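/*
  Mapping examples (illustrative SQL, assuming the index algorithm is
  taken from the key definition as written):

    PRIMARY KEY (a)             -> PRIMARY_KEY_ORDERED_INDEX
    PRIMARY KEY (a) USING HASH  -> PRIMARY_KEY_INDEX
    UNIQUE KEY (b)              -> UNIQUE_ORDERED_INDEX
    UNIQUE KEY (b) USING HASH   -> UNIQUE_INDEX
    KEY (c)                     -> ORDERED_INDEX
*/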
2818 
2819 bool ha_ndbcluster::check_index_fields_not_null(KEY* key_info) const
2820 {
2821   KEY_PART_INFO* key_part= key_info->key_part;
2822   KEY_PART_INFO* end= key_part+key_info->user_defined_key_parts;
2823   DBUG_ENTER("ha_ndbcluster::check_index_fields_not_null");
2824 
2825   for (; key_part != end; key_part++)
2826   {
2827     Field* field= key_part->field;
2828     if (field->maybe_null())
2829       DBUG_RETURN(TRUE);
2830   }
2831 
2832   DBUG_RETURN(FALSE);
2833 }
2834 
2835 void ha_ndbcluster::release_metadata(THD *thd, Ndb *ndb)
2836 {
2837   uint i;
2838 
2839   DBUG_ENTER("release_metadata");
2840   DBUG_PRINT("enter", ("m_tabname: %s", m_tabname));
2841 
2842   NDBDICT *dict= ndb->getDictionary();
2843   int invalidate_indexes= 0;
2844   if (thd && thd->lex && thd->lex->sql_command == SQLCOM_FLUSH)
2845   {
2846     invalidate_indexes = 1;
2847   }
2848   if (m_table != NULL)
2849   {
2850     if (m_ndb_record != NULL)
2851     {
2852       dict->releaseRecord(m_ndb_record);
2853       m_ndb_record= NULL;
2854     }
2855     if (m_ndb_hidden_key_record != NULL)
2856     {
2857       dict->releaseRecord(m_ndb_hidden_key_record);
2858       m_ndb_hidden_key_record= NULL;
2859     }
2860     if (m_table->getObjectStatus() == NdbDictionary::Object::Invalid)
2861       invalidate_indexes= 1;
2862     dict->removeTableGlobal(*m_table, invalidate_indexes);
2863   }
2864   // TODO investigate
2865   assert(m_table_info == NULL);
2866   m_table_info= NULL;
2867 
2868   // Release index list
2869   for (i= 0; i < MAX_KEY; i++)
2870   {
2871     if (m_index[i].unique_index)
2872     {
2873       assert(m_table != NULL);
2874       dict->removeIndexGlobal(*m_index[i].unique_index, invalidate_indexes);
2875     }
2876     if (m_index[i].index)
2877     {
2878       assert(m_table != NULL);
2879       dict->removeIndexGlobal(*m_index[i].index, invalidate_indexes);
2880     }
2881     ndb_clear_index(dict, m_index[i]);
2882   }
2883 
2884   // Release FK data
2885   release_fk_data(thd);
2886 
2887   m_table= NULL;
2888   DBUG_VOID_RETURN;
2889 }
2890 
2891 
2892 /*
2893   Map from thr_lock_type to NdbOperation::LockMode
2894 */
2895 static inline
2896 NdbOperation::LockMode get_ndb_lock_mode(enum thr_lock_type type)
2897 {
2898   if (type >= TL_WRITE_ALLOW_WRITE)
2899     return NdbOperation::LM_Exclusive;
2900   if (type ==  TL_READ_WITH_SHARED_LOCKS)
2901     return NdbOperation::LM_Read;
2902   return NdbOperation::LM_CommittedRead;
2903 }
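/*
  Examples of the lock mode mapping above (illustrative):

    TL_WRITE, TL_WRITE_ALLOW_WRITE  -> NdbOperation::LM_Exclusive
    TL_READ_WITH_SHARED_LOCKS       -> NdbOperation::LM_Read
    TL_READ (plain SELECT)          -> NdbOperation::LM_CommittedRead
*/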
2904 
2905 
2906 static const ulong index_type_flags[]=
2907 {
2908   /* UNDEFINED_INDEX */
2909   0,
2910 
2911   /* PRIMARY_KEY_INDEX */
2912   HA_ONLY_WHOLE_INDEX,
2913 
2914   /* PRIMARY_KEY_ORDERED_INDEX */
2915   /*
2916      Enable HA_KEYREAD_ONLY when "sorted" indexes are supported,
2917      so that ORDER BY clauses can be optimized by reading directly
2918      through the index.
2919   */
2920   // HA_KEYREAD_ONLY |
2921   HA_READ_NEXT |
2922   HA_READ_PREV |
2923   HA_READ_RANGE |
2924   HA_READ_ORDER,
2925 
2926   /* UNIQUE_INDEX */
2927   HA_ONLY_WHOLE_INDEX,
2928 
2929   /* UNIQUE_ORDERED_INDEX */
2930   HA_READ_NEXT |
2931   HA_READ_PREV |
2932   HA_READ_RANGE |
2933   HA_READ_ORDER,
2934 
2935   /* ORDERED_INDEX */
2936   HA_READ_NEXT |
2937   HA_READ_PREV |
2938   HA_READ_RANGE |
2939   HA_READ_ORDER
2940 };
2941 
2942 static const int index_flags_size= sizeof(index_type_flags)/sizeof(ulong);
2943 
2944 inline NDB_INDEX_TYPE ha_ndbcluster::get_index_type(uint idx_no) const
2945 {
2946   assert(idx_no < MAX_KEY);
2947   return m_index[idx_no].type;
2948 }
2949 
2950 inline bool ha_ndbcluster::has_null_in_unique_index(uint idx_no) const
2951 {
2952   assert(idx_no < MAX_KEY);
2953   return m_index[idx_no].null_in_unique_index;
2954 }
2955 
2956 
2957 /**
2958   Get the flags for an index.
2959 
2960   @return
2961     flags depending on the type of the index.
2962 */
2963 
2964 inline ulong ha_ndbcluster::index_flags(uint idx_no, uint part,
2965                                         bool all_parts) const
2966 {
2967   DBUG_ENTER("ha_ndbcluster::index_flags");
2968   DBUG_PRINT("enter", ("idx_no: %u", idx_no));
2969   assert(get_index_type_from_table(idx_no) < index_flags_size);
2970   DBUG_RETURN(index_type_flags[get_index_type_from_table(idx_no)] |
2971               HA_KEY_SCAN_NOT_ROR);
2972 }
2973 
2974 bool
2975 ha_ndbcluster::primary_key_is_clustered() const
2976 {
2977 
2978   if (table->s->primary_key == MAX_KEY)
2979     return false;
2980 
2981   /*
2982     NOTE 1: our ordered indexes are not really clustered
2983     but since accessing data when scanning the index is free
2984     it's a good approximation
2985 
2986     NOTE 2: We really should consider DD attributes here too
2987     (for which there is IO to read data when scanning index)
2988     but that will need to be handled later...
2989   */
2990   const ndb_index_type idx_type =
2991     get_index_type_from_table(table->s->primary_key);
2992   return (idx_type == PRIMARY_KEY_ORDERED_INDEX ||
2993           idx_type == UNIQUE_ORDERED_INDEX ||
2994           idx_type == ORDERED_INDEX);
2995 }
2996 
2997 bool ha_ndbcluster::check_index_fields_in_write_set(uint keyno)
2998 {
2999   KEY* key_info= table->key_info + keyno;
3000   KEY_PART_INFO* key_part= key_info->key_part;
3001   KEY_PART_INFO* end= key_part+key_info->user_defined_key_parts;
3002   uint i;
3003   DBUG_ENTER("check_index_fields_in_write_set");
3004 
3005   for (i= 0; key_part != end; key_part++, i++)
3006   {
3007     Field* field= key_part->field;
3008     if (!bitmap_is_set(table->write_set, field->field_index))
3009     {
3010       DBUG_RETURN(false);
3011     }
3012   }
3013 
3014   DBUG_RETURN(true);
3015 }
3016 
3017 
3018 /**
3019   Read one record from NDB using primary key.
3020 */
3021 
3022 int ha_ndbcluster::pk_read(const uchar *key, uint key_len, uchar *buf,
3023                            uint32 *part_id)
3024 {
3025   NdbConnection *trans= m_thd_ndb->trans;
3026   int res;
3027   DBUG_ENTER("pk_read");
3028   DBUG_PRINT("enter", ("key_len: %u read_set=%x",
3029                        key_len, table->read_set->bitmap[0]));
3030   DBUG_DUMP("key", key, key_len);
3031   assert(trans);
3032 
3033   NdbOperation::LockMode lm= get_ndb_lock_mode(m_lock.type);
3034 
3035   if (check_if_pushable(NdbQueryOperationDef::PrimaryKeyAccess,
3036                         table->s->primary_key))
3037   {
3038     // Is parent of pushed join
3039     assert(lm == NdbOperation::LM_CommittedRead);
3040     const int error= pk_unique_index_read_key_pushed(table->s->primary_key, key,
3041                                                      (m_user_defined_partitioning ?
3042                                                      part_id : NULL));
3043     if (unlikely(error))
3044       DBUG_RETURN(error);
3045 
3046     assert(m_active_query!=NULL);
3047     if ((res = execute_no_commit_ie(m_thd_ndb, trans)) != 0 ||
3048         m_active_query->getNdbError().code)
3049     {
3050       table->status= STATUS_NOT_FOUND;
3051       DBUG_RETURN(ndb_err(trans));
3052     }
3053 
3054     int result= fetch_next_pushed();
3055     if (result == NdbQuery::NextResult_gotRow)
3056     {
3057       DBUG_RETURN(0);
3058     }
3059     else if (result == NdbQuery::NextResult_scanComplete)
3060     {
3061       DBUG_RETURN(HA_ERR_KEY_NOT_FOUND);
3062     }
3063     else
3064     {
3065       DBUG_RETURN(ndb_err(trans));
3066     }
3067   }
3068   else
3069   {
3070     if (m_pushed_join_operation == PUSHED_ROOT)
3071     {
3072       m_thd_ndb->m_pushed_queries_dropped++;
3073     }
3074 
3075     const NdbOperation *op;
3076     if (!(op= pk_unique_index_read_key(table->s->primary_key, key, buf, lm,
3077                                        (m_user_defined_partitioning ?
3078                                         part_id :
3079                                         NULL))))
3080       ERR_RETURN(trans->getNdbError());
3081 
3082     if ((res = execute_no_commit_ie(m_thd_ndb, trans)) != 0 ||
3083         op->getNdbError().code)
3084     {
3085       table->status= STATUS_NOT_FOUND;
3086       DBUG_RETURN(ndb_err(trans));
3087     }
3088     table->status= 0;
3089     DBUG_RETURN(0);
3090   }
3091 }
3092 
3093 /**
3094   Update primary key or part id by doing a delete+insert.
3095 */
3096 
3097 int ha_ndbcluster::ndb_pk_update_row(THD *thd,
3098                                      const uchar *old_data, uchar *new_data)
3099 {
3100   NdbTransaction *trans= m_thd_ndb->trans;
3101   int error;
3102   DBUG_ENTER("ndb_pk_update_row");
3103   assert(trans);
3104 
3105   DBUG_PRINT("info", ("primary key update or partition change, "
3106                       "doing delete+insert"));
3107 
3108 #ifndef NDEBUG
3109   /*
3110    * 'old_data' contains columns as specified in 'read_set'.
3111    * All PK columns must be included for ::ndb_delete_row()
3112    */
3113   assert(bitmap_is_subset(m_pk_bitmap_p, table->read_set));
3114   /*
3115    * As a complete 'new_data' row is reinserted after the delete,
3116    * all columns must be contained in the read+write union.
3117    */
3118   bitmap_copy(&m_bitmap, table->read_set);
3119   bitmap_union(&m_bitmap, table->write_set);
3120   assert(bitmap_is_set_all(&m_bitmap));
3121 #endif
3122 
3123   // Delete old row
3124   error= ndb_delete_row(old_data, TRUE);
3125   if (error)
3126   {
3127     DBUG_PRINT("info", ("delete failed"));
3128     DBUG_RETURN(error);
3129   }
3130 
3131   // Insert new row
3132   DBUG_PRINT("info", ("delete succeded"));
3133   bool batched_update= (m_active_cursor != 0);
3134   /*
3135     If we are updating a primary key with auto_increment
3136     then we need to update the auto_increment counter
3137   */
3138   if (table->found_next_number_field &&
3139       bitmap_is_set(table->write_set,
3140                     table->found_next_number_field->field_index) &&
3141       (error= set_auto_inc(thd, table->found_next_number_field)))
3142   {
3143     DBUG_RETURN(error);
3144   }
3145 
3146   /*
3147     We are mapping a MySQLD PK changing update to an NdbApi delete
3148     and insert.
3149     The original PK changing update may not have written new values
3150     to all columns, so the write set may be partial.
3151     We set the write set to be all columns so that all values are
3152     copied from the old row to the new row.
3153   */
3154   my_bitmap_map *old_map=
3155     tmp_use_all_columns(table, table->write_set);
3156   error= ndb_write_row(new_data, TRUE, batched_update);
3157   tmp_restore_column_map(table->write_set, old_map);
3158 
3159   if (error)
3160   {
3161     DBUG_PRINT("info", ("insert failed"));
3162     if (trans->commitStatus() == NdbConnection::Started)
3163     {
3164       if (thd->slave_thread)
3165         g_ndb_slave_state.atTransactionAbort();
3166       m_thd_ndb->m_unsent_bytes= 0;
3167       m_thd_ndb->m_execute_count++;
3168       DBUG_PRINT("info", ("execute_count: %u", m_thd_ndb->m_execute_count));
3169       trans->execute(NdbTransaction::Rollback);
3170 #ifdef FIXED_OLD_DATA_TO_ACTUALLY_CONTAIN_GOOD_DATA
3171       int undo_res;
3172       // Undo delete_row(old_data)
3173       undo_res= ndb_write_row((uchar *)old_data, TRUE, batched_update);
3174       if (undo_res)
3175         push_warning(table->in_use,
3176                      Sql_condition::SL_WARNING,
3177                      undo_res,
3178                      "NDB failed undoing delete at primary key update");
3179 #endif
3180     }
3181     DBUG_RETURN(error);
3182   }
3183   DBUG_PRINT("info", ("delete+insert succeeded"));
3184 
3185   DBUG_RETURN(0);
3186 }
3187 
3188 /**
3189   Check that all operations between first and last have
3190   returned the given errcode.
3191   If checking for HA_ERR_KEY_NOT_FOUND, update m_dupkey
3192   for any operation that succeeded (i.e. found a conflicting row).
3193 */
3194 bool ha_ndbcluster::check_all_operations_for_error(NdbTransaction *trans,
3195                                                    const NdbOperation *first,
3196                                                    const NdbOperation *last,
3197                                                    uint errcode)
3198 {
3199   const NdbOperation *op= first;
3200   DBUG_ENTER("ha_ndbcluster::check_all_operations_for_error");
3201 
3202   while(op)
3203   {
3204     NdbError err= op->getNdbError();
3205     if (err.status != NdbError::Success)
3206     {
3207       if (ndb_to_mysql_error(&err) != (int) errcode)
3208         DBUG_RETURN(FALSE);
3209       if (op == last) break;
3210       op= trans->getNextCompletedOperation(op);
3211     }
3212     else
3213     {
3214       // We found a duplicate
3215       if (op->getType() == NdbOperation::UniqueIndexAccess)
3216       {
3217         if (errcode == HA_ERR_KEY_NOT_FOUND)
3218         {
3219           NdbIndexOperation *iop= (NdbIndexOperation *) op;
3220           const NDBINDEX *index= iop->getIndex();
3221           // Find the key_no of the index
3222           for(uint i= 0; i<table->s->keys; i++)
3223           {
3224             if (m_index[i].unique_index == index)
3225             {
3226               m_dupkey= i;
3227               break;
3228             }
3229           }
3230         }
3231       }
3232       else
3233       {
3234         // Must have been primary key access
3235         assert(op->getType() == NdbOperation::PrimaryKeyAccess);
3236         if (errcode == HA_ERR_KEY_NOT_FOUND)
3237           m_dupkey= table->s->primary_key;
3238       }
3239       DBUG_RETURN(FALSE);
3240     }
3241   }
3242   DBUG_RETURN(TRUE);
3243 }
3244 
3245 
3246 /**
3247  * Check if record contains any null valued columns that are part of a key
3248  */
3249 static
3250 int
3251 check_null_in_record(const KEY* key_info, const uchar *record)
3252 {
3253   KEY_PART_INFO *curr_part, *end_part;
3254   curr_part= key_info->key_part;
3255   end_part= curr_part + key_info->user_defined_key_parts;
3256 
3257   while (curr_part != end_part)
3258   {
3259     if (curr_part->null_bit &&
3260         (record[curr_part->null_offset] & curr_part->null_bit))
3261       return 1;
3262     curr_part++;
3263   }
3264   return 0;
3265   /*
3266     We could instead pre-compute a bitmask in table_share with one bit for
3267     every null-bit in the key, and then check this just by AND'ing the
3268     bitmask with the record's null bitmap and testing for a non-zero result.
3269     But it is not clear that this would be worth it.
3270   */
3271 }
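/*
  Illustrative sketch only (not part of the implementation): the
  pre-computed bitmask alternative mentioned above could look roughly like
  this, assuming a hypothetical per-key 'key_null_mask' with one bit set
  for every null-bit belonging to the key, and null flags stored at the
  start of the record as in the standard row format:

    static bool has_null_in_key(const uchar *record,
                                const uchar *key_null_mask,
                                uint null_bytes)
    {
      for (uint i= 0; i < null_bytes; i++)
      {
        if (record[i] & key_null_mask[i])  // some key column is NULL
          return true;
      }
      return false;
    }

  This would replace the per-key-part loop above with a short AND over the
  record's null bytes.
*/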
3272 
3273 /* Empty mask and dummy row, for reading no attributes using NdbRecord. */
3274 /* Mask will be initialized to all zeros by linker. */
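/* Both are passed to readTuple() / lockCurrentTuple() below whenever an
   operation only needs to check row existence or take a lock without
   transferring any attribute values, see e.g. peek_indexed_rows() and
   scan_handle_lock_tuple(). */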
3275 static unsigned char empty_mask[(NDB_MAX_ATTRIBUTES_IN_TABLE+7)/8];
3276 static char dummy_row[1];
3277 
3278 /**
3279   Peek to check if any rows already exist with conflicting
3280   primary key or unique index values
3281 */
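/*
  Return value summary (as derived from the code below, not a separate
  specification): 0 means a conflicting row exists and m_dupkey identifies
  the offending key; HA_ERR_KEY_NOT_FOUND is returned directly when the
  table has no applicable keys, and is otherwise the typical mapping of the
  NDB "no such tuple" error when no conflicting row was found; any other
  value is an error code mapped from NDB.
*/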
3282 
3283 int ha_ndbcluster::peek_indexed_rows(const uchar *record,
3284                                      NDB_WRITE_OP write_op)
3285 {
3286   NdbTransaction *trans;
3287   const NdbOperation *op;
3288   const NdbOperation *first, *last;
3289   NdbOperation::OperationOptions options;
3290   NdbOperation::OperationOptions *poptions=NULL;
3291   options.optionsPresent = 0;
3292   uint i;
3293   int res, error;
3294   DBUG_ENTER("peek_indexed_rows");
3295   if (unlikely(!(trans= get_transaction(error))))
3296   {
3297     DBUG_RETURN(error);
3298   }
3299   const NdbOperation::LockMode lm = get_ndb_lock_mode(m_lock.type);
3300   first= NULL;
3301   if (write_op != NDB_UPDATE && table->s->primary_key != MAX_KEY)
3302   {
3303     /*
3304      * Fetch any row with colliding primary key
3305      */
3306     const NdbRecord *key_rec=
3307       m_index[table->s->primary_key].ndb_unique_record_row;
3308 
3309     if (m_user_defined_partitioning)
3310     {
3311       uint32 part_id;
3312       int error;
3313       longlong func_value;
3314       my_bitmap_map *old_map= dbug_tmp_use_all_columns(table, table->read_set);
3315       error= m_part_info->get_partition_id(m_part_info, &part_id, &func_value);
3316       dbug_tmp_restore_column_map(table->read_set, old_map);
3317       if (error)
3318       {
3319         m_part_info->err_value= func_value;
3320         DBUG_RETURN(error);
3321       }
3322       options.optionsPresent |= NdbOperation::OperationOptions::OO_PARTITION_ID;
3323       options.partitionId=part_id;
3324       poptions=&options;
3325     }
3326 
3327     if (!(op= trans->readTuple(key_rec, (const char *)record,
3328                                m_ndb_record, dummy_row, lm, empty_mask,
3329                                poptions,
3330                                sizeof(NdbOperation::OperationOptions))))
3331       ERR_RETURN(trans->getNdbError());
3332 
3333     first= op;
3334   }
3335   /*
3336    * Fetch any rows with colliding unique indexes
3337    */
3338   KEY* key_info;
3339   for (i= 0, key_info= table->key_info; i < table->s->keys; i++, key_info++)
3340   {
3341     if (i != table_share->primary_key &&
3342         key_info->flags & HA_NOSAME &&
3343         bitmap_is_overlapping(table->write_set, m_key_fields[i]))
3344     {
3345       /*
3346         A unique index is defined on the table and it is being updated.
3347         We cannot look up a NULL field value in a unique index. But since
3348         keys with NULLs are not indexed, such rows cannot conflict anyway, so
3349         we just skip the index in this case.
3350       */
3351       if (check_null_in_record(key_info, record))
3352       {
3353         DBUG_PRINT("info", ("skipping check for key with NULL"));
3354         continue;
3355       }
3356       if (write_op != NDB_INSERT && !check_index_fields_in_write_set(i))
3357       {
3358         DBUG_PRINT("info", ("skipping check for key %u not in write_set", i));
3359         continue;
3360       }
3361 
3362       const NdbOperation *iop;
3363       const NdbRecord *key_rec= m_index[i].ndb_unique_record_row;
3364       if (!(iop= trans->readTuple(key_rec, (const char *)record,
3365                                   m_ndb_record, dummy_row,
3366                                   lm, empty_mask)))
3367         ERR_RETURN(trans->getNdbError());
3368 
3369       if (!first)
3370         first= iop;
3371     }
3372   }
3373   last= trans->getLastDefinedOperation();
3374   if (first)
3375     res= execute_no_commit_ie(m_thd_ndb, trans);
3376   else
3377   {
3378     // Table has no keys
3379     table->status= STATUS_NOT_FOUND;
3380     DBUG_RETURN(HA_ERR_KEY_NOT_FOUND);
3381   }
3382   const NdbError ndberr= trans->getNdbError();
3383   error= ndberr.mysql_code;
3384   if ((error != 0 && error != HA_ERR_KEY_NOT_FOUND) ||
3385       check_all_operations_for_error(trans, first, last,
3386                                      HA_ERR_KEY_NOT_FOUND))
3387   {
3388     table->status= STATUS_NOT_FOUND;
3389     DBUG_RETURN(ndb_err(trans));
3390   }
3391   else
3392   {
3393     DBUG_PRINT("info", ("m_dupkey %d", m_dupkey));
3394   }
3395   DBUG_RETURN(0);
3396 }
3397 
3398 
3399 /**
3400   Read one record from NDB using unique secondary index.
3401 */
3402 
3403 int ha_ndbcluster::unique_index_read(const uchar *key,
3404                                      uint key_len, uchar *buf)
3405 {
3406   NdbTransaction *trans= m_thd_ndb->trans;
3407   DBUG_ENTER("ha_ndbcluster::unique_index_read");
3408   DBUG_PRINT("enter", ("key_len: %u, index: %u", key_len, active_index));
3409   DBUG_DUMP("key", key, key_len);
3410   assert(trans);
3411 
3412   NdbOperation::LockMode lm= get_ndb_lock_mode(m_lock.type);
3413 
3414   if (check_if_pushable(NdbQueryOperationDef::UniqueIndexAccess,
3415                         active_index))
3416   {
3417     assert(lm == NdbOperation::LM_CommittedRead);
3418     const int error= pk_unique_index_read_key_pushed(active_index, key, NULL);
3419     if (unlikely(error))
3420       DBUG_RETURN(error);
3421 
3422     assert(m_active_query!=NULL);
3423     if (execute_no_commit_ie(m_thd_ndb, trans) != 0 ||
3424         m_active_query->getNdbError().code)
3425     {
3426       table->status= STATUS_GARBAGE;
3427       DBUG_RETURN(ndb_err(trans));
3428     }
3429 
3430     int result= fetch_next_pushed();
3431     if (result == NdbQuery::NextResult_gotRow)
3432     {
3433       DBUG_RETURN(0);
3434     }
3435     else if (result == NdbQuery::NextResult_scanComplete)
3436     {
3437       DBUG_RETURN(HA_ERR_KEY_NOT_FOUND);
3438     }
3439     else
3440     {
3441       DBUG_RETURN(ndb_err(trans));
3442     }
3443   }
3444   else
3445   {
3446     if (m_pushed_join_operation == PUSHED_ROOT)
3447     {
3448       m_thd_ndb->m_pushed_queries_dropped++;
3449     }
3450 
3451     const NdbOperation *op;
3452 
3453     if (!(op= pk_unique_index_read_key(active_index, key, buf, lm, NULL)))
3454       ERR_RETURN(trans->getNdbError());
3455 
3456     if (execute_no_commit_ie(m_thd_ndb, trans) != 0 ||
3457         op->getNdbError().code)
3458     {
3459       int err= ndb_err(trans);
3460       if(err==HA_ERR_KEY_NOT_FOUND)
3461         table->status= STATUS_NOT_FOUND;
3462       else
3463         table->status= STATUS_GARBAGE;
3464 
3465       DBUG_RETURN(err);
3466     }
3467 
3468     table->status= 0;
3469     DBUG_RETURN(0);
3470   }
3471 }
3472 
3473 int
3474 ha_ndbcluster::scan_handle_lock_tuple(NdbScanOperation *scanOp,
3475                                       NdbTransaction *trans)
3476 {
3477   DBUG_ENTER("ha_ndbcluster::scan_handle_lock_tuple");
3478   if (m_lock_tuple)
3479   {
3480     /*
3481       Lock level m_lock.type is either TL_WRITE_ALLOW_WRITE
3482       (SELECT FOR UPDATE) or TL_READ_WITH_SHARED_LOCKS (SELECT
3483       LOCK IN SHARE MODE) and the row was not explicitly unlocked
3484       with an unlock_row() call
3485     */
3486     DBUG_PRINT("info", ("Keeping lock on scanned row"));
3487 
3488     if (!(scanOp->lockCurrentTuple(trans, m_ndb_record,
3489                                    dummy_row, empty_mask)))
3490     {
3491       m_lock_tuple= false;
3492       ERR_RETURN(trans->getNdbError());
3493     }
3494 
3495     /* Perform 'empty update' to mark the read in the binlog, iff required */
3496     /*
3497      * Lock_mode = exclusive
3498      * Session_state = marking_exclusive_reads
3499      * THEN
3500      * issue updateCurrentTuple with AnyValue explicitly set
3501      */
3502     if ((m_lock.type >= TL_WRITE_ALLOW_WRITE) &&
3503         ndb_log_exclusive_reads(current_thd))
3504     {
3505       if (scan_log_exclusive_read(scanOp, trans))
3506       {
3507         m_lock_tuple= false;
3508         ERR_RETURN(trans->getNdbError());
3509       }
3510     }
3511 
3512     m_thd_ndb->m_unsent_bytes+=12;
3513     m_lock_tuple= false;
3514   }
3515   DBUG_RETURN(0);
3516 }
3517 
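/*
  Fetch the next row from an open scan. Returns 0 when a row is available
  (referenced by m_next_row), 1 when the scan is exhausted, and otherwise
  an error code mapped from NDB. Pending unsent operations are flushed to
  NDB before more rows are fetched.
*/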
3518 inline int ha_ndbcluster::fetch_next(NdbScanOperation* cursor)
3519 {
3520   DBUG_ENTER("fetch_next");
3521   int local_check;
3522   int error;
3523   NdbTransaction *trans= m_thd_ndb->trans;
3524 
3525   assert(trans);
3526   if ((error= scan_handle_lock_tuple(cursor, trans)) != 0)
3527     DBUG_RETURN(error);
3528 
3529   bool contact_ndb= m_lock.type < TL_WRITE_ALLOW_WRITE &&
3530                     m_lock.type != TL_READ_WITH_SHARED_LOCKS;
3531   do {
3532     DBUG_PRINT("info", ("Call nextResult, contact_ndb: %d", contact_ndb));
3533     /*
3534       We can only handle one tuple with blobs at a time.
3535     */
3536     if (m_thd_ndb->m_unsent_bytes && m_blobs_pending)
3537     {
3538       if (execute_no_commit(m_thd_ndb, trans, m_ignore_no_key) != 0)
3539         DBUG_RETURN(ndb_err(trans));
3540     }
3541 
3542     /* There should be no unexamined completed operations;
3543        nextResult() on Blobs generates Blob part read ops,
3544        so we free them here.
3545     */
3546     release_completed_operations(trans);
3547 
3548     if ((local_check= cursor->nextResult(&_m_next_row,
3549                                          contact_ndb,
3550                                          m_thd_ndb->m_force_send)) == 0)
3551     {
3552       /*
3553 	Explicitly lock tuple if "select for update" or
3554 	"select lock in share mode"
3555       */
3556       m_lock_tuple= (m_lock.type == TL_WRITE_ALLOW_WRITE
3557 		     ||
3558 		     m_lock.type == TL_READ_WITH_SHARED_LOCKS);
3559       DBUG_RETURN(0);
3560     }
3561     else if (local_check == 1 || local_check == 2)
3562     {
3563       // 1: No more records
3564       // 2: No more cached records
3565 
3566       /*
3567         Before fetching more rows and releasing lock(s),
3568         all pending update or delete operations should
3569         be sent to NDB
3570       */
3571       DBUG_PRINT("info", ("thd_ndb->m_unsent_bytes: %ld",
3572                           (long) m_thd_ndb->m_unsent_bytes));
3573       if (m_thd_ndb->m_unsent_bytes)
3574       {
3575         if ((error = flush_bulk_insert()) != 0)
3576           DBUG_RETURN(error);
3577       }
3578       contact_ndb= (local_check == 2);
3579     }
3580     else
3581     {
3582       DBUG_RETURN(ndb_err(trans));
3583     }
3584   } while (local_check == 2);
3585 
3586   DBUG_RETURN(1);
3587 }
3588 
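/*
  Note: unlike fetch_next(), the value returned here is an
  NdbQuery::NextResultOutcome; callers such as next_result() and
  index_next_pushed() map NextResult_gotRow to 0 and
  NextResult_scanComplete to HA_ERR_END_OF_FILE.
*/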
3589 int ha_ndbcluster::fetch_next_pushed()
3590 {
3591   DBUG_ENTER("fetch_next_pushed (from pushed operation)");
3592 
3593   assert(m_pushed_operation);
3594   NdbQuery::NextResultOutcome result= m_pushed_operation->nextResult(true, m_thd_ndb->m_force_send);
3595 
3596   /**
3597    * Only prepare result & status from this operation in pushed join.
3598    * Consecutive rows are prepared through ::index_read_pushed() and
3599    * ::index_next_pushed() which unpack and set correct status for each row.
3600    */
3601   if (result == NdbQuery::NextResult_gotRow)
3602   {
3603     assert(m_next_row!=NULL);
3604     DBUG_PRINT("info", ("One more record found"));
3605     table->status= 0;
3606     unpack_record(table->record[0], m_next_row);
3607 //  m_thd_ndb->m_pushed_reads++;
3608 //  DBUG_RETURN(0)
3609   }
3610   else if (result == NdbQuery::NextResult_scanComplete)
3611   {
3612     assert(m_next_row==NULL);
3613     DBUG_PRINT("info", ("No more records"));
3614     table->status= STATUS_NOT_FOUND;
3615 //  m_thd_ndb->m_pushed_reads++;
3616 //  DBUG_RETURN(HA_ERR_END_OF_FILE);
3617   }
3618   else
3619   {
3620     DBUG_PRINT("info", ("Error from 'nextResult()'"));
3621     table->status= STATUS_GARBAGE;
3622 //  assert(false);
3623     DBUG_RETURN(ndb_err(m_thd_ndb->trans));
3624   }
3625   DBUG_RETURN(result);
3626 }
3627 
3628 /**
3629   Get the first record from an indexed table access being a child
3630   operation in a pushed join. Fetch will be from prefetched
3631   cached records which are materialized into the bound buffer
3632   areas as a result of this call.
3633 */
3634 
3635 int
3636 ha_ndbcluster::index_read_pushed(uchar *buf, const uchar *key,
3637                                  key_part_map keypart_map)
3638 {
3639   DBUG_ENTER("index_read_pushed");
3640 
3641   // The handler might have decided not to execute the pushed join which was prepared.
3642   // In that case we do an unpushed index_read based on plain NdbOperations.
3643   if (unlikely(!check_is_pushed()))
3644   {
3645     int res= index_read_map(buf, key, keypart_map, HA_READ_KEY_EXACT);
3646     if (!res && table->vfield)
3647       res= update_generated_read_fields(buf, table);
3648     DBUG_RETURN(res);
3649   }
3650 
3651   // Might need to re-establish first result row (wrt. its parents which may have been navigated)
3652   NdbQuery::NextResultOutcome result= m_pushed_operation->firstResult();
3653 
3654   // Result from pushed operation will be referred by 'm_next_row' if non-NULL
3655   if (result == NdbQuery::NextResult_gotRow)
3656   {
3657     assert(m_next_row!=NULL);
3658     unpack_record(buf, m_next_row);
3659     table->status= 0;
3660     m_thd_ndb->m_pushed_reads++;
3661   }
3662   else
3663   {
3664     assert(result!=NdbQuery::NextResult_gotRow);
3665     table->status= STATUS_NOT_FOUND;
3666     DBUG_PRINT("info", ("No record found"));
3667 //  m_thd_ndb->m_pushed_reads++;
3668 //  DBUG_RETURN(HA_ERR_END_OF_FILE);
3669   }
3670   DBUG_RETURN(0);
3671 }
3672 
3673 
3674 /**
3675   Get the next record from an indexed table access being a child
3676   operation in a pushed join. Fetch will be from prefetched
3677   cached records which are materialized into the bound buffer
3678   areas as a result of this call.
3679 */
3680 int ha_ndbcluster::index_next_pushed(uchar *buf)
3681 {
3682   DBUG_ENTER("index_next_pushed");
3683 
3684   // The handler might have decided not to execute the pushed join which was prepared.
3685   // In that case we do an unpushed index_next based on plain NdbOperations.
3686   if (unlikely(!check_is_pushed()))
3687   {
3688     int res= index_next(buf);
3689     if (!res && table->vfield)
3690       res= update_generated_read_fields(buf, table);
3691     DBUG_RETURN(res);
3692   }
3693 
3694   assert(m_pushed_join_operation>PUSHED_ROOT);  // Child of a pushed join
3695   assert(m_active_query==NULL);
3696 
3697   int res = fetch_next_pushed();
3698   if (res == NdbQuery::NextResult_gotRow)
3699   {
3700     DBUG_RETURN(0);
3701   }
3702   else if (res == NdbQuery::NextResult_scanComplete)
3703   {
3704     DBUG_RETURN(HA_ERR_END_OF_FILE);
3705   }
3706   else
3707   {
3708     DBUG_RETURN(ndb_err(m_thd_ndb->trans));
3709   }
3710 }
3711 
3712 
3713 /**
3714   Get the next record of a started scan. Try to fetch
3715   it locally from NdbApi cached records if possible,
3716   otherwise ask NDB for more.
3717 
3718   @note
3719     If this is an update/delete, make sure not to contact
3720     NDB before any pending ops have been sent to NDB.
3721 */
3722 
3723 inline int ha_ndbcluster::next_result(uchar *buf)
3724 {
3725   int res;
3726   DBUG_ENTER("next_result");
3727 
3728   if (m_active_cursor)
3729   {
3730     if ((res= fetch_next(m_active_cursor)) == 0)
3731     {
3732       DBUG_PRINT("info", ("One more record found"));
3733 
3734       unpack_record(buf, m_next_row);
3735       table->status= 0;
3736       DBUG_RETURN(0);
3737     }
3738     else if (res == 1)
3739     {
3740       // No more records
3741       table->status= STATUS_NOT_FOUND;
3742 
3743       DBUG_PRINT("info", ("No more records"));
3744       DBUG_RETURN(HA_ERR_END_OF_FILE);
3745     }
3746     else
3747     {
3748       DBUG_RETURN(ndb_err(m_thd_ndb->trans));
3749     }
3750   }
3751   else if (m_active_query)
3752   {
3753     res= fetch_next_pushed();
3754     if (res == NdbQuery::NextResult_gotRow)
3755     {
3756       DBUG_RETURN(0);
3757     }
3758     else if (res == NdbQuery::NextResult_scanComplete)
3759     {
3760       DBUG_RETURN(HA_ERR_END_OF_FILE);
3761     }
3762     else
3763     {
3764       DBUG_RETURN(ndb_err(m_thd_ndb->trans));
3765     }
3766   }
3767   else
3768     DBUG_RETURN(HA_ERR_END_OF_FILE);
3769 }
3770 
3771 int
3772 ha_ndbcluster::log_exclusive_read(const NdbRecord *key_rec,
3773                                   const uchar *key,
3774                                   uchar *buf,
3775                                   Uint32 *ppartition_id)
3776 {
3777   DBUG_ENTER("log_exclusive_read");
3778   NdbOperation::OperationOptions opts;
3779   opts.optionsPresent=
3780     NdbOperation::OperationOptions::OO_ABORTOPTION |
3781     NdbOperation::OperationOptions::OO_ANYVALUE;
3782 
3783   /* If the key does not exist, that is ok */
3784   opts.abortOption= NdbOperation::AO_IgnoreError;
3785 
3786   /*
3787      Mark the AnyValue as a read operation, so that the update
3788      is processed
3789   */
3790   opts.anyValue= 0;
3791   ndbcluster_anyvalue_set_read_op(opts.anyValue);
3792 
3793   if (ppartition_id != NULL)
3794   {
3795     assert(m_user_defined_partitioning);
3796     opts.optionsPresent|= NdbOperation::OperationOptions::OO_PARTITION_ID;
3797     opts.partitionId= *ppartition_id;
3798   }
3799 
3800   const NdbOperation* markingOp=
3801     m_thd_ndb->trans->updateTuple(key_rec,
3802                                   (const char*) key,
3803                                   m_ndb_record,
3804                                   (char*)buf,
3805                                   empty_mask,
3806                                   &opts,
3807                                   opts.size());
3808   if (!markingOp)
3809   {
3810     char msg[FN_REFLEN];
3811     my_snprintf(msg, sizeof(msg), "Error logging exclusive reads, failed creating markingOp, %u, %s\n",
3812                 m_thd_ndb->trans->getNdbError().code,
3813                 m_thd_ndb->trans->getNdbError().message);
3814     push_warning_printf(current_thd, Sql_condition::SL_WARNING,
3815                         ER_EXCEPTIONS_WRITE_ERROR,
3816                         ER(ER_EXCEPTIONS_WRITE_ERROR), msg);
3817     /*
3818       By returning -1 the caller (pk_unique_index_read_key) will return
3819       NULL and error on transaction object will be returned.
3820     */
3821     DBUG_RETURN(-1);
3822   }
3823 
3824   DBUG_RETURN(0);
3825 }
3826 
3827 int
3828 ha_ndbcluster::scan_log_exclusive_read(NdbScanOperation *cursor,
3829                                        NdbTransaction *trans)
3830 {
3831   DBUG_ENTER("ha_ndbcluster::scan_log_exclusive_read");
3832   NdbOperation::OperationOptions opts;
3833   opts.optionsPresent= NdbOperation::OperationOptions::OO_ANYVALUE;
3834 
3835   /*
3836      Mark the AnyValue as a read operation, so that the update
3837      is processed
3838   */
3839   opts.anyValue= 0;
3840   ndbcluster_anyvalue_set_read_op(opts.anyValue);
3841 
3842   const NdbOperation* markingOp=
3843     cursor->updateCurrentTuple(trans, m_ndb_record,
3844                                dummy_row, empty_mask,
3845                                &opts,
3846                                sizeof(NdbOperation::OperationOptions));
3847   if (markingOp == NULL)
3848   {
3849     char msg[FN_REFLEN];
3850     my_snprintf(msg, sizeof(msg), "Error logging exclusive reads during scan, failed creating markingOp, %u, %s\n",
3851                 m_thd_ndb->trans->getNdbError().code,
3852                 m_thd_ndb->trans->getNdbError().message);
3853     push_warning_printf(current_thd, Sql_condition::SL_WARNING,
3854                         ER_EXCEPTIONS_WRITE_ERROR,
3855                         ER(ER_EXCEPTIONS_WRITE_ERROR), msg);
3856     DBUG_RETURN(-1);
3857   }
3858 
3859   DBUG_RETURN(0);
3860 }
3861 
3862 /**
3863   Do a primary key or unique key index read operation.
3864   The key value is taken from a buffer in mysqld key format.
3865 */
3866 const NdbOperation *
3867 ha_ndbcluster::pk_unique_index_read_key(uint idx, const uchar *key, uchar *buf,
3868                                         NdbOperation::LockMode lm,
3869                                         Uint32 *ppartition_id)
3870 {
3871   DBUG_ENTER("pk_unique_index_read_key");
3872   const NdbOperation *op;
3873   const NdbRecord *key_rec;
3874   NdbOperation::OperationOptions options;
3875   NdbOperation::OperationOptions *poptions = NULL;
3876   options.optionsPresent= 0;
3877   NdbOperation::GetValueSpec gets[2];
3878   ndb_index_type idx_type=
3879     (idx != MAX_KEY)?
3880     get_index_type(idx)
3881     : UNDEFINED_INDEX;
3882 
3883   assert(m_thd_ndb->trans);
3884 
3885   DBUG_PRINT("info", ("pk_unique_index_read_key of table %s", table->s->table_name.str));
3886 
3887   if (idx != MAX_KEY)
3888     key_rec= m_index[idx].ndb_unique_record_key;
3889   else
3890     key_rec= m_ndb_hidden_key_record;
3891 
3892   /* Initialize the null bitmap, setting unused null bits to 1. */
3893   memset(buf, 0xff, table->s->null_bytes);
3894 
3895   if (table_share->primary_key == MAX_KEY)
3896   {
3897     get_hidden_fields_keyop(&options, gets);
3898     poptions= &options;
3899   }
3900   get_read_set(false, idx);
3901 
3902   if (ppartition_id != NULL)
3903   {
3904     assert(m_user_defined_partitioning);
3905     options.optionsPresent|= NdbOperation::OperationOptions::OO_PARTITION_ID;
3906     options.partitionId= *ppartition_id;
3907     poptions= &options;
3908   }
3909 
3910   op= m_thd_ndb->trans->readTuple(key_rec, (const char *)key, m_ndb_record,
3911                                   (char *)buf, lm,
3912                                   (uchar *)(table->read_set->bitmap), poptions,
3913                                   sizeof(NdbOperation::OperationOptions));
3914 
3915   if (uses_blob_value(table->read_set) &&
3916       get_blob_values(op, buf, table->read_set) != 0)
3917     DBUG_RETURN(NULL);
3918 
3919   /* Perform 'empty update' to mark the read in the binlog, iff required */
3920   /*
3921    * Lock_mode = exclusive
3922    * Index = primary or unique
3923    * Session_state = marking_exclusive_reads
3924    * THEN
3925    * issue updateTuple with AnyValue explicitly set
3926    */
3927   if ((lm == NdbOperation::LM_Exclusive) &&
3928       /*
3929         We don't need to check index type
3930         (idx_type == PRIMARY_KEY_INDEX ||
3931         idx_type == PRIMARY_KEY_ORDERED_INDEX ||
3932         idx_type == UNIQUE_ORDERED_INDEX ||
3933         idx_type == UNIQUE_INDEX)
3934         since this method is only invoked for
3935         primary or unique indexes, but we do need to check
3936         if it was a hidden primary key.
3937       */
3938       idx_type != UNDEFINED_INDEX &&
3939       ndb_log_exclusive_reads(current_thd))
3940   {
3941     if (log_exclusive_read(key_rec, key, buf, ppartition_id) != 0)
3942       DBUG_RETURN(NULL);
3943   }
3944 
3945   DBUG_RETURN(op);
3946 }
3947 
3948 
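/*
  Background note: a MySQL VARCHAR whose maximum byte length fits in one
  length byte is stored with a single ("shrunken") length prefix, longer
  VARCHARs use two length bytes. The flag computed below is passed on to
  NdbQueryParamValue in pk_unique_index_read_key_pushed() so that the key
  value's length prefix can be interpreted accordingly.
*/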
3949 static
3950 bool
3951 is_shrinked_varchar(const Field *field)
3952 {
3953   if (field->real_type() ==  MYSQL_TYPE_VARCHAR)
3954   {
3955     if (((Field_varstring*)field)->length_bytes == 1)
3956       return true;
3957   }
3958 
3959   return false;
3960 }
3961 
3962 int
3963 ha_ndbcluster::pk_unique_index_read_key_pushed(uint idx,
3964                                                const uchar *key,
3965                                                Uint32 *ppartition_id)
3966 {
3967   DBUG_ENTER("pk_unique_index_read_key_pushed");
3968   NdbOperation::OperationOptions options;
3969   NdbOperation::OperationOptions *poptions = NULL;
3970   options.optionsPresent= 0;
3971   NdbOperation::GetValueSpec gets[2];
3972 
3973   assert(m_thd_ndb->trans);
3974   assert(idx < MAX_KEY);
3975 
3976   if (m_active_query)
3977   {
3978     m_active_query->close(FALSE);
3979     m_active_query= NULL;
3980   }
3981 
3982   if (table_share->primary_key == MAX_KEY)
3983   {
3984     get_hidden_fields_keyop(&options, gets);
3985     poptions= &options;
3986   }
3987   get_read_set(false, idx);
3988 
3989   if (ppartition_id != NULL)
3990   {
3991     assert(m_user_defined_partitioning);
3992     options.optionsPresent|= NdbOperation::OperationOptions::OO_PARTITION_ID;
3993     options.partitionId= *ppartition_id;
3994     poptions= &options;
3995   }
3996 
3997   KEY *key_def= &table->key_info[idx];
3998   KEY_PART_INFO *key_part;
3999 
4000   uint i;
4001   Uint32 offset= 0;
4002   NdbQueryParamValue paramValues[ndb_pushed_join::MAX_KEY_PART];
4003   assert(key_def->user_defined_key_parts <= ndb_pushed_join::MAX_KEY_PART);
4004 
4005   uint map[ndb_pushed_join::MAX_KEY_PART];
4006   ndbcluster_build_key_map(m_table, m_index[idx], &table->key_info[idx], map);
4007 
4008   // Bind key values defining root of pushed join
4009   for (i = 0, key_part= key_def->key_part; i < key_def->user_defined_key_parts; i++, key_part++)
4010   {
4011     bool shrinkVarChar= is_shrinked_varchar(key_part->field);
4012 
4013     if (key_part->null_bit)                         // Column is nullable
4014     {
4015       assert(idx != table_share->primary_key); // PK can't be nullable
4016       assert(*(key+offset)==0);                // Null values not allowed in key
4017                                                     // Value is imm. after NULL indicator
4018       paramValues[map[i]]= NdbQueryParamValue(key+offset+1,shrinkVarChar);
4019     }
4020     else                                            // Non-nullable column
4021     {
4022       paramValues[map[i]]= NdbQueryParamValue(key+offset,shrinkVarChar);
4023     }
4024     offset+= key_part->store_length;
4025   }
4026 
4027   const int ret= create_pushed_join(paramValues, key_def->user_defined_key_parts);
4028   DBUG_RETURN(ret);
4029 }
4030 
4031 
4032 /** Count number of columns in key part. */
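/*
  Worked example (illustrative): for an index on (a, b) and a key_range
  covering only column 'a', key->length equals a's store_length, so the
  loop below stops after the first key part and 1 is returned.
*/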
4033 static uint
4034 count_key_columns(const KEY *key_info, const key_range *key)
4035 {
4036   KEY_PART_INFO *first_key_part= key_info->key_part;
4037   KEY_PART_INFO *key_part_end= first_key_part + key_info->user_defined_key_parts;
4038   KEY_PART_INFO *key_part;
4039   uint length= 0;
4040   for(key_part= first_key_part; key_part < key_part_end; key_part++)
4041   {
4042     if (length >= key->length)
4043       break;
4044     length+= key_part->store_length;
4045   }
4046   return (uint)(key_part - first_key_part);
4047 }
4048 
4049 /* Helper method to compute NDB index bounds. Note: does not set range_no. */
4050 /* Stats queries may differ, so the "from" parameter identifies the caller: 0:normal, 1:records_in_range (RIR), 2:records_per_key (RPK). */
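/*
  Sketch of the flag mapping implemented below: a start_key flag of
  HA_READ_AFTER_KEY or HA_READ_BEFORE_KEY gives an exclusive low bound,
  anything else an inclusive one; any end_key flag other than
  HA_READ_BEFORE_KEY gives an inclusive high bound; and an equality flag
  (HA_READ_KEY_EXACT / HA_READ_PREFIX_LAST) copies one bound onto the other
  so that both sides of the range are set (for the start_key this is
  skipped when from == 1, the records_in_range case).
*/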
4051 void
4052 compute_index_bounds(NdbIndexScanOperation::IndexBound & bound,
4053                      const KEY *key_info,
4054                      const key_range *start_key, const key_range *end_key,
4055                      int from)
4056 {
4057   DBUG_ENTER("ha_ndbcluster::compute_index_bounds");
4058   DBUG_PRINT("info", ("from: %d", from));
4059 
4060 #ifndef NDEBUG
4061   DBUG_PRINT("info", ("key parts: %u length: %u",
4062                       key_info->user_defined_key_parts, key_info->key_length));
4063   {
4064     for (uint j= 0; j <= 1; j++)
4065     {
4066       const key_range* kr= (j == 0 ? start_key : end_key);
4067       if (kr)
4068       {
4069         DBUG_PRINT("info", ("key range %u: length: %u map: %lx flag: %d",
4070                           j, kr->length, kr->keypart_map, kr->flag));
4071         DBUG_DUMP("key", kr->key, kr->length);
4072       }
4073       else
4074       {
4075         DBUG_PRINT("info", ("key range %u: none", j));
4076       }
4077     }
4078   }
4079 #endif
4080 
4081   if (start_key)
4082   {
4083     bound.low_key= (const char*)start_key->key;
4084     bound.low_key_count= count_key_columns(key_info, start_key);
4085     bound.low_inclusive=
4086       start_key->flag != HA_READ_AFTER_KEY &&
4087       start_key->flag != HA_READ_BEFORE_KEY;
4088   }
4089   else
4090   {
4091     bound.low_key= NULL;
4092     bound.low_key_count= 0;
4093   }
4094 
4095   /* RIR query for x >= 1 inexplicably passes HA_READ_KEY_EXACT. */
4096   if (start_key &&
4097       (start_key->flag == HA_READ_KEY_EXACT ||
4098        start_key->flag == HA_READ_PREFIX_LAST) &&
4099       from != 1)
4100   {
4101     bound.high_key= bound.low_key;
4102     bound.high_key_count= bound.low_key_count;
4103     bound.high_inclusive= TRUE;
4104   }
4105   else if (end_key)
4106   {
4107     bound.high_key= (const char*)end_key->key;
4108     bound.high_key_count= count_key_columns(key_info, end_key);
4109     /*
4110       For some reason, 'where b >= 1 and b <= 3' uses HA_READ_AFTER_KEY for
4111       the end_key.
4112       So HA_READ_AFTER_KEY in end_key sets high_inclusive, even though in
4113       start_key it does not set low_inclusive.
4114     */
4115     bound.high_inclusive= end_key->flag != HA_READ_BEFORE_KEY;
4116     if (end_key->flag == HA_READ_KEY_EXACT ||
4117         end_key->flag == HA_READ_PREFIX_LAST)
4118     {
4119       bound.low_key= bound.high_key;
4120       bound.low_key_count= bound.high_key_count;
4121       bound.low_inclusive= TRUE;
4122     }
4123   }
4124   else
4125   {
4126     bound.high_key= NULL;
4127     bound.high_key_count= 0;
4128   }
4129   DBUG_PRINT("info", ("start_flag=%d end_flag=%d"
4130                       " lo_keys=%d lo_incl=%d hi_keys=%d hi_incl=%d",
4131                       start_key?start_key->flag:0, end_key?end_key->flag:0,
4132                       bound.low_key_count,
4133                       bound.low_key_count?bound.low_inclusive:0,
4134                       bound.high_key_count,
4135                       bound.high_key_count?bound.high_inclusive:0));
4136   DBUG_VOID_RETURN;
4137 }
4138 
4139 /**
4140   Start ordered index scan in NDB
4141 */
4142 
4143 int ha_ndbcluster::ordered_index_scan(const key_range *start_key,
4144                                       const key_range *end_key,
4145                                       bool sorted, bool descending,
4146                                       uchar* buf, part_id_range *part_spec)
4147 {
4148   NdbTransaction *trans;
4149   NdbIndexScanOperation *op;
4150   int error;
4151 
4152   DBUG_ENTER("ha_ndbcluster::ordered_index_scan");
4153   DBUG_PRINT("enter", ("index: %u, sorted: %d, descending: %d read_set=0x%x",
4154              active_index, sorted, descending, table->read_set->bitmap[0]));
4155   DBUG_PRINT("enter", ("Starting new ordered scan on %s", m_tabname));
4156 
4157   // Check that sorted seems to be initialised
4158   assert(sorted == 0 || sorted == 1);
4159 
4160   if (unlikely(!(trans= get_transaction(error))))
4161   {
4162     DBUG_RETURN(error);
4163   }
4164 
4165   if ((error= close_scan()))
4166     DBUG_RETURN(error);
4167 
4168   const NdbOperation::LockMode lm = get_ndb_lock_mode(m_lock.type);
4169 
4170   const NdbRecord *key_rec= m_index[active_index].ndb_record_key;
4171   const NdbRecord *row_rec= m_ndb_record;
4172 
4173   NdbIndexScanOperation::IndexBound bound;
4174   NdbIndexScanOperation::IndexBound *pbound = NULL;
4175   if (start_key != NULL || end_key != NULL)
4176   {
4177     /*
4178        Compute bounds info, reversing range boundaries
4179        if descending
4180      */
4181     compute_index_bounds(bound,
4182                          table->key_info + active_index,
4183                          (descending?
4184                           end_key : start_key),
4185                          (descending?
4186                           start_key : end_key),
4187                          0);
4188     bound.range_no = 0;
4189     pbound = &bound;
4190   }
4191 
4192   if (check_if_pushable(NdbQueryOperationDef::OrderedIndexScan, active_index))
4193   {
4194     const int error= create_pushed_join();
4195     if (unlikely(error))
4196       DBUG_RETURN(error);
4197 
4198     NdbQuery* const query= m_active_query;
4199     if (sorted && query->getQueryOperation((uint)PUSHED_ROOT)
4200                        ->setOrdering(descending ? NdbQueryOptions::ScanOrdering_descending
4201                                                 : NdbQueryOptions::ScanOrdering_ascending))
4202     {
4203       ERR_RETURN(query->getNdbError());
4204     }
4205 
4206     if (pbound  && query->setBound(key_rec, pbound)!=0)
4207       ERR_RETURN(query->getNdbError());
4208 
4209     m_thd_ndb->m_scan_count++;
4210 
4211     bool prunable = false;
4212     if (unlikely(query->isPrunable(prunable) != 0))
4213       ERR_RETURN(query->getNdbError());
4214     if (prunable)
4215       m_thd_ndb->m_pruned_scan_count++;
4216 
4217     // Can't have BLOB in pushed joins (yet)
4218     assert(!uses_blob_value(table->read_set));
4219   }
4220   else
4221   {
4222     if (m_pushed_join_operation == PUSHED_ROOT)
4223     {
4224       m_thd_ndb->m_pushed_queries_dropped++;
4225     }
4226 
4227     NdbScanOperation::ScanOptions options;
4228     options.optionsPresent=NdbScanOperation::ScanOptions::SO_SCANFLAGS;
4229     options.scan_flags=0;
4230 
4231     NdbOperation::GetValueSpec gets[2];
4232     if (table_share->primary_key == MAX_KEY)
4233       get_hidden_fields_scan(&options, gets);
4234 
4235     get_read_set(true, active_index);
4236 
4237     if (lm == NdbOperation::LM_Read)
4238       options.scan_flags|= NdbScanOperation::SF_KeyInfo;
4239     if (sorted)
4240       options.scan_flags|= NdbScanOperation::SF_OrderByFull;
4241     if (descending)
4242       options.scan_flags|= NdbScanOperation::SF_Descending;
4243 
4244     /* Partition pruning */
4245     if (m_use_partition_pruning &&
4246         m_user_defined_partitioning && part_spec != NULL &&
4247         part_spec->start_part == part_spec->end_part)
4248     {
4249       /* Explicitly set partition id when pruning User-defined partitioned scan */
4250       options.partitionId = part_spec->start_part;
4251       options.optionsPresent |= NdbScanOperation::ScanOptions::SO_PARTITION_ID;
4252     }
4253 
4254     NdbInterpretedCode code(m_table);
4255     if (m_cond && m_cond->generate_scan_filter(&code, &options))
4256       ERR_RETURN(code.getNdbError());
4257 
4258     if (!(op= trans->scanIndex(key_rec, row_rec, lm,
4259                                (uchar *)(table->read_set->bitmap),
4260                                pbound,
4261                                &options,
4262                                sizeof(NdbScanOperation::ScanOptions))))
4263       ERR_RETURN(trans->getNdbError());
4264 
4265     DBUG_PRINT("info", ("Is scan pruned to 1 partition? : %u", op->getPruned()));
4266     m_thd_ndb->m_scan_count++;
4267     m_thd_ndb->m_pruned_scan_count += (op->getPruned()? 1 : 0);
4268 
4269     if (uses_blob_value(table->read_set) &&
4270         get_blob_values(op, NULL, table->read_set) != 0)
4271       ERR_RETURN(op->getNdbError());
4272 
4273     m_active_cursor= op;
4274   }
4275 
4276   if (sorted)
4277   {
4278     m_thd_ndb->m_sorted_scan_count++;
4279   }
4280 
4281   if (execute_no_commit(m_thd_ndb, trans, m_ignore_no_key) != 0)
4282     DBUG_RETURN(ndb_err(trans));
4283 
4284   DBUG_RETURN(next_result(buf));
4285 }
4286 
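/*
  Note on the bit tests below (inferred from the surrounding checks, not
  from NdbApi documentation): checkColumns() is first called with an empty
  mask to see whether the table has any disk-based columns at all, then
  with the actual read_set; bit 0x2 of the result appears to indicate disk
  columns and bit 0x4 a main-memory column covered by the mask.
*/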
4287 static
4288 int
4289 guess_scan_flags(NdbOperation::LockMode lm,
4290 		 const NDBTAB* tab, const MY_BITMAP* readset)
4291 {
4292   int flags= 0;
4293   flags|= (lm == NdbOperation::LM_Read) ? NdbScanOperation::SF_KeyInfo : 0;
4294   if (tab->checkColumns(0, 0) & 2)
4295   {
4296     int ret = tab->checkColumns(readset->bitmap, no_bytes_in_map(readset));
4297 
4298     if (ret & 2)
4299     { // If disk columns...use disk scan
4300       flags |= NdbScanOperation::SF_DiskScan;
4301     }
4302     else if ((ret & 4) == 0 && (lm == NdbOperation::LM_Exclusive))
4303     {
4304       // If no mem column is set and exclusive...guess disk scan
4305       flags |= NdbScanOperation::SF_DiskScan;
4306     }
4307   }
4308   return flags;
4309 }
4310 
4311 /*
4312   Start full table scan in NDB or unique index scan
4313  */
4314 
4315 int ha_ndbcluster::full_table_scan(const KEY* key_info,
4316                                    const key_range *start_key,
4317                                    const key_range *end_key,
4318                                    uchar *buf)
4319 {
4320   int error;
4321   NdbTransaction *trans= m_thd_ndb->trans;
4322   part_id_range part_spec;
4323   bool use_set_part_id= FALSE;
4324   NdbOperation::GetValueSpec gets[2];
4325 
4326   DBUG_ENTER("full_table_scan");
4327   DBUG_PRINT("enter", ("Starting new scan on %s", m_tabname));
4328 
4329   if (m_use_partition_pruning && m_user_defined_partitioning)
4330   {
4331     assert(m_pushed_join_operation != PUSHED_ROOT);
4332     part_spec.start_part= 0;
4333     part_spec.end_part= m_part_info->get_tot_partitions() - 1;
4334     prune_partition_set(table, &part_spec);
4335     DBUG_PRINT("info", ("part_spec.start_part: %u  part_spec.end_part: %u",
4336                         part_spec.start_part, part_spec.end_part));
4337     /*
4338       If partition pruning has found no partition in set
4339       we can return HA_ERR_END_OF_FILE
4340     */
4341     if (part_spec.start_part > part_spec.end_part)
4342     {
4343       DBUG_RETURN(HA_ERR_END_OF_FILE);
4344     }
4345 
4346     if (part_spec.start_part == part_spec.end_part)
4347     {
4348       /*
4349        * Only one partition needs to be scanned. If sorted output was
4350        * requested it is no longer needed, since the output from a single
4351        * ordered partitioned index is always sorted.
4352        *
4353        * Note : This table scan pruning currently only occurs for
4354        * UserDefined partitioned tables.
4355        * It could be extended to occur for natively partitioned tables if
4356        * the Partitioning layer can make a key (e.g. start or end key)
4357        * available so that we can determine the correct pruning in the
4358        * NDBAPI layer.
4359        */
4360       use_set_part_id= TRUE;
4361       if (!trans)
4362         if (unlikely(!(trans= get_transaction_part_id(part_spec.start_part,
4363                                                       error))))
4364           DBUG_RETURN(error);
4365     }
4366   }
4367   if (!trans)
4368     if (unlikely(!(trans= start_transaction(error))))
4369       DBUG_RETURN(error);
4370 
4371   const NdbOperation::LockMode lm = get_ndb_lock_mode(m_lock.type);
4372   NdbScanOperation::ScanOptions options;
4373   options.optionsPresent = (NdbScanOperation::ScanOptions::SO_SCANFLAGS |
4374                             NdbScanOperation::ScanOptions::SO_PARALLEL);
4375   options.scan_flags = guess_scan_flags(lm, m_table, table->read_set);
4376   options.parallel= DEFAULT_PARALLELISM;
4377 
4378   if (use_set_part_id) {
4379     assert(m_user_defined_partitioning);
4380     options.optionsPresent|= NdbScanOperation::ScanOptions::SO_PARTITION_ID;
4381     options.partitionId = part_spec.start_part;
4382   };
4383 
4384   if (table_share->primary_key == MAX_KEY)
4385     get_hidden_fields_scan(&options, gets);
4386 
4387   get_read_set(true, MAX_KEY);
4388 
4389   if (check_if_pushable(NdbQueryOperationDef::TableScan))
4390   {
4391     const int error= create_pushed_join();
4392     if (unlikely(error))
4393       DBUG_RETURN(error);
4394 
4395     m_thd_ndb->m_scan_count++;
4396     // Can't have BLOB in pushed joins (yet)
4397     assert(!uses_blob_value(table->read_set));
4398   }
4399   else
4400   {
4401     if (m_pushed_join_operation == PUSHED_ROOT)
4402     {
4403       m_thd_ndb->m_pushed_queries_dropped++;
4404     }
4405 
4406     NdbScanOperation *op;
4407     NdbInterpretedCode code(m_table);
4408 
4409     if (!key_info)
4410     {
4411       if (m_cond && m_cond->generate_scan_filter(&code, &options))
4412         ERR_RETURN(code.getNdbError());
4413     }
4414     else
4415     {
4416       /* Unique index scan in NDB (full table scan with scan filter) */
4417       DBUG_PRINT("info", ("Starting unique index scan"));
4418       if (!m_cond)
4419         m_cond= new ha_ndbcluster_cond;
4420 
4421       if (!m_cond)
4422       {
4423         set_my_errno(HA_ERR_OUT_OF_MEM);
4424         DBUG_RETURN(my_errno());
4425       }
4426       if (m_cond->generate_scan_filter_from_key(&code, &options, key_info,
4427                                                 start_key, end_key))
4428         ERR_RETURN(code.getNdbError());
4429     }
4430 
4431     if (!(op= trans->scanTable(m_ndb_record, lm,
4432                                (uchar *)(table->read_set->bitmap),
4433                                &options, sizeof(NdbScanOperation::ScanOptions))))
4434       ERR_RETURN(trans->getNdbError());
4435 
4436     m_thd_ndb->m_scan_count++;
4437     m_thd_ndb->m_pruned_scan_count += (op->getPruned()? 1 : 0);
4438 
4439     assert(m_active_cursor==NULL);
4440     m_active_cursor= op;
4441 
4442     if (uses_blob_value(table->read_set) &&
4443         get_blob_values(op, NULL, table->read_set) != 0)
4444       ERR_RETURN(op->getNdbError());
4445   } // if (check_if_pushable(NdbQueryOperationDef::TableScan))
4446 
4447   if (execute_no_commit(m_thd_ndb, trans, m_ignore_no_key) != 0)
4448     DBUG_RETURN(ndb_err(trans));
4449   DBUG_PRINT("exit", ("Scan started successfully"));
4450   DBUG_RETURN(next_result(buf));
4451 } // ha_ndbcluster::full_table_scan()
4452 
4453 int
4454 ha_ndbcluster::set_auto_inc(THD *thd, Field *field)
4455 {
4456   DBUG_ENTER("ha_ndbcluster::set_auto_inc");
4457   bool read_bit= bitmap_is_set(table->read_set, field->field_index);
4458   bitmap_set_bit(table->read_set, field->field_index);
4459   Uint64 next_val= (Uint64) field->val_int() + 1;
4460   if (!read_bit)
4461     bitmap_clear_bit(table->read_set, field->field_index);
4462   DBUG_RETURN(set_auto_inc_val(thd, next_val));
4463 }
4464 
4465 
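/*
  RAII guard giving serialized access to the shared tuple_id_range: the
  NDB_SHARE mutex is taken in the constructor and released in the
  destructor, while 'range' exposes the protected range. See
  set_auto_inc_val() below for the typical usage pattern.
*/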
4466 class Ndb_tuple_id_range_guard {
4467   NDB_SHARE* m_share;
4468 public:
4469   Ndb_tuple_id_range_guard(NDB_SHARE* share) :
4470     m_share(share),
4471     range(share->tuple_id_range)
4472   {
4473     native_mutex_lock(&m_share->mutex);
4474   }
4475   ~Ndb_tuple_id_range_guard()
4476   {
4477     native_mutex_unlock(&m_share->mutex);
4478   }
4479   Ndb::TupleIdRange& range;
4480 };
4481 
4482 
4483 inline
4484 int
4485 ha_ndbcluster::set_auto_inc_val(THD *thd, Uint64 value)
4486 {
4487   Ndb *ndb= get_ndb(thd);
4488   DBUG_ENTER("ha_ndbcluster::set_auto_inc_val");
4489   DBUG_PRINT("enter", ("value: %llu", value));
4490   if (ndb->checkUpdateAutoIncrementValue(m_share->tuple_id_range, value))
4491   {
4492     Ndb_tuple_id_range_guard g(m_share);
4493     if (ndb->setAutoIncrementValue(m_table, g.range, value, TRUE)
4494         == -1)
4495       ERR_RETURN(ndb->getNdbError());
4496   }
4497   DBUG_RETURN(0);
4498 }
4499 
4500 
4501 void
4502 ha_ndbcluster::get_read_set(bool use_cursor, uint idx)
4503 {
4504   const bool is_delete=
4505     table->in_use->lex->sql_command == SQLCOM_DELETE ||
4506     table->in_use->lex->sql_command == SQLCOM_DELETE_MULTI;
4507 
4508   const bool is_update=
4509     table->in_use->lex->sql_command == SQLCOM_UPDATE ||
4510     table->in_use->lex->sql_command == SQLCOM_UPDATE_MULTI;
4511 
4512   assert(use_cursor ||
4513          idx == table_share->primary_key ||
4514          table->key_info[idx].flags & HA_NOSAME);
4515 
4516   if (!is_delete && !is_update)
4517   {
4518     return;
4519   }
4520 
4521   /**
4522    * It is questionable that we in some cases seem to
4523    * do a read even if 'm_read_before_write_removal_used' is set.
4524    * The usage pattern for this seems to be update/delete
4525    * cursors which establish a 'current of' position before
4526    * a delete- / updateCurrentTuple().
4527    * Anyway, when 'm_read_before_write_removal_used' is set we don't
4528    * have to add more columns to 'read_set'.
4529    *
4530    * FUTURE: Investigate if we could have completely
4531    * cleared the 'read_set'.
4532    *
4533    */
4534   if (m_read_before_write_removal_used)
4535   {
4536     return;
4537   }
4538 
4539   /**
4540    * If (part of) a primary key is updated, it is executed
4541    * as a delete+reinsert. In order to avoid extra read-round trips
4542    * to fetch missing columns required by reinsert:
4543    * Ensure all columns not being modified (in write_set)
4544    * are read prior to ::ndb_pk_update_row().
4545    * All PK columns are also required by ::ndb_delete_row()
4546    */
4547   if (bitmap_is_overlapping(table->write_set, m_pk_bitmap_p))
4548   {
4549     assert(table_share->primary_key != MAX_KEY);
4550     bitmap_set_all(&m_bitmap);
4551     bitmap_subtract(&m_bitmap, table->write_set);
4552     bitmap_union(table->read_set, &m_bitmap);
4553     bitmap_union(table->read_set, m_pk_bitmap_p);
4554   }
4555 
4556   /**
4557    * Determine whether we have to read PK columns in
4558    * addition to those columns already present in read_set.
4559    * NOTE: As checked above, it is a precondition that
4560    *       a read is required as part of delete/update
4561    *       (!m_read_before_write_removal_used)
4562    *
4563    * PK columns are required when:
4564    *  1) This is a primary/unique keyop.
4565    *     (i.e. not a positioned update/delete which
4566    *      maintain a 'current of' position.)
4567    *
4568    * In addition, when a 'current of' position is available:
4569    *  2) When deleting a row containing BLOBs, the PK is required
4570    *     to delete BLOB data stored in separate fragments.
4571    *  3) When updating BLOB columns PK is required to delete
4572    *     old BLOB + insert new BLOB contents
4573    */
4574   else
4575   if (!use_cursor ||                             // 1)
4576       (is_delete && table_share->blob_fields) || // 2)
4577       uses_blob_value(table->write_set))         // 3)
4578   {
4579     bitmap_union(table->read_set, m_pk_bitmap_p);
4580   }
4581 
4582   /**
4583    * If update/delete uses partition pruning, we need
4584    * to read the column values which are part of the
4585    * partition spec as they are used by
4586    * ::get_parts_for_update() / ::get_parts_for_delete().
4587    * Partition columns are always part of the PK, so we only
4588    * have to do this if the PK bitmap wasn't added above.
4589    */
4590   else if (m_use_partition_pruning)  // && m_user_defined_partitioning)
4591   {
4592     assert(bitmap_is_subset(&m_part_info->full_part_field_set,
4593                             m_pk_bitmap_p));
4594     bitmap_union(table->read_set, &m_part_info->full_part_field_set);
4595   }
4596 
4597 
4598   /**
4599    * Update might cause PK or Unique key violation.
4600    * Error reporting needs values from the offending
4601    * unique columns to have been read:
4602    *
4603    * NOTE: This is NOT required for the correctness
4604    *       of the update operation itself. Maybe we
4605    *       should consider other strategies, like
4606    *       deferring reading of the column values
4607    *       until formatting the error message.
4608    */
4609   if (is_update && m_has_unique_index)
4610   {
4611     for (uint i= 0; i < table_share->keys; i++)
4612     {
4613       if ((table->key_info[i].flags & HA_NOSAME) &&
4614           bitmap_is_overlapping(table->write_set, m_key_fields[i]))
4615       {
4616         bitmap_union(table->read_set, m_key_fields[i]);
4617       }
4618     }
4619   }
4620 }
4621 
4622 
4623 Uint32
4624 ha_ndbcluster::setup_get_hidden_fields(NdbOperation::GetValueSpec gets[2])
4625 {
4626   Uint32 num_gets= 0;
4627   /*
4628     We need to read the hidden primary key, and possibly the FRAGMENT
4629     pseudo-column.
4630   */
4631   gets[num_gets].column= get_hidden_key_column();
4632   gets[num_gets].appStorage= &m_ref;
4633   num_gets++;
4634   if (m_user_defined_partitioning)
4635   {
4636     /* Need to read partition id to support ORDER BY columns. */
4637     gets[num_gets].column= NdbDictionary::Column::FRAGMENT;
4638     gets[num_gets].appStorage= &m_part_id;
4639     num_gets++;
4640   }
4641   return num_gets;
4642 }
4643 
4644 void
4645 ha_ndbcluster::get_hidden_fields_keyop(NdbOperation::OperationOptions *options,
4646                                        NdbOperation::GetValueSpec gets[2])
4647 {
4648   Uint32 num_gets= setup_get_hidden_fields(gets);
4649   options->optionsPresent|= NdbOperation::OperationOptions::OO_GETVALUE;
4650   options->extraGetValues= gets;
4651   options->numExtraGetValues= num_gets;
4652 }
4653 
4654 void
4655 ha_ndbcluster::get_hidden_fields_scan(NdbScanOperation::ScanOptions *options,
4656                                       NdbOperation::GetValueSpec gets[2])
4657 {
4658   Uint32 num_gets= setup_get_hidden_fields(gets);
4659   options->optionsPresent|= NdbScanOperation::ScanOptions::SO_GETVALUE;
4660   options->extraGetValues= gets;
4661   options->numExtraGetValues= num_gets;
4662 }
4663 
4664 inline void
4665 ha_ndbcluster::eventSetAnyValue(THD *thd,
4666                                 NdbOperation::OperationOptions *options) const
4667 {
4668   options->anyValue= 0;
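  /*
    The AnyValue accompanies the operation into the event stream consumed
    by the Binlog injector.  It carries the originating server-id plus a
    few reserved flag bits (no-logging, reflect-op, refresh-op) that are
    manipulated through the ndbcluster_anyvalue_* helpers used below.
  */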
4669   if (unlikely(m_slow_path))
4670   {
4671     /*
4672       Ignore TNTO_NO_LOGGING for slave thd.  It is used to indicate
4673       log-slave-updates option.  This is instead handled in the
4674       injector thread, by looking explicitly at the
4675       opt_log_slave_updates flag.
4676     */
4677     Thd_ndb *thd_ndb= get_thd_ndb(thd);
4678     if (thd->slave_thread)
4679     {
4680       /*
4681         Slave-thread, we are applying a replicated event.
4682         We set the server_id to the value received from the log which
4683         may be a composite of server_id and other data according
4684         to the server_id_bits option.
4685         In future it may be useful to support *not* mapping composite
4686         AnyValues to/from Binlogged server-ids
4687       */
4688       options->optionsPresent |= NdbOperation::OperationOptions::OO_ANYVALUE;
4689       options->anyValue = thd_unmasked_server_id(thd);
4690     }
4691     else if (thd_ndb->trans_options & TNTO_NO_LOGGING)
4692     {
4693       options->optionsPresent |= NdbOperation::OperationOptions::OO_ANYVALUE;
4694       ndbcluster_anyvalue_set_nologging(options->anyValue);
4695     }
4696   }
4697 #ifndef NDEBUG
4698   DBUG_EXECUTE_IF("ndb_set_reflect_anyvalue",
4699                   {
4700                     fprintf(stderr, "Ndb forcing reflect AnyValue\n");
4701                     options->optionsPresent |= NdbOperation::OperationOptions::OO_ANYVALUE;
4702                     ndbcluster_anyvalue_set_reflect_op(options->anyValue);
4703                   });
4704   DBUG_EXECUTE_IF("ndb_set_refresh_anyvalue",
4705                   {
4706                     fprintf(stderr, "Ndb forcing refresh AnyValue\n");
4707                     options->optionsPresent |= NdbOperation::OperationOptions::OO_ANYVALUE;
4708                     ndbcluster_anyvalue_set_refresh_op(options->anyValue);
4709                   });
4710 
4711   /*
4712     MySQLD will set the user-portion of AnyValue (if any) to all 1s.
4713     This tests code filtering ServerIds on the value of server-id-bits.
4714   */
4715   const char* p = getenv("NDB_TEST_ANYVALUE_USERDATA");
4716   if (p != 0  && *p != 0 && *p != '0' && *p != 'n' && *p != 'N')
4717   {
4718     options->optionsPresent |= NdbOperation::OperationOptions::OO_ANYVALUE;
4719     dbug_ndbcluster_anyvalue_set_userbits(options->anyValue);
4720   }
4721 #endif
4722 }
4723 
4724 #ifdef HAVE_NDB_BINLOG
4725 
4726 /**
4727    prepare_conflict_detection
4728 
4729    This method is called during operation definition by the slave,
4730    when writing to a table with conflict detection defined.
4731 
4732    It is responsible for defining and adding any operation filtering
4733    required, and for saving any operation definition state required
4734    for post-execute analysis.
4735 
4736    For transactional detection, this method may determine that the
4737    operation being defined should not be executed, and conflict
4738    handling should occur immediately.  In this case, conflict_handled
4739    is set to true.
4740 */
4741 int
4742 ha_ndbcluster::prepare_conflict_detection(enum_conflicting_op_type op_type,
4743                                           const NdbRecord* key_rec,
4744                                           const NdbRecord* data_rec,
4745                                           const uchar* old_data,
4746                                           const uchar* new_data,
4747                                           const MY_BITMAP *write_set,
4748                                           NdbTransaction* trans,
4749                                           NdbInterpretedCode* code,
4750                                           NdbOperation::OperationOptions* options,
4751                                           bool& conflict_handled,
4752                                           bool& avoid_ndbapi_write)
4753 {
4754   DBUG_ENTER("prepare_conflict_detection");
4755   THD* thd = table->in_use;
4756   int res = 0;
4757   assert(thd->slave_thread);
4758 
4759   conflict_handled = false;
4760 
4761   /*
4762     Special check for apply_status table, as we really don't want
4763     to do any special handling with it
4764   */
4765   if (unlikely(m_share == ndb_apply_status_share))
4766   {
4767     DBUG_RETURN(0);
4768   }
4769 
4770   /*
4771      Check transaction id first, as in transactional conflict detection,
4772      the transaction id is what eventually dictates whether an operation
4773      is applied or not.
4774 
4775      Note that this applies even if the current operation's table does not
4776      have a conflict function defined - if a transaction spans a 'transactional
4777      conflict detection' table and a non-transactional table, the non-transactional
4778      table's data will also be reverted.
4779   */
4780   Uint64 transaction_id = Ndb_binlog_extra_row_info::InvalidTransactionId;
4781   Uint16 conflict_flags = Ndb_binlog_extra_row_info::UnsetConflictFlags;
4782   bool op_is_marked_as_read= false;
4783   bool op_is_marked_as_reflected= false;
4784   bool op_is_marked_as_refresh= false;
4785 
4786   if (thd->binlog_row_event_extra_data)
4787   {
4788     Ndb_binlog_extra_row_info extra_row_info;
4789     if (extra_row_info.loadFromBuffer(thd->binlog_row_event_extra_data) != 0)
4790     {
4791       sql_print_warning("NDB Slave : Malformed event received on table %s "
4792                         "cannot parse.  Stopping Slave.",
4793                         m_share->key_string());
4794       DBUG_RETURN( ER_SLAVE_CORRUPT_EVENT );
4795     }
4796 
4797     if (extra_row_info.getFlags() &
4798         Ndb_binlog_extra_row_info::NDB_ERIF_TRANSID)
4799       transaction_id = extra_row_info.getTransactionId();
4800 
4801     if (extra_row_info.getFlags() &
4802         Ndb_binlog_extra_row_info::NDB_ERIF_CFT_FLAGS)
4803     {
4804       DBUG_PRINT("info",
4805                  ("Slave : have conflict flags : %x\n",
4806                   extra_row_info.getConflictFlags()));
4807       conflict_flags = extra_row_info.getConflictFlags();
4808 
4809       if (conflict_flags & NDB_ERIF_CFT_REFLECT_OP)
4810       {
4811         op_is_marked_as_reflected= true;
4812         g_ndb_slave_state.current_reflect_op_prepare_count++;
4813       }
4814 
4815       if (conflict_flags & NDB_ERIF_CFT_REFRESH_OP)
4816       {
4817         op_is_marked_as_refresh= true;
4818         g_ndb_slave_state.current_refresh_op_count++;
4819       }
4820 
4821       if (conflict_flags & NDB_ERIF_CFT_READ_OP)
4822         op_is_marked_as_read= true;
4823 
4824       /* Sanity - 1 flag at a time at most */
4825       assert(! (op_is_marked_as_reflected &&
4826                 op_is_marked_as_refresh));
4827       assert(! (op_is_marked_as_read &&
4828                 (op_is_marked_as_reflected ||
4829                  op_is_marked_as_refresh)));
4830     }
4831   }
4832 
4833   const st_conflict_fn_def* conflict_fn = (m_share->m_cfn_share?
4834                                            m_share->m_cfn_share->m_conflict_fn:
4835                                            NULL);
4836 
4837   bool pass_mode = false;
4838   if (conflict_fn)
4839   {
4840     /* Check Slave Conflict Role Variable setting */
4841     if (conflict_fn->flags & CF_USE_ROLE_VAR)
4842     {
4843       switch (opt_ndb_slave_conflict_role)
4844       {
4845       case SCR_NONE:
4846       {
4847         sql_print_warning("NDB Slave : Conflict function %s defined on "
4848                           "table %s requires ndb_slave_conflict_role variable "
4849                           "to be set.  Stopping slave.",
4850                           conflict_fn->name,
4851                           m_share->key_string());
4852         DBUG_RETURN(ER_SLAVE_CONFIGURATION);
4853       }
4854       case SCR_PASS:
4855       {
4856         pass_mode = true;
4857       }
4858       default:
4859         /* PRIMARY, SECONDARY */
4860         break;
4861       }
4862     }
4863   }
4864 
4865   {
4866     bool handle_conflict_now = false;
4867     const uchar* row_data = (op_type == WRITE_ROW? new_data : old_data);
4868     int res = g_ndb_slave_state.atPrepareConflictDetection(m_table,
4869                                                            key_rec,
4870                                                            row_data,
4871                                                            transaction_id,
4872                                                            handle_conflict_now);
4873     if (res)
4874       DBUG_RETURN(res);
4875 
4876     if (handle_conflict_now)
4877     {
4878       DBUG_PRINT("info", ("Conflict handling for row occurring now"));
4879       NdbError noRealConflictError;
4880       /*
4881        * If the user operation was a read and we receive an update
4882        * log event due to an AnyValue update, then the conflicting operation
4883        * should be reported as a read.
4884        */
4885       enum_conflicting_op_type conflicting_op=
4886         (op_type == UPDATE_ROW && op_is_marked_as_read)?
4887         READ_ROW
4888         : op_type;
4889       /*
4890          Directly handle the conflict here - e.g. refresh / write to
4891          the exceptions table etc.
4892       */
4893       res = handle_row_conflict(m_share->m_cfn_share,
4894                                 m_share->table_name,
4895                                 m_share->flags & NSF_BLOB_FLAG,
4896                                 "Transaction",
4897                                 key_rec,
4898                                 data_rec,
4899                                 old_data,
4900                                 new_data,
4901                                 conflicting_op,
4902                                 TRANS_IN_CONFLICT,
4903                                 noRealConflictError,
4904                                 trans,
4905                                 write_set,
4906                                 transaction_id);
4907       if (unlikely(res))
4908         DBUG_RETURN(res);
4909 
4910       g_ndb_slave_state.conflict_flags |= SCS_OPS_DEFINED;
4911 
4912       /*
4913         Indicate that there (may be) some more operations to
4914         execute before committing
4915       */
4916       m_thd_ndb->m_unsent_bytes+= 12;
4917       conflict_handled = true;
4918       DBUG_RETURN(0);
4919     }
4920   }
4921 
4922   if (conflict_fn == NULL ||
4923       pass_mode)
4924   {
4925     /* No conflict function definition required */
4926     DBUG_RETURN(0);
4927   }
4928 
4929   /**
4930    * By default conflict algorithms use the 'natural' NdbApi ops
4931    * (insert/update/delete) which can detect presence anomalies,
4932    * as opposed to NdbApi write which ignores them.
4933    * However in some cases, we want to use NdbApi write to apply
4934    * events received on tables with conflict detection defined
4935    * (e.g. when we want to forcibly align a row with a refresh op).
4936    */
4937   avoid_ndbapi_write = true;
4938 
4939   if (unlikely((conflict_fn->flags & CF_TRANSACTIONAL) &&
4940                (transaction_id == Ndb_binlog_extra_row_info::InvalidTransactionId)))
4941   {
4942     sql_print_warning("NDB Slave : Transactional conflict detection defined on table %s, but "
4943                       "events received without transaction ids.  Check --ndb-log-transaction-id setting "
4944                       "on upstream Cluster.",
4945                       m_share->key_string());
4946     /* This is a user error, but we want them to notice, so treat seriously */
4947     DBUG_RETURN( ER_SLAVE_CORRUPT_EVENT );
4948   }
4949 
4950   /**
4951    * Normally, update and delete have an attached program executed against
4952    * the existing row content.  Insert (and NdbApi write) do not.
4953    * Insert cannot as there is no pre-existing row to examine (and therefore
4954    * no non prepare-time deterministic decisions to make).
4955    * NdbApi Write technically could if the row already existed, but this is
4956    * not currently supported by NdbApi.
4957    */
4958   bool prepare_interpreted_program = (op_type != WRITE_ROW);
4959 
4960   if (conflict_fn->flags & CF_REFLECT_SEC_OPS)
4961   {
4962     /* This conflict function reflects secondary ops at the Primary */
4963 
4964     if (opt_ndb_slave_conflict_role == SCR_PRIMARY)
4965     {
4966       /**
4967        * Here we mark the applied operations to indicate that they
4968        * should be reflected back to the SECONDARY cluster.
4969        * This is required so that :
4970        *   1.  They are given local Binlog Event source serverids
4971        *       and so will pass through to the storage engine layer
4972        *       on the SECONDARY.
4973        *       (Normally they would be filtered in the Slave IO thread
4974        *        as having returned-to-source)
4975        *
4976        *   2.  They can be tagged as reflected so that the SECONDARY
4977        *       can handle them differently
4978        *       (They are force-applied)
4979        */
4980       DBUG_PRINT("info", ("Setting AnyValue to reflect secondary op"));
4981 
4982       options->optionsPresent |=
4983         NdbOperation::OperationOptions::OO_ANYVALUE;
4984       ndbcluster_anyvalue_set_reflect_op(options->anyValue);
4985     }
4986     else if (opt_ndb_slave_conflict_role == SCR_SECONDARY)
4987     {
4988       /**
4989        * On the Secondary, we receive reflected operations which
4990        * we want to attempt to apply under certain conditions.
4991        * This is done to recover from situations where
4992        * both PRIMARY and SECONDARY have performed concurrent
4993        * DELETEs.
4994        *
4995        * For non reflected operations we want to apply Inserts and
4996        * Updates using write_tuple() to get an idempotent effect
4997        */
4998       if (op_is_marked_as_reflected)
4999       {
5000         /**
5001          * Apply operations using their 'natural' operation types
5002          * with interpreted programs attached where appropriate.
5003          * Natural operation types used so that we become aware
5004          * of any 'presence' issues (row does/not exist).
5005          */
5006         DBUG_PRINT("info", ("Reflected operation"));
5007       }
5008       else
5009       {
5010         /**
5011          * Either a normal primary sourced change, or a refresh
5012          * operation.
5013          * In both cases we want to apply the operation idempotently,
5014          * and there's no need for an interpreted program.
5015          * e.g.
5016          *   WRITE_ROW  -> NdbApi write_row
5017          *   UPDATE_ROW -> NdbApi write_row
5018          *   DELETE_ROW -> NdbApi delete_row
5019          *
5020          * NdbApi write_row does not fail.
5021          * NdbApi delete_row will complain if the row does not exist
5022          * but this will be ignored
5023          */
5024         DBUG_PRINT("info", ("Allowing use of NdbApi write_row "
5025                             "for non reflected op (%u)",
5026                             op_is_marked_as_refresh));
5027         prepare_interpreted_program = false;
5028         avoid_ndbapi_write = false;
5029       }
5030     }
5031   }
5032 
5033   /*
5034      Prepare interpreted code for operation (update + delete only) according
5035      to algorithm used
5036   */
5037   if (prepare_interpreted_program)
5038   {
5039     res = conflict_fn->prep_func(m_share->m_cfn_share,
5040                                  op_type,
5041                                  m_ndb_record,
5042                                  old_data,
5043                                  new_data,
5044                                  table->read_set,  // Before image
5045                                  table->write_set, // After image
5046                                  code);
5047 
5048     if (res == 0)
5049     {
5050       if (code->getWordsUsed() > 0)
5051       {
5052         /* Attach conflict detecting filter program to operation */
5053         options->optionsPresent|=
5054           NdbOperation::OperationOptions::OO_INTERPRETED;
5055         options->interpretedCode= code;
5056       }
5057     }
5058     else
5059     {
5060       sql_print_warning("NDB Slave : Binlog event on table %s missing "
5061                         "info necessary for conflict detection.  "
5062                         "Check binlog format options on upstream cluster.",
5063                         m_share->key_string());
5064       DBUG_RETURN( ER_SLAVE_CORRUPT_EVENT);
5065     }
5066   } // if (op_type != WRITE_ROW)
5067 
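  /*
    Note that at least one conflict-detecting operation has now been
    defined in this batch, so post-execute conflict handling will run.
  */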
5068   g_ndb_slave_state.conflict_flags |= SCS_OPS_DEFINED;
5069 
5070   /* Now save data for potential insert to exceptions table... */
5071   Ndb_exceptions_data ex_data;
5072   ex_data.share= m_share;
5073   ex_data.key_rec= key_rec;
5074   ex_data.data_rec= data_rec;
5075   ex_data.op_type= op_type;
5076   ex_data.reflected_operation = op_is_marked_as_reflected;
5077   ex_data.trans_id= transaction_id;
5078   /*
5079     We need to save the row data for possible conflict resolution after
5080     execute().
5081   */
5082   if (old_data)
5083     ex_data.old_row= copy_row_to_buffer(m_thd_ndb, old_data);
5084   if (old_data != NULL && ex_data.old_row == NULL)
5085   {
5086     DBUG_RETURN(HA_ERR_OUT_OF_MEM);
5087   }
5088   if (new_data)
5089     ex_data.new_row= copy_row_to_buffer(m_thd_ndb, new_data);
5090   if (new_data !=  NULL && ex_data.new_row == NULL)
5091   {
5092     DBUG_RETURN(HA_ERR_OUT_OF_MEM);
5093   }
5094 
5095   ex_data.bitmap_buf= NULL;
5096   ex_data.write_set= NULL;
5097   if (table->write_set)
5098   {
5099     /* Copy table write set */
5100     ex_data.bitmap_buf=
5101       (my_bitmap_map *) get_buffer(m_thd_ndb, table->s->column_bitmap_size);
5102     if (ex_data.bitmap_buf == NULL)
5103     {
5104       DBUG_RETURN(HA_ERR_OUT_OF_MEM);
5105     }
5106     ex_data.write_set= (MY_BITMAP*) get_buffer(m_thd_ndb, sizeof(MY_BITMAP));
5107     if (ex_data.write_set == NULL)
5108     {
5109       DBUG_RETURN(HA_ERR_OUT_OF_MEM);
5110     }
5111     bitmap_init(ex_data.write_set, ex_data.bitmap_buf,
5112                 table->write_set->n_bits, false);
5113     bitmap_copy(ex_data.write_set, table->write_set);
5114   }
5115 
5116   uchar* ex_data_buffer= get_buffer(m_thd_ndb, sizeof(ex_data));
5117   if (ex_data_buffer == NULL)
5118   {
5119     DBUG_RETURN(HA_ERR_OUT_OF_MEM);
5120   }
5121   memcpy(ex_data_buffer, &ex_data, sizeof(ex_data));
5122 
5123   /* Store ptr to exceptions data in operation 'customdata' ptr */
5124   options->optionsPresent|= NdbOperation::OperationOptions::OO_CUSTOMDATA;
5125   options->customData= (void*)ex_data_buffer;
5126 
5127   DBUG_RETURN(0);
5128 }
5129 
5130 /**
5131    handle_conflict_op_error
5132 
5133    This method is called when an error is detected after executing an
5134    operation with conflict detection active.
5135 
5136    If the operation error is related to conflict detection, handling
5137    starts.
5138 
5139    Handling involves incrementing the relevant counter, and optionally
5140    refreshing the row and inserting an entry into the exceptions table
5141 */
5142 
5143 static int
5144 handle_conflict_op_error(NdbTransaction* trans,
5145                          const NdbError& err,
5146                          const NdbOperation* op)
5147 {
5148   DBUG_ENTER("handle_conflict_op_error");
5149   DBUG_PRINT("info", ("ndb error: %d", err.code));
5150 
5151   if ((err.code == (int) error_conflict_fn_violation) ||
5152       (err.code == (int) error_op_after_refresh_op) ||
5153       (err.classification == NdbError::ConstraintViolation) ||
5154       (err.classification == NdbError::NoDataFound))
5155   {
5156     DBUG_PRINT("info",
5157                ("err.code = %s, err.classification = %s",
5158                ((err.code == (int) error_conflict_fn_violation)?
5159                 "error_conflict_fn_violation":
5160                 ((err.code == (int) error_op_after_refresh_op)?
5161                  "error_op_after_refresh_op" : "?")),
5162                ((err.classification == NdbError::ConstraintViolation)?
5163                 "ConstraintViolation":
5164                 ((err.classification == NdbError::NoDataFound)?
5165                  "NoDataFound" : "?"))));
5166 
5167     enum_conflict_cause conflict_cause;
5168 
5169     /* Map cause onto our conflict description type */
5170     if ((err.code == (int) error_conflict_fn_violation) ||
5171         (err.code == (int) error_op_after_refresh_op))
5172     {
5173       DBUG_PRINT("info", ("ROW_IN_CONFLICT"));
5174       conflict_cause= ROW_IN_CONFLICT;
5175     }
5176     else if (err.classification == NdbError::ConstraintViolation)
5177     {
5178       DBUG_PRINT("info", ("ROW_ALREADY_EXISTS"));
5179       conflict_cause= ROW_ALREADY_EXISTS;
5180     }
5181     else
5182     {
5183       assert(err.classification == NdbError::NoDataFound);
5184       DBUG_PRINT("info", ("ROW_DOES_NOT_EXIST"));
5185       conflict_cause= ROW_DOES_NOT_EXIST;
5186     }
5187 
5188     /* Get exceptions data from operation */
5189     const void* buffer=op->getCustomData();
5190     assert(buffer);
5191     Ndb_exceptions_data ex_data;
5192     memcpy(&ex_data, buffer, sizeof(ex_data));
5193     NDB_SHARE *share= ex_data.share;
5194     NDB_CONFLICT_FN_SHARE* cfn_share= share ? share->m_cfn_share : NULL;
5195 
5196     const NdbRecord* key_rec= ex_data.key_rec;
5197     const NdbRecord* data_rec= ex_data.data_rec;
5198     const uchar* old_row= ex_data.old_row;
5199     const uchar* new_row= ex_data.new_row;
5200 #ifndef NDEBUG
5201     const uchar* row=
5202       (ex_data.op_type == DELETE_ROW)?
5203       ex_data.old_row : ex_data.new_row;
5204 #endif
5205     enum_conflicting_op_type causing_op_type= ex_data.op_type;
5206     const MY_BITMAP *write_set= ex_data.write_set;
5207 
5208     DBUG_PRINT("info", ("Conflict causing op type : %u",
5209                         causing_op_type));
5210 
5211     if (causing_op_type == REFRESH_ROW)
5212     {
5213       /*
5214          The failing op was a refresh row, we require that it
5215          failed due to being a duplicate (e.g. a refresh
5216          occurring on a refreshed row)
5217        */
5218       if (err.code == (int) error_op_after_refresh_op)
5219       {
5220         DBUG_PRINT("info", ("Operation after refresh - ignoring"));
5221         DBUG_RETURN(0);
5222       }
5223       else
5224       {
5225         DBUG_PRINT("info", ("Refresh op hit real error %u", err.code));
5226         /* Unexpected error, normal handling*/
5227         DBUG_RETURN(err.code);
5228       }
5229     }
5230 
5231     if (ex_data.reflected_operation)
5232     {
5233       DBUG_PRINT("info", ("Reflected operation error : %u.",
5234                           err.code));
5235 
5236       /**
5237        * Expected cases are :
5238        *   Insert : Row already exists :      Don't care - discard
5239        *              Secondary has this row, or a future version
5240        *
5241        *   Update : Row does not exist :      Don't care - discard
5242        *              Secondary has deleted this row later.
5243        *
5244        *            Conflict
5245        *            (Row written here last) : Don't care - discard
5246        *              Secondary has this row, or a future version
5247        *
5248        *   Delete : Row does not exist :      Don't care - discard
5249        *              Secondary has deleted this row later.
5250        *
5251        *            Conflict
5252        *            (Row written here last) : Don't care - discard
5253        *              Secondary has a future version of this row
5254        *
5255        *   Presence and authorship conflicts are used to determine
5256        *   whether to apply a reflected operation.
5257        *   The presence checks avoid divergence and the authorship
5258        *   checks avoid all actions being applied in delayed
5259        *   duplicate.
5260        */
5261       assert((err.code == (int) error_conflict_fn_violation) ||
5262              (err.classification == NdbError::ConstraintViolation) ||
5263              (err.classification == NdbError::NoDataFound));
5264 
5265       g_ndb_slave_state.current_reflect_op_discard_count++;
5266 
5267       DBUG_RETURN(0);
5268     }
5269 
5270     {
5271       /**
5272        * For asymmetric algorithms that use the ROLE variable to
5273        * determine their role, we check whether we are on the
5274        * SECONDARY cluster.
5275        * This is as far as we want to process conflicts on the
5276        * SECONDARY.
5277        */
5278       bool secondary = cfn_share &&
5279         cfn_share->m_conflict_fn &&
5280         (cfn_share->m_conflict_fn->flags & CF_USE_ROLE_VAR) &&
5281         (opt_ndb_slave_conflict_role == SCR_SECONDARY);
5282 
5283       if (secondary)
5284       {
5285         DBUG_PRINT("info", ("Conflict detected, on secondary - ignore"));
5286         DBUG_RETURN(0);
5287       }
5288     }
5289 
5290     assert(share != NULL && row != NULL);
5291     bool table_has_trans_conflict_detection =
5292       cfn_share &&
5293       cfn_share->m_conflict_fn &&
5294       (cfn_share->m_conflict_fn->flags & CF_TRANSACTIONAL);
5295 
5296     if (table_has_trans_conflict_detection)
5297     {
5298       /* Mark this transaction as in-conflict, unless this is a
5299        * Delete-Delete conflict, which we can't currently handle
5300        * in the normal way
5301        */
5302       if (! ((causing_op_type == DELETE_ROW) &&
5303              (conflict_cause == ROW_DOES_NOT_EXIST)))
5304       {
5305         /* Perform special transactional conflict-detected handling */
5306         int res = g_ndb_slave_state.atTransConflictDetected(ex_data.trans_id);
5307         if (res)
5308           DBUG_RETURN(res);
5309       }
5310     }
5311 
5312     if (cfn_share)
5313     {
5314       /* Now handle the conflict on this row */
5315       enum_conflict_fn_type cft = cfn_share->m_conflict_fn->type;
5316 
5317       g_ndb_slave_state.current_violation_count[cft]++;
5318 
5319       int res = handle_row_conflict(cfn_share,
5320                                     share->table_name,
5321                                     false, /* table_has_blobs */
5322                                     "Row",
5323                                     key_rec,
5324                                     data_rec,
5325                                     old_row,
5326                                     new_row,
5327                                     causing_op_type,
5328                                     conflict_cause,
5329                                     err,
5330                                     trans,
5331                                     write_set,
5332                                     /*
5333                                       ORIG_TRANSID not available for
5334                                       non-transactional conflict detection.
5335                                     */
5336                                     Ndb_binlog_extra_row_info::InvalidTransactionId);
5337 
5338       DBUG_RETURN(res);
5339     }
5340     else
5341     {
5342       DBUG_PRINT("info", ("missing cfn_share"));
5343       DBUG_RETURN(0); // TODO : Correct?
5344     }
5345   }
5346   else
5347   {
5348     /* Non conflict related error */
5349     DBUG_PRINT("info", ("err.code == %u", err.code));
5350     DBUG_RETURN(err.code);
5351   }
5352 
5353   DBUG_RETURN(0); // Reachable?
5354 }
5355 
5356 /*
5357   is_serverid_local
5358 */
5359 static bool is_serverid_local(Uint32 serverid)
5360 {
5361   /*
5362      If it's not our own serverid, check the
5363      IGNORE_SERVER_IDS setting to determine
5364      whether it should still be treated as local.
5365   */
5366   return ((serverid == ::server_id) ||
5367           ndb_mi_get_ignore_server_id(serverid));
5368 }
5369 #endif
5370 
5371 int ha_ndbcluster::write_row(uchar *record)
5372 {
5373   DBUG_ENTER("ha_ndbcluster::write_row");
5374 #ifdef HAVE_NDB_BINLOG
5375   if (m_share == ndb_apply_status_share && table->in_use->slave_thread)
5376   {
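    /*
      A slave write to the ndb_apply_status table is intercepted here:
      server_id (field 0) and epoch (field 1) are read directly from the
      row image so the slave state can track the applied epoch.
    */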
5377     uint32 row_server_id, master_server_id= ndb_mi_get_master_server_id();
5378     uint64 row_epoch;
5379     memcpy(&row_server_id, table->field[0]->ptr + (record - table->record[0]),
5380            sizeof(row_server_id));
5381     memcpy(&row_epoch, table->field[1]->ptr + (record - table->record[0]),
5382            sizeof(row_epoch));
5383     int rc = g_ndb_slave_state.atApplyStatusWrite(master_server_id,
5384                                                   row_server_id,
5385                                                   row_epoch,
5386                                                   is_serverid_local(row_server_id));
5387     if (rc != 0)
5388     {
5389       /* Stop Slave */
5390       DBUG_RETURN(rc);
5391     }
5392   }
5393 #endif /* HAVE_NDB_BINLOG */
5394   DBUG_RETURN(ndb_write_row(record, FALSE, FALSE));
5395 }
5396 
5397 /**
5398   Insert one record into NDB
5399 */
5400 int ha_ndbcluster::ndb_write_row(uchar *record,
5401                                  bool primary_key_update,
5402                                  bool batched_update)
5403 {
5404   bool has_auto_increment;
5405   const NdbOperation *op;
5406   THD *thd= table->in_use;
5407   Thd_ndb *thd_ndb= m_thd_ndb;
5408   NdbTransaction *trans;
5409   uint32 part_id;
5410   int error= 0;
5411   NdbOperation::SetValueSpec sets[3];
5412   Uint32 num_sets= 0;
5413   DBUG_ENTER("ha_ndbcluster::ndb_write_row");
5414 
5415   error = check_slave_state(thd);
5416   if (unlikely(error))
5417     DBUG_RETURN(error);
5418 
5419   has_auto_increment= (table->next_number_field && record == table->record[0]);
5420 
5421   if (has_auto_increment && table_share->primary_key != MAX_KEY)
5422   {
5423     /*
5424      * Increase any auto_incremented primary key
5425      */
5426     m_skip_auto_increment= FALSE;
5427     if ((error= update_auto_increment()))
5428       DBUG_RETURN(error);
5429     m_skip_auto_increment= (insert_id_for_cur_row == 0 ||
5430                             thd->auto_inc_intervals_forced.nb_elements());
5431   }
5432 
5433   /*
5434    * If IGNORE is specified, ignore constraint violations on primary and unique keys
5435    */
5436   if (!m_use_write && m_ignore_dup_key)
5437   {
5438     /*
5439       Compare this if-expression with the one in start_bulk_insert();
5440       start_bulk_insert() will set parameters to ensure that each
5441       write_row is committed individually.
5442     */
5443     int peek_res= peek_indexed_rows(record, NDB_INSERT);
5444 
5445     if (!peek_res)
5446     {
5447       error= HA_ERR_FOUND_DUPP_KEY;
5448     }
5449     else if (peek_res != HA_ERR_KEY_NOT_FOUND)
5450     {
5451       error= peek_res;
5452     }
5453     if (error)
5454     {
5455       if ((has_auto_increment) && (m_skip_auto_increment))
5456       {
5457         int ret_val;
5458         if ((ret_val= set_auto_inc(thd, table->next_number_field)))
5459         {
5460           DBUG_RETURN(ret_val);
5461         }
5462       }
5463       m_skip_auto_increment= TRUE;
5464       DBUG_RETURN(error);
5465     }
5466   }
5467 
5468   bool uses_blobs= uses_blob_value(table->write_set);
5469 
5470   Uint64 auto_value;
5471   const NdbRecord *key_rec;
5472   const uchar *key_row;
5473   if (table_share->primary_key == MAX_KEY)
5474   {
5475     /* Table has hidden primary key. */
5476     Ndb *ndb= get_ndb(thd);
5477     uint retries= NDB_AUTO_INCREMENT_RETRIES;
5478     int retry_sleep= 30; /* 30 milliseconds, transaction */
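    /*
      Fetch the next hidden-key value.  The last argument (1000) asks
      NdbApi to reserve a range of values up front to reduce round trips;
      temporary errors are retried a limited number of times below.
    */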
5479     for (;;)
5480     {
5481       Ndb_tuple_id_range_guard g(m_share);
5482       if (ndb->getAutoIncrementValue(m_table, g.range, auto_value, 1000) == -1)
5483       {
5484 	if (--retries && !thd->killed &&
5485 	    ndb->getNdbError().status == NdbError::TemporaryError)
5486 	{
5487 	  do_retry_sleep(retry_sleep);
5488 	  continue;
5489 	}
5490 	ERR_RETURN(ndb->getNdbError());
5491       }
5492       break;
5493     }
5494     sets[num_sets].column= get_hidden_key_column();
5495     sets[num_sets].value= &auto_value;
5496     num_sets++;
5497     key_rec= m_ndb_hidden_key_record;
5498     key_row= (const uchar *)&auto_value;
5499   }
5500   else
5501   {
5502     key_rec= m_index[table_share->primary_key].ndb_unique_record_row;
5503     key_row= record;
5504   }
5505 
5506   trans= thd_ndb->trans;
5507   if (m_user_defined_partitioning)
5508   {
5509     assert(m_use_partition_pruning);
5510     longlong func_value= 0;
5511     my_bitmap_map *old_map= dbug_tmp_use_all_columns(table, table->read_set);
5512     error= m_part_info->get_partition_id(m_part_info, &part_id, &func_value);
5513     dbug_tmp_restore_column_map(table->read_set, old_map);
5514     if (unlikely(error))
5515     {
5516       m_part_info->err_value= func_value;
5517       DBUG_RETURN(error);
5518     }
5519     {
5520       /*
5521         We need to set the value of the partition function value in
5522         NDB since the NDB kernel doesn't have easy access to the function
5523         to calculate the value.
5524       */
5525       if (func_value >= INT_MAX32)
5526         func_value= INT_MAX32;
5527       sets[num_sets].column= get_partition_id_column();
5528       sets[num_sets].value= &func_value;
5529       num_sets++;
5530     }
5531     if (!trans)
5532       if (unlikely(!(trans= start_transaction_part_id(part_id, error))))
5533         DBUG_RETURN(error);
5534   }
5535   else if (!trans)
5536   {
5537     if (unlikely(!(trans= start_transaction_row(key_rec, key_row, error))))
5538       DBUG_RETURN(error);
5539   }
5540   assert(trans);
5541 
5542   ha_statistic_increment(&SSV::ha_write_count);
5543 
5544   /*
5545      Setup OperationOptions
5546    */
5547   NdbOperation::OperationOptions options;
5548   NdbOperation::OperationOptions *poptions = NULL;
5549   options.optionsPresent=0;
5550 
5551   eventSetAnyValue(thd, &options);
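  /*
    add_row_check_if_batch_full() accounts this row's estimated size
    against the current batch and reports whether the batch limit has
    been reached, in which case the pending operations are flushed below.
  */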
5552   const bool need_flush=
5553       thd_ndb->add_row_check_if_batch_full(m_bytes_per_write);
5554 
5555   const Uint32 authorValue = 1;
5556   if ((thd->slave_thread) &&
5557       (m_table->getExtraRowAuthorBits()))
5558   {
5559     /* Set author to indicate slave updated last */
5560     sets[num_sets].column= NdbDictionary::Column::ROW_AUTHOR;
5561     sets[num_sets].value= &authorValue;
5562     num_sets++;
5563   }
5564 
5565   if (m_user_defined_partitioning)
5566   {
5567     options.optionsPresent |= NdbOperation::OperationOptions::OO_PARTITION_ID;
5568     options.partitionId= part_id;
5569   }
5570   if (num_sets)
5571   {
5572     options.optionsPresent |= NdbOperation::OperationOptions::OO_SETVALUE;
5573     options.extraSetValues= sets;
5574     options.numExtraSetValues= num_sets;
5575   }
5576   if (thd->slave_thread || THDVAR(thd, deferred_constraints))
5577   {
5578     options.optionsPresent |=
5579       NdbOperation::OperationOptions::OO_DEFERRED_CONSTAINTS;
5580   }
5581 
5582   if (thd_test_options(thd, OPTION_NO_FOREIGN_KEY_CHECKS))
5583   {
5584     DBUG_PRINT("info", ("Disabling foreign keys"));
5585     options.optionsPresent |=
5586       NdbOperation::OperationOptions::OO_DISABLE_FK;
5587   }
5588 
5589   if (options.optionsPresent != 0)
5590     poptions=&options;
5591 
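  /*
    Scratch bitmap on the stack, large enough for a column mask covering
    the maximum number of attributes an NDB table can have; used when a
    temporary copy of the write_set is needed below.
  */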
5592   const Uint32 bitmapSz= (NDB_MAX_ATTRIBUTES_IN_TABLE + 31)/32;
5593   uint32 tmpBitmapSpace[bitmapSz];
5594   MY_BITMAP tmpBitmap;
5595   MY_BITMAP *user_cols_written_bitmap;
5596   bool avoidNdbApiWriteOp = false; /* ndb_write_row defaults to write */
5597 #ifdef HAVE_NDB_BINLOG
5598   /* Conflict resolution in slave thread */
5599   if (thd->slave_thread)
5600   {
5601     bool conflict_handled = false;
5602 
5603     if (unlikely((error = prepare_conflict_detection(WRITE_ROW,
5604                                                      key_rec,
5605                                                      m_ndb_record,
5606                                                      NULL,    /* old_data */
5607                                                      record,  /* new_data */
5608                                                      table->write_set,
5609                                                      trans,
5610                                                      NULL,    /* code */
5611                                                      &options,
5612                                                      conflict_handled,
5613                                                      avoidNdbApiWriteOp))))
5614       DBUG_RETURN(error);
5615 
5616     if (unlikely(conflict_handled))
5617     {
5618       /* No need to continue with operation definition */
5619       /* TODO : Ensure batch execution */
5620       DBUG_RETURN(0);
5621     }
5622   };
5623 #endif
5624 
5625   if (m_use_write &&
5626       !avoidNdbApiWriteOp)
5627   {
5628     uchar* mask;
5629 
5630     if (applying_binlog(thd))
5631     {
5632       /*
5633         Use write_set when applying binlog to avoid trampling
5634         unchanged columns
5635       */
5636       user_cols_written_bitmap= table->write_set;
5637       mask= (uchar *)(user_cols_written_bitmap->bitmap);
5638     }
5639     else
5640     {
5641       /* Ignore write_set for REPLACE command */
5642       user_cols_written_bitmap= NULL;
5643       mask= NULL;
5644     }
5645     /* TODO : Add conflict detection etc when interpreted write supported */
5646     op= trans->writeTuple(key_rec, (const char *)key_row, m_ndb_record,
5647                           (char *)record, mask,
5648                           poptions, sizeof(NdbOperation::OperationOptions));
5649   }
5650   else
5651   {
5652     uchar *mask;
5653 
5654     /* Check whether Ndb table definition includes any default values. */
5655     if (m_table->hasDefaultValues())
5656     {
5657       DBUG_PRINT("info", ("Not sending values for native defaulted columns"));
5658 
5659       /*
5660         If Ndb is unaware of the table's defaults, we must provide all column values to the insert.
5661         This is done using a NULL column mask.
5662         If Ndb is aware of the table's defaults, we only need to provide
5663         the columns explicitly mentioned in the write set,
5664         plus any extra columns required due to bug#41616,
5665         plus the primary key columns required due to bug#42238.
5666       */
5667       /*
5668         The following code for setting user_cols_written_bitmap
5669         should be removed after BUG#41616 and Bug#42238 are fixed
5670       */
5671       /* Copy table write set so that we can add to it */
5672       user_cols_written_bitmap= &tmpBitmap;
5673       bitmap_init(user_cols_written_bitmap, tmpBitmapSpace,
5674                   table->write_set->n_bits, false);
5675       bitmap_copy(user_cols_written_bitmap, table->write_set);
5676 
5677       for (uint i= 0; i < table->s->fields; i++)
5678       {
5679         Field *field= table->field[i];
5680         DBUG_PRINT("info", ("Field#%u, (%u), Type : %u "
5681                             "NO_DEFAULT_VALUE_FLAG : %u PRI_KEY_FLAG : %u",
5682                             i,
5683                             field->field_index,
5684                             field->real_type(),
5685                             field->flags & NO_DEFAULT_VALUE_FLAG,
5686                             field->flags & PRI_KEY_FLAG));
5687         if ((field->flags & (NO_DEFAULT_VALUE_FLAG | // bug 41616
5688                              PRI_KEY_FLAG)) ||       // bug 42238
5689             ! type_supports_default_value(field->real_type()))
5690         {
5691           bitmap_set_bit(user_cols_written_bitmap, field->field_index);
5692         }
5693       }
5694 
5695       mask= (uchar *)(user_cols_written_bitmap->bitmap);
5696     }
5697     else
5698     {
5699       /* No defaults in kernel, provide all columns ourselves */
5700       DBUG_PRINT("info", ("No native defaults, sending all values"));
5701       user_cols_written_bitmap= NULL;
5702       mask = NULL;
5703     }
5704 
5705     /* Using insert, we write all non default columns */
5706     op= trans->insertTuple(key_rec, (const char *)key_row, m_ndb_record,
5707                            (char *)record, mask, // Default value should be masked
5708                            poptions, sizeof(NdbOperation::OperationOptions));
5709   }
5710   if (!(op))
5711     ERR_RETURN(trans->getNdbError());
5712 
5713   bool do_batch= !need_flush &&
5714     (batched_update || thd_allow_batch(thd));
5715   uint blob_count= 0;
5716   if (table_share->blob_fields > 0)
5717   {
5718     my_bitmap_map *old_map= dbug_tmp_use_all_columns(table, table->read_set);
5719     /* Set Blob values for all columns updated by the operation */
5720     int res= set_blob_values(op, record - table->record[0],
5721                              user_cols_written_bitmap, &blob_count, do_batch);
5722     dbug_tmp_restore_column_map(table->read_set, old_map);
5723     if (res != 0)
5724       DBUG_RETURN(res);
5725   }
5726 
5727   m_rows_changed++;
5728 
5729   /*
5730     Execute write operation
5731     NOTE When doing inserts with many values in
5732     each INSERT statement it should not be necessary
5733     to NoCommit the transaction between each row.
5734     Find out how this is detected!
5735   */
5736   m_rows_inserted++;
5737   no_uncommitted_rows_update(1);
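  /*
    Flush immediately for unbatched single-row or blob inserts, for the
    insert half of a primary key update, and whenever the batch is full.
  */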
5738   if (( (m_rows_to_insert == 1 || uses_blobs) && !do_batch ) ||
5739       primary_key_update ||
5740       need_flush)
5741   {
5742     int res= flush_bulk_insert();
5743     if (res != 0)
5744     {
5745       m_skip_auto_increment= TRUE;
5746       DBUG_RETURN(res);
5747     }
5748   }
5749   if ((has_auto_increment) && (m_skip_auto_increment))
5750   {
5751     int ret_val;
5752     if ((ret_val= set_auto_inc(thd, table->next_number_field)))
5753     {
5754       DBUG_RETURN(ret_val);
5755     }
5756   }
5757   m_skip_auto_increment= TRUE;
5758 
5759   DBUG_PRINT("exit",("ok"));
5760   DBUG_RETURN(0);
5761 }
5762 
5763 
5764 /* Compare if an update changes the primary key in a row. */
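/* Returns 0 if the primary key is unchanged by the update, non-zero otherwise. */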
5765 int ha_ndbcluster::primary_key_cmp(const uchar * old_row, const uchar * new_row)
5766 {
5767   uint keynr= table_share->primary_key;
5768   KEY_PART_INFO *key_part=table->key_info[keynr].key_part;
5769   KEY_PART_INFO *end=key_part+table->key_info[keynr].user_defined_key_parts;
5770 
5771   for (; key_part != end ; key_part++)
5772   {
5773     if (!bitmap_is_set(table->write_set, key_part->fieldnr - 1))
5774       continue;
5775 
5776     /* The primary key does not allow NULLs. */
5777     assert(!key_part->null_bit);
5778 
5779     if (key_part->key_part_flag & (HA_BLOB_PART | HA_VAR_LENGTH_PART))
5780     {
5781 
5782       if (key_part->field->cmp_binary((old_row + key_part->offset),
5783                                       (new_row + key_part->offset),
5784                                       (ulong) key_part->length))
5785         return 1;
5786     }
5787     else
5788     {
5789       if (memcmp(old_row+key_part->offset, new_row+key_part->offset,
5790                  key_part->length))
5791         return 1;
5792     }
5793   }
5794   return 0;
5795 }
5796 
5797 #ifdef HAVE_NDB_BINLOG
5798 
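/*
  Singleton exceptions data attached to refresh operations issued during
  conflict handling.  Refresh ops carry no per-row state, and tagging them
  with op_type REFRESH_ROW lets handle_conflict_op_error() recognise
  errors that originate from a refresh.
*/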
5799 static Ndb_exceptions_data StaticRefreshExceptionsData=
5800   { NULL, NULL, NULL, NULL, NULL, NULL, NULL, REFRESH_ROW, false, 0 };
5801 
5802 static int
5803 handle_row_conflict(NDB_CONFLICT_FN_SHARE* cfn_share,
5804                     const char* table_name,
5805                     bool table_has_blobs,
5806                     const char* handling_type,
5807                     const NdbRecord* key_rec,
5808                     const NdbRecord* data_rec,
5809                     const uchar* old_row,
5810                     const uchar* new_row,
5811                     enum_conflicting_op_type op_type,
5812                     enum_conflict_cause conflict_cause,
5813                     const NdbError& conflict_error,
5814                     NdbTransaction* conflict_trans,
5815                     const MY_BITMAP *write_set,
5816                     Uint64 transaction_id)
5817 {
5818   DBUG_ENTER("handle_row_conflict");
5819 
5820   const uchar* row = (op_type == DELETE_ROW)? old_row : new_row;
5821   /*
5822      We will refresh the row if the conflict function requires
5823      it, or if we are handling a transactional conflict.
5824   */
5825   bool refresh_row =
5826     (conflict_cause == TRANS_IN_CONFLICT) ||
5827     (cfn_share &&
5828      (cfn_share->m_flags & CFF_REFRESH_ROWS));
5829 
5830   if (refresh_row)
5831   {
5832     /* A conflict has been detected between an applied replicated operation
5833      * and the data in the DB.
5834      * The attempt to change the local DB will have been rejected.
5835      * We now take steps to generate a refresh Binlog event so that
5836      * other clusters will be re-aligned.
5837      */
5838     DBUG_PRINT("info", ("Conflict on table %s.  Operation type : %s, "
5839                         "conflict cause :%s, conflict error : %u : %s",
5840                         table_name,
5841                         ((op_type == WRITE_ROW)? "WRITE_ROW":
5842                          (op_type == UPDATE_ROW)? "UPDATE_ROW":
5843                          "DELETE_ROW"),
5844                         ((conflict_cause == ROW_ALREADY_EXISTS)?"ROW_ALREADY_EXISTS":
5845                          (conflict_cause == ROW_DOES_NOT_EXIST)?"ROW_DOES_NOT_EXIST":
5846                          "ROW_IN_CONFLICT"),
5847                         conflict_error.code,
5848                         conflict_error.message));
5849 
5850     assert(key_rec != NULL);
5851     assert(row != NULL);
5852 
5853     do
5854     {
5855       /* We cannot refresh a row which has Blobs, as we do not support
5856        * Blob refresh yet.
5857        * Rows implicated by a transactional conflict function may have
5858        * Blobs.
5859        * We will generate an error in this case
5860        */
5861       if (table_has_blobs)
5862       {
5863         char msg[FN_REFLEN];
5864         my_snprintf(msg, sizeof(msg), "%s conflict handling "
5865                     "on table %s failed as table has Blobs which cannot be refreshed.",
5866                     handling_type,
5867                     table_name);
5868 
5869         push_warning_printf(current_thd, Sql_condition::SL_WARNING,
5870                             ER_EXCEPTIONS_WRITE_ERROR,
5871                             ER(ER_EXCEPTIONS_WRITE_ERROR), msg);
5872 
5873         DBUG_RETURN(ER_EXCEPTIONS_WRITE_ERROR);
5874       }
5875 
5876       /* When the slave splits an epoch into batches, a conflict row detected
5877        * and refreshed in an early batch can be written to by operations in
5878        * a later batch.  As the operations will not have applied, and the
5879        * row has already been refreshed, we need not attempt to refresh
5880        * it again
5881        */
5882       if ((conflict_cause == ROW_IN_CONFLICT) &&
5883           (conflict_error.code == (int) error_op_after_refresh_op))
5884       {
5885         /* Attempt to apply an operation after the row was refreshed
5886          * Ignore the error
5887          */
5888         DBUG_PRINT("info", ("Operation after refresh error - ignoring"));
5889         break;
5890       }
5891 
5892       /* When a delete operation finds that the row does not exist, it indicates
5893        * a DELETE vs DELETE conflict.  If we refresh the row then we can get
5894        * non deterministic behaviour depending on slave batching as follows :
5895        *   Row is deleted
5896        *
5897        *     Case 1
5898        *       Slave applied DELETE, INSERT in 1 batch
5899        *
5900        *         After first batch, the row is present (due to INSERT), it is
5901        *         refreshed.
5902        *
5903        *     Case 2
5904        *       Slave applied DELETE in 1 batch, INSERT in 2nd batch
5905        *
5906        *         After first batch, the row is not present, it is refreshed
5907        *         INSERT is then rejected.
5908        *
5909        * The problem of not being able to 'record' a DELETE vs DELETE conflict
5910        * is known.  We attempt at least to give consistent behaviour for
5911        * DELETE vs DELETE conflicts by :
5912        *   NOT refreshing a row when a DELETE vs DELETE conflict is detected
5913        * This should map all batching scenarios onto Case1.
5914        */
5915       if ((op_type == DELETE_ROW) &&
5916           (conflict_cause == ROW_DOES_NOT_EXIST))
5917       {
5918         g_ndb_slave_state.current_delete_delete_count++;
5919         DBUG_PRINT("info", ("Delete vs Delete detected, NOT refreshing"));
5920         break;
5921       }
5922 
5923       /*
5924         We give the refresh operation some 'exceptions data', so that
5925         it can be identified as part of conflict resolution when
5926         handling operation errors.
5927         Specifically we need to be able to handle duplicate row
5928         refreshes.
5929         As there is no unique exceptions data, we use a singleton.
5930 
5931         We also need to 'force' the ANYVALUE of the row to 0 to
5932         indicate that the refresh is locally-sourced.
5933         Otherwise we can 'pickup' the ANYVALUE of a previous
5934         update to the row.
5935         If some previous update in this transaction came from a
5936         Slave, then using its ANYVALUE can result in that Slave
5937         ignoring this correction.
5938       */
5939       NdbOperation::OperationOptions options;
5940       options.optionsPresent =
5941         NdbOperation::OperationOptions::OO_CUSTOMDATA |
5942         NdbOperation::OperationOptions::OO_ANYVALUE;
5943       options.customData = &StaticRefreshExceptionsData;
5944       options.anyValue = 0;
5945 
5946       /* Use AnyValue to indicate that this is a refreshTuple op */
5947       ndbcluster_anyvalue_set_refresh_op(options.anyValue);
5948 
5949       /* Create a refresh operation to realign other clusters */
5950       // TODO Do we ever get non-PK key?
5951       //      Keyless table?
5952       //      Unique index
5953       const NdbOperation* refresh_op= conflict_trans->refreshTuple(key_rec,
5954                                                                    (const char*) row,
5955                                                                    &options,
5956                                                                    sizeof(options));
5957       if (!refresh_op)
5958       {
5959         NdbError err = conflict_trans->getNdbError();
5960 
5961         if (err.status == NdbError::TemporaryError)
5962         {
5963           /* Slave will roll back and retry entire transaction. */
5964           ERR_RETURN(err);
5965         }
5966         else
5967         {
5968           char msg[FN_REFLEN];
5969           my_snprintf(msg, sizeof(msg), "Row conflict handling "
5970                       "on table %s hit Ndb error %d '%s'",
5971                       table_name,
5972                       err.code,
5973                       err.message);
5974           push_warning_printf(current_thd, Sql_condition::SL_WARNING,
5975                               ER_EXCEPTIONS_WRITE_ERROR,
5976                               ER(ER_EXCEPTIONS_WRITE_ERROR), msg);
5977           /* Slave will stop replication. */
5978           DBUG_RETURN(ER_EXCEPTIONS_WRITE_ERROR);
5979         }
5980       }
5981     } while(0); // End of 'refresh' block
5982   }
5983 
5984   DBUG_PRINT("info", ("Table %s does%s have an exceptions table",
5985                       table_name,
5986                       (cfn_share && cfn_share->m_ex_tab_writer.hasTable())
5987                       ? "" : " not"));
5988   if (cfn_share &&
5989       cfn_share->m_ex_tab_writer.hasTable())
5990   {
5991     NdbError err;
5992     if (cfn_share->m_ex_tab_writer.writeRow(conflict_trans,
5993                                             key_rec,
5994                                             data_rec,
5995                                             ::server_id,
5996                                             ndb_mi_get_master_server_id(),
5997                                             g_ndb_slave_state.current_master_server_epoch,
5998                                             old_row,
5999                                             new_row,
6000                                             op_type,
6001                                             conflict_cause,
6002                                             transaction_id,
6003                                             write_set,
6004                                             err) != 0)
6005     {
6006       if (err.code != 0)
6007       {
6008         if (err.status == NdbError::TemporaryError)
6009         {
6010           /* Slave will roll back and retry entire transaction. */
6011           ERR_RETURN(err);
6012         }
6013         else
6014         {
6015           char msg[FN_REFLEN];
6016           my_snprintf(msg, sizeof(msg), "%s conflict handling "
6017                       "on table %s hit Ndb error %d '%s'",
6018                       handling_type,
6019                       table_name,
6020                       err.code,
6021                       err.message);
6022           push_warning_printf(current_thd, Sql_condition::SL_WARNING,
6023                               ER_EXCEPTIONS_WRITE_ERROR,
6024                               ER(ER_EXCEPTIONS_WRITE_ERROR), msg);
6025           /* Slave will stop replication. */
6026           DBUG_RETURN(ER_EXCEPTIONS_WRITE_ERROR);
6027         }
6028       }
6029     }
6030   } /* if (cfn_share->m_ex_tab != NULL) */
6031 
6032   DBUG_RETURN(0);
6033 }
6034 #endif /* HAVE_NDB_BINLOG */
6035 
6036 /**
6037   Update one record in NDB using primary key.
6038 */
6039 
6040 bool ha_ndbcluster::start_bulk_update()
6041 {
6042   DBUG_ENTER("ha_ndbcluster::start_bulk_update");
6043   if (!m_use_write && m_ignore_dup_key)
6044   {
6045     DBUG_PRINT("info", ("Batching turned off as duplicate key is "
6046                         "ignored by using peek_row"));
6047     DBUG_RETURN(TRUE);
6048   }
6049   DBUG_RETURN(FALSE);
6050 }
6051 
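/*
  Batched update entry point: forwards to ndb_update_row() with batching
  enabled.  dup_key_found is always reported as 0 here; duplicate key
  errors surface through the normal error path instead.
*/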
6052 int ha_ndbcluster::bulk_update_row(const uchar *old_data, uchar *new_data,
6053                                    uint *dup_key_found)
6054 {
6055   DBUG_ENTER("ha_ndbcluster::bulk_update_row");
6056   *dup_key_found= 0;
6057   DBUG_RETURN(ndb_update_row(old_data, new_data, 1));
6058 }
6059 
6060 int ha_ndbcluster::exec_bulk_update(uint *dup_key_found)
6061 {
6062   NdbTransaction* trans= m_thd_ndb->trans;
6063   DBUG_ENTER("ha_ndbcluster::exec_bulk_update");
6064   *dup_key_found= 0;
6065 
6066   // m_handler must be NULL or point to _this_ handler instance
6067   assert(m_thd_ndb->m_handler == NULL || m_thd_ndb->m_handler == this);
6068 
6069   if (m_thd_ndb->m_handler &&
6070       m_read_before_write_removal_possible)
6071   {
    /*
      This is an autocommit involving only one table and rbwr is on.

      Commit the autocommit transaction early (before the usual place
      in ndbcluster_commit) in order to:
      1) save one round trip, "no-commit+commit" converted to "commit"
      2) return the correct number of updated and affected rows
         to the update loop (which will ask the handler in rbwr mode)
    */
6081     DBUG_PRINT("info", ("committing auto-commit+rbwr early"));
6082     uint ignore_count= 0;
6083     const int ignore_error= 1;
6084     if (execute_commit(m_thd_ndb, trans,
6085                        m_thd_ndb->m_force_send, ignore_error,
6086                        &ignore_count) != 0)
6087     {
6088       no_uncommitted_rows_execute_failure();
6089       DBUG_RETURN(ndb_err(trans));
6090     }
6091     THD *thd= table->in_use;
6092     if (!applying_binlog(thd))
6093     {
6094       DBUG_PRINT("info", ("ignore_count: %u", ignore_count));
6095       assert(m_rows_changed >= ignore_count);
6096       assert(m_rows_updated >= ignore_count);
6097       m_rows_changed-= ignore_count;
6098       m_rows_updated-= ignore_count;
6099     }
6100     DBUG_RETURN(0);
6101   }
6102 
6103   if (m_thd_ndb->m_unsent_bytes == 0)
6104   {
6105     DBUG_PRINT("exit", ("skip execute - no unsent bytes"));
6106     DBUG_RETURN(0);
6107   }
6108 
6109   if (thd_allow_batch(table->in_use))
6110   {
6111     /*
6112       Turned on by @@transaction_allow_batching=ON
6113       or implicitly by slave exec thread
6114     */
6115     DBUG_PRINT("exit", ("skip execute - transaction_allow_batching is ON"));
6116     DBUG_RETURN(0);
6117   }
6118 
6119   if (m_thd_ndb->m_handler &&
6120       !m_blobs_pending)
6121   {
6122     // Execute at commit time(in 'ndbcluster_commit') to save a round trip
6123     DBUG_PRINT("exit", ("skip execute - simple autocommit"));
6124     DBUG_RETURN(0);
6125   }
6126 
6127   uint ignore_count= 0;
6128   if (execute_no_commit(m_thd_ndb, trans,
6129                         m_ignore_no_key || m_read_before_write_removal_used,
6130                         &ignore_count) != 0)
6131   {
6132     no_uncommitted_rows_execute_failure();
6133     DBUG_RETURN(ndb_err(trans));
6134   }
6135   THD *thd= table->in_use;
6136   if (!applying_binlog(thd))
6137   {
6138     assert(m_rows_changed >= ignore_count);
6139     assert(m_rows_updated >= ignore_count);
6140     m_rows_changed-= ignore_count;
6141     m_rows_updated-= ignore_count;
6142   }
6143   DBUG_RETURN(0);
6144 }
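/**
  Illustrative sketch (not part of the handler) of the round-trip saving
  referred to above, in terms of the underlying NdbApi calls. 'trans' is
  assumed to be an NdbTransaction with the update operations already
  defined; error handling is omitted.

  @code
    // Normal path: flush the batched operations now and commit later in
    // ndbcluster_commit() -> two round trips to the data nodes.
    trans->execute(NdbTransaction::NoCommit);
    // ... later, at commit time ...
    trans->execute(NdbTransaction::Commit);

    // Early-commit path used above for autocommit + read-before-write
    // removal: a single call both flushes and commits -> one round trip.
    trans->execute(NdbTransaction::Commit);
  @endcode
*/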
6145 
void ha_ndbcluster::end_bulk_update()
6147 {
6148   DBUG_ENTER("ha_ndbcluster::end_bulk_update");
6149   DBUG_VOID_RETURN;
6150 }
6151 
int ha_ndbcluster::update_row(const uchar *old_data, uchar *new_data)
6153 {
6154   return ndb_update_row(old_data, new_data, 0);
6155 }
6156 
void
ha_ndbcluster::setup_key_ref_for_ndb_record(const NdbRecord **key_rec,
6159                                             const uchar **key_row,
6160                                             const uchar *record,
6161                                             bool use_active_index)
6162 {
6163   DBUG_ENTER("setup_key_ref_for_ndb_record");
6164   if (use_active_index)
6165   {
6166     /* Use unique key to access table */
6167     DBUG_PRINT("info", ("Using unique index (%u)", active_index));
6168     assert((table->key_info[active_index].flags & HA_NOSAME));
6169     /* Can't use key if we didn't read it first */
6170     assert(bitmap_is_subset(m_key_fields[active_index], table->read_set));
6171     *key_rec= m_index[active_index].ndb_unique_record_row;
6172     *key_row= record;
6173   }
6174   else if (table_share->primary_key != MAX_KEY)
6175   {
6176     /* Use primary key to access table */
6177     DBUG_PRINT("info", ("Using primary key"));
6178     /* Can't use pk if we didn't read it first */
6179     assert(bitmap_is_subset(m_pk_bitmap_p, table->read_set));
6180     *key_rec= m_index[table_share->primary_key].ndb_unique_record_row;
6181     *key_row= record;
6182   }
6183   else
6184   {
6185     /* Use hidden primary key previously read into m_ref. */
6186     DBUG_PRINT("info", ("Using hidden primary key (%llu)", m_ref));
6187     /* Can't use hidden pk if we didn't read it first */
6188     assert(bitmap_is_subset(m_pk_bitmap_p, table->read_set));
6189     assert(m_read_before_write_removal_used == false);
6190     *key_rec= m_ndb_hidden_key_record;
6191     *key_row= (const uchar *)(&m_ref);
6192   }
6193   DBUG_VOID_RETURN;
6194 }
6195 
6196 
6197 /*
6198   Update one record in NDB using primary key
6199 */
6200 
int ha_ndbcluster::ndb_update_row(const uchar *old_data, uchar *new_data,
6202                                   int is_bulk_update)
6203 {
6204   THD *thd= table->in_use;
6205   Thd_ndb *thd_ndb= m_thd_ndb;
6206   NdbScanOperation* cursor= m_active_cursor;
6207   const NdbOperation *op;
6208   uint32 old_part_id= ~uint32(0), new_part_id= ~uint32(0);
6209   int error;
6210   longlong func_value;
6211   Uint32 func_value_uint32;
6212   bool have_pk= (table_share->primary_key != MAX_KEY);
6213   bool pk_update= (!m_read_before_write_removal_possible &&
6214                    have_pk &&
6215                    bitmap_is_overlapping(table->write_set, m_pk_bitmap_p) &&
6216                    primary_key_cmp(old_data, new_data));
6217   bool batch_allowed= !m_update_cannot_batch &&
6218     (is_bulk_update || thd_allow_batch(thd));
6219   NdbOperation::SetValueSpec sets[2];
6220   Uint32 num_sets= 0;
6221 
6222   DBUG_ENTER("ndb_update_row");
6223 
6224   /* Start a transaction now if none available
6225    * (Manual Binlog application...)
6226    */
6227   /* TODO : Consider hinting */
6228   if (unlikely((!m_thd_ndb->trans) &&
6229                !get_transaction(error)))
6230   {
6231     DBUG_RETURN(error);
6232   }
6233 
6234   NdbTransaction *trans= m_thd_ndb->trans;
6235   assert(trans);
6236 
6237   error = check_slave_state(thd);
6238   if (unlikely(error))
6239     DBUG_RETURN(error);
6240 
  /*
   * If IGNORE is in effect, ignore constraint violations on primary and
   * unique keys, but check that this is not part of
   * INSERT ... ON DUPLICATE KEY UPDATE
   */
6245   if (m_ignore_dup_key && (thd->lex->sql_command == SQLCOM_UPDATE ||
6246                            thd->lex->sql_command == SQLCOM_UPDATE_MULTI))
6247   {
6248     NDB_WRITE_OP write_op= (pk_update) ? NDB_PK_UPDATE : NDB_UPDATE;
6249     int peek_res= peek_indexed_rows(new_data, write_op);
6250 
6251     if (!peek_res)
6252     {
6253       DBUG_RETURN(HA_ERR_FOUND_DUPP_KEY);
6254     }
6255     if (peek_res != HA_ERR_KEY_NOT_FOUND)
6256       DBUG_RETURN(peek_res);
6257   }
6258 
6259   ha_statistic_increment(&SSV::ha_update_count);
6260 
6261   bool skip_partition_for_unique_index= FALSE;
6262   if (m_use_partition_pruning)
6263   {
6264     if (!cursor && m_read_before_write_removal_used)
6265     {
6266       ndb_index_type type= get_index_type(active_index);
6267       /*
6268         Ndb unique indexes are global so when
6269         m_read_before_write_removal_used is active
6270         the unique index can be used directly for update
6271         without finding the partitions
6272       */
6273       if (type == UNIQUE_INDEX ||
6274           type == UNIQUE_ORDERED_INDEX)
6275       {
6276         skip_partition_for_unique_index= TRUE;
6277         goto skip_partition_pruning;
6278       }
6279     }
6280     if ((error= get_parts_for_update(old_data, new_data, table->record[0],
6281                                      m_part_info, &old_part_id, &new_part_id,
6282                                      &func_value)))
6283     {
6284       m_part_info->err_value= func_value;
6285       DBUG_RETURN(error);
6286     }
6287     DBUG_PRINT("info", ("old_part_id: %u  new_part_id: %u", old_part_id, new_part_id));
6288   skip_partition_pruning:
6289     (void)0;
6290   }
6291 
6292   /*
6293    * Check for update of primary key or partition change
6294    * for special handling
6295    */
6296   if (pk_update || old_part_id != new_part_id)
6297   {
6298     DBUG_RETURN(ndb_pk_update_row(thd, old_data, new_data));
6299   }
6300   /*
6301     If we are updating a unique key with auto_increment
6302     then we need to update the auto_increment counter
6303    */
6304   if (table->found_next_number_field &&
6305       bitmap_is_set(table->write_set,
6306 		    table->found_next_number_field->field_index) &&
6307       (error= set_auto_inc(thd, table->found_next_number_field)))
6308   {
6309     DBUG_RETURN(error);
6310   }
6311   /*
6312     Set only non-primary-key attributes.
6313     We already checked that any primary key attribute in write_set has no
6314     real changes.
6315   */
6316   bitmap_copy(&m_bitmap, table->write_set);
6317   bitmap_subtract(&m_bitmap, m_pk_bitmap_p);
6318   uchar *mask= (uchar *)(m_bitmap.bitmap);
6319   assert(!pk_update);
6320 
6321   NdbOperation::OperationOptions *poptions = NULL;
6322   NdbOperation::OperationOptions options;
6323   options.optionsPresent=0;
6324 
  /* Need to set the value of any user-defined partitioning function.
     (except when using a unique index)
  */
6328   if (m_user_defined_partitioning && !skip_partition_for_unique_index)
6329   {
6330     if (func_value >= INT_MAX32)
6331       func_value_uint32= INT_MAX32;
6332     else
6333       func_value_uint32= (uint32)func_value;
6334     sets[num_sets].column= get_partition_id_column();
6335     sets[num_sets].value= &func_value_uint32;
6336     num_sets++;
6337 
6338     if (!cursor)
6339     {
6340       options.optionsPresent|= NdbOperation::OperationOptions::OO_PARTITION_ID;
6341       options.partitionId= new_part_id;
6342     }
6343   }
6344 
6345   eventSetAnyValue(thd, &options);
6346 
6347   const bool need_flush=
6348       thd_ndb->add_row_check_if_batch_full(m_bytes_per_write);
6349 
6350  const Uint32 authorValue = 1;
6351  if ((thd->slave_thread) &&
6352      (m_table->getExtraRowAuthorBits()))
6353  {
6354    /* Set author to indicate slave updated last */
6355    sets[num_sets].column= NdbDictionary::Column::ROW_AUTHOR;
6356    sets[num_sets].value= &authorValue;
6357    num_sets++;
6358  }
6359 
6360  if (num_sets)
6361  {
6362    options.optionsPresent|= NdbOperation::OperationOptions::OO_SETVALUE;
6363    options.extraSetValues= sets;
6364    options.numExtraSetValues= num_sets;
6365  }
6366 
6367   if (thd->slave_thread || THDVAR(thd, deferred_constraints))
6368   {
6369     options.optionsPresent |=
6370       NdbOperation::OperationOptions::OO_DEFERRED_CONSTAINTS;
6371   }
6372 
6373   if (thd_test_options(thd, OPTION_NO_FOREIGN_KEY_CHECKS))
6374   {
6375     DBUG_PRINT("info", ("Disabling foreign keys"));
6376     options.optionsPresent |=
6377       NdbOperation::OperationOptions::OO_DISABLE_FK;
6378   }
6379 
6380   if (cursor)
6381   {
    /*
      We are scanning records and want to update the record
      that was just found. Call updateCurrentTuple on the cursor
      to take over the lock to a new update operation,
      thus setting the primary key of the record from
      the active record in the cursor.
    */
6389     DBUG_PRINT("info", ("Calling updateTuple on cursor, write_set=0x%x",
6390                         table->write_set->bitmap[0]));
6391 
6392     if (options.optionsPresent != 0)
6393       poptions = &options;
6394 
6395     if (!(op= cursor->updateCurrentTuple(trans, m_ndb_record,
6396                                          (const char*)new_data, mask,
6397                                          poptions,
6398                                          sizeof(NdbOperation::OperationOptions))))
6399       ERR_RETURN(trans->getNdbError());
6400 
6401     m_lock_tuple= FALSE;
6402     thd_ndb->m_unsent_bytes+= 12;
6403   }
6404   else
6405   {
6406     const NdbRecord *key_rec;
6407     const uchar *key_row;
6408     setup_key_ref_for_ndb_record(&key_rec, &key_row, new_data,
6409 				 m_read_before_write_removal_used);
6410 
6411     bool avoidNdbApiWriteOp = true; /* Default update op for ndb_update_row */
6412 #ifdef HAVE_NDB_BINLOG
6413     Uint32 buffer[ MAX_CONFLICT_INTERPRETED_PROG_SIZE ];
6414     NdbInterpretedCode code(m_table, buffer,
6415                             sizeof(buffer)/sizeof(buffer[0]));
6416 
6417     if (thd->slave_thread)
6418     {
6419       bool conflict_handled = false;
6420       /* Conflict resolution in slave thread. */
6421       DBUG_PRINT("info", ("Slave thread, preparing conflict resolution for update with mask : %x", *((Uint32*)mask)));
6422 
6423       if (unlikely((error = prepare_conflict_detection(UPDATE_ROW,
6424                                                        key_rec,
6425                                                        m_ndb_record,
6426                                                        old_data,
6427                                                        new_data,
6428                                                        table->write_set,
6429                                                        trans,
6430                                                        &code,
6431                                                        &options,
6432                                                        conflict_handled,
6433                                                        avoidNdbApiWriteOp))))
6434         DBUG_RETURN(error);
6435 
6436       if (unlikely(conflict_handled))
6437       {
        /* No need to continue with operation definition */
6439         /* TODO : Ensure batch execution */
6440         DBUG_RETURN(0);
6441       }
6442     }
6443 #endif /* HAVE_NDB_BINLOG */
6444     if (options.optionsPresent !=0)
6445       poptions= &options;
6446 
6447     if (likely(avoidNdbApiWriteOp))
6448     {
6449       if (!(op= trans->updateTuple(key_rec, (const char *)key_row,
6450                                    m_ndb_record, (const char*)new_data, mask,
6451                                    poptions,
6452                                    sizeof(NdbOperation::OperationOptions))))
6453         ERR_RETURN(trans->getNdbError());
6454     }
6455     else
6456     {
6457       DBUG_PRINT("info", ("Update op using writeTuple"));
6458       if (!(op= trans->writeTuple(key_rec, (const char *)key_row,
6459                                   m_ndb_record, (const char*)new_data, mask,
6460                                   poptions,
6461                                   sizeof(NdbOperation::OperationOptions))))
6462         ERR_RETURN(trans->getNdbError());
6463     }
6464   }
6465 
6466   uint blob_count= 0;
6467   if (uses_blob_value(table->write_set))
6468   {
6469     int row_offset= (int)(new_data - table->record[0]);
6470     int res= set_blob_values(op, row_offset, table->write_set, &blob_count,
6471                              (batch_allowed && !need_flush));
6472     if (res != 0)
6473       DBUG_RETURN(res);
6474   }
6475   uint ignore_count= 0;
6476   /*
6477     Batch update operation if we are doing a scan for update, unless
6478     there exist UPDATE AFTER triggers
6479   */
6480   if (m_update_cannot_batch ||
6481       !(cursor || (batch_allowed && have_pk)) ||
6482       need_flush)
6483   {
6484     if (execute_no_commit(m_thd_ndb, trans,
6485                           m_ignore_no_key || m_read_before_write_removal_used,
6486                           &ignore_count) != 0)
6487     {
6488       no_uncommitted_rows_execute_failure();
6489       DBUG_RETURN(ndb_err(trans));
6490     }
6491   }
6492   else if (blob_count > 0)
6493     m_blobs_pending= TRUE;
6494 
6495   m_rows_changed++;
6496   m_rows_updated++;
6497 
6498   if (!applying_binlog(thd))
6499   {
6500     assert(m_rows_changed >= ignore_count);
6501     assert(m_rows_updated >= ignore_count);
6502     m_rows_changed-= ignore_count;
6503     m_rows_updated-= ignore_count;
6504   }
6505 
6506   DBUG_RETURN(0);
6507 }
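/**
  Illustrative sketch (not part of the handler) of the
  NdbOperation::OperationOptions mechanism used by ndb_update_row() above
  to attach extra set-values and a partition id to an update operation.
  The column name "extra_col" and the surrounding variables (ndbtab, trans,
  key_rec, key_row, row_rec, row_data, mask, part_id) are hypothetical;
  error handling is omitted.

  @code
    Uint32 extra_value= 42;
    NdbOperation::SetValueSpec set;
    set.column= ndbtab->getColumn("extra_col");   // hypothetical column
    set.value= &extra_value;

    NdbOperation::OperationOptions opts;
    opts.optionsPresent= NdbOperation::OperationOptions::OO_SETVALUE |
                         NdbOperation::OperationOptions::OO_PARTITION_ID;
    opts.extraSetValues= &set;
    opts.numExtraSetValues= 1;
    opts.partitionId= part_id;

    const NdbOperation *op=
      trans->updateTuple(key_rec, (const char*)key_row,
                         row_rec, (const char*)row_data, mask,
                         &opts, sizeof(opts));
  @endcode
*/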
6508 
6509 
6510 /*
6511   handler delete interface
6512 */
6513 
int ha_ndbcluster::delete_row(const uchar *record)
6515 {
6516   return ndb_delete_row(record, FALSE);
6517 }
6518 
bool ha_ndbcluster::start_bulk_delete()
6520 {
6521   DBUG_ENTER("start_bulk_delete");
6522   m_is_bulk_delete = true;
6523   DBUG_RETURN(0); // Bulk delete used by handler
6524 }
6525 
int ha_ndbcluster::end_bulk_delete()
6527 {
6528   NdbTransaction* trans= m_thd_ndb->trans;
6529   DBUG_ENTER("end_bulk_delete");
6530   assert(m_is_bulk_delete); // Don't allow end() without start()
6531   m_is_bulk_delete = false;
6532 
6533   // m_handler must be NULL or point to _this_ handler instance
6534   assert(m_thd_ndb->m_handler == NULL || m_thd_ndb->m_handler == this);
6535 
6536   if (m_thd_ndb->m_handler &&
6537       m_read_before_write_removal_possible)
6538   {
    /*
      This is an autocommit involving only one table and rbwr is on.

      Commit the autocommit transaction early (before the usual place
      in ndbcluster_commit) in order to:
      1) save one round trip, "no-commit+commit" converted to "commit"
      2) return the correct number of updated and affected rows
         to the delete loop (which will ask the handler in rbwr mode)
    */
6548     DBUG_PRINT("info", ("committing auto-commit+rbwr early"));
6549     uint ignore_count= 0;
6550     const int ignore_error= 1;
6551     if (execute_commit(m_thd_ndb, trans,
6552                        m_thd_ndb->m_force_send, ignore_error,
6553                        &ignore_count) != 0)
6554     {
6555       no_uncommitted_rows_execute_failure();
6556       m_rows_deleted = 0;
6557       DBUG_RETURN(ndb_err(trans));
6558     }
6559     THD *thd= table->in_use;
6560     if (!applying_binlog(thd))
6561     {
6562       DBUG_PRINT("info", ("ignore_count: %u", ignore_count));
6563       assert(m_rows_deleted >= ignore_count);
6564       m_rows_deleted-= ignore_count;
6565     }
6566     DBUG_RETURN(0);
6567   }
6568 
6569   if (m_thd_ndb->m_unsent_bytes == 0)
6570   {
6571     DBUG_PRINT("exit", ("skip execute - no unsent bytes"));
6572     DBUG_RETURN(0);
6573   }
6574 
6575   if (thd_allow_batch(table->in_use))
6576   {
6577     /*
6578       Turned on by @@transaction_allow_batching=ON
6579       or implicitly by slave exec thread
6580     */
6581     DBUG_PRINT("exit", ("skip execute - transaction_allow_batching is ON"));
6582     DBUG_RETURN(0);
6583   }
6584 
6585   if (m_thd_ndb->m_handler)
6586   {
6587     // Execute at commit time(in 'ndbcluster_commit') to save a round trip
6588     DBUG_PRINT("exit", ("skip execute - simple autocommit"));
6589     DBUG_RETURN(0);
6590   }
6591 
6592   uint ignore_count= 0;
6593   if (execute_no_commit(m_thd_ndb, trans,
6594                         m_ignore_no_key || m_read_before_write_removal_used,
6595                         &ignore_count) != 0)
6596   {
6597     no_uncommitted_rows_execute_failure();
6598     DBUG_RETURN(ndb_err(trans));
6599   }
6600 
6601   THD *thd= table->in_use;
6602   if (!applying_binlog(thd))
6603   {
6604     assert(m_rows_deleted >= ignore_count);
6605     m_rows_deleted-= ignore_count;
6606     no_uncommitted_rows_update(ignore_count);
6607   }
6608   DBUG_RETURN(0);
6609 }
6610 
6611 
/**
  Delete one record from NDB, using primary key.
*/
6615 
int ha_ndbcluster::ndb_delete_row(const uchar *record,
6617                                   bool primary_key_update)
6618 {
6619   THD *thd= table->in_use;
6620   Thd_ndb *thd_ndb= m_thd_ndb;
6621   NdbScanOperation* cursor= m_active_cursor;
6622   const NdbOperation *op;
6623   uint32 part_id= ~uint32(0);
6624   int error;
6625   bool allow_batch= !m_delete_cannot_batch &&
6626     (m_is_bulk_delete || thd_allow_batch(thd));
6627 
6628   DBUG_ENTER("ndb_delete_row");
6629 
6630   /* Start a transaction now if none available
6631    * (Manual Binlog application...)
6632    */
6633   /* TODO : Consider hinting */
6634   if (unlikely((!m_thd_ndb->trans) &&
6635                !get_transaction(error)))
6636   {
6637     DBUG_RETURN(error);
6638   }
6639 
6640   NdbTransaction *trans= m_thd_ndb->trans;
6641   assert(trans);
6642 
6643   error = check_slave_state(thd);
6644   if (unlikely(error))
6645     DBUG_RETURN(error);
6646 
6647   ha_statistic_increment(&SSV::ha_delete_count);
6648   m_rows_changed++;
6649 
6650   bool skip_partition_for_unique_index= FALSE;
6651   if (m_use_partition_pruning)
6652   {
6653     if (!cursor && m_read_before_write_removal_used)
6654     {
6655       ndb_index_type type= get_index_type(active_index);
6656       /*
6657         Ndb unique indexes are global so when
6658         m_read_before_write_removal_used is active
6659         the unique index can be used directly for deleting
6660         without finding the partitions
6661       */
6662       if (type == UNIQUE_INDEX ||
6663           type == UNIQUE_ORDERED_INDEX)
6664       {
6665         skip_partition_for_unique_index= TRUE;
6666         goto skip_partition_pruning;
6667       }
6668     }
6669     if ((error= get_part_for_delete(record, table->record[0], m_part_info,
6670                                     &part_id)))
6671     {
6672       DBUG_RETURN(error);
6673     }
6674   skip_partition_pruning:
6675     (void)0;
6676   }
6677 
6678   NdbOperation::OperationOptions options;
6679   NdbOperation::OperationOptions *poptions = NULL;
6680   options.optionsPresent=0;
6681 
6682   eventSetAnyValue(thd, &options);
6683 
  /*
    Poor approximation: assume a delete is ~ the write row size / 4
  */
6687   uint delete_size= 12 + (m_bytes_per_write >> 2);
6688   const bool need_flush =
6689       thd_ndb->add_row_check_if_batch_full(delete_size);
6690 
6691   if (thd->slave_thread || THDVAR(thd, deferred_constraints))
6692   {
6693     options.optionsPresent |=
6694       NdbOperation::OperationOptions::OO_DEFERRED_CONSTAINTS;
6695   }
6696 
6697   if (thd_test_options(thd, OPTION_NO_FOREIGN_KEY_CHECKS))
6698   {
6699     DBUG_PRINT("info", ("Disabling foreign keys"));
6700     options.optionsPresent |=
6701       NdbOperation::OperationOptions::OO_DISABLE_FK;
6702   }
6703 
6704   if (cursor)
6705   {
6706     if (options.optionsPresent != 0)
6707       poptions = &options;
6708 
    /*
      We are scanning records and want to delete the record
      that was just found. Call deleteCurrentTuple on the cursor
      to take over the lock to a new delete operation,
      thus setting the primary key of the record from
      the active record in the cursor.
    */
6716     DBUG_PRINT("info", ("Calling deleteTuple on cursor"));
6717     if ((op = cursor->deleteCurrentTuple(trans, m_ndb_record,
6718                                          NULL, // result_row
6719                                          NULL, // result_mask
6720                                          poptions,
6721                                          sizeof(NdbOperation::OperationOptions))) == 0)
6722       ERR_RETURN(trans->getNdbError());
6723     m_lock_tuple= FALSE;
6724     thd_ndb->m_unsent_bytes+= 12;
6725 
6726     no_uncommitted_rows_update(-1);
6727     m_rows_deleted++;
6728 
6729     if (!(primary_key_update || m_delete_cannot_batch))
6730     {
6731       // If deleting from cursor, NoCommit will be handled in next_result
6732       DBUG_RETURN(0);
6733     }
6734   }
6735   else
6736   {
6737     const NdbRecord *key_rec;
6738     const uchar *key_row;
6739 
6740     if (m_user_defined_partitioning && !skip_partition_for_unique_index)
6741     {
6742       options.optionsPresent|= NdbOperation::OperationOptions::OO_PARTITION_ID;
6743       options.partitionId= part_id;
6744     }
6745 
6746     setup_key_ref_for_ndb_record(&key_rec, &key_row, record,
6747 				 m_read_before_write_removal_used);
6748 
6749 #ifdef HAVE_NDB_BINLOG
6750     Uint32 buffer[ MAX_CONFLICT_INTERPRETED_PROG_SIZE ];
6751     NdbInterpretedCode code(m_table, buffer,
6752                             sizeof(buffer)/sizeof(buffer[0]));
6753     if (thd->slave_thread)
6754     {
6755        bool conflict_handled = false;
6756        bool dummy_delete_does_not_care = false;
6757 
6758       /* Conflict resolution in slave thread. */
6759       if (unlikely((error = prepare_conflict_detection(DELETE_ROW,
6760                                                        key_rec,
6761                                                        m_ndb_record,
6762                                                        key_row, /* old_data */
6763                                                        NULL,    /* new_data */
6764                                                        table->write_set,
6765                                                        trans,
6766                                                        &code,
6767                                                        &options,
6768                                                        conflict_handled,
6769                                                        dummy_delete_does_not_care))))
6770         DBUG_RETURN(error);
6771 
6772       if (unlikely(conflict_handled))
6773       {
6774         /* No need to continue with operation definition */
6775         /* TODO : Ensure batch execution */
6776         DBUG_RETURN(0);
6777       }
6778     }
6779 #endif /* HAVE_NDB_BINLOG */
6780     if (options.optionsPresent != 0)
6781       poptions= &options;
6782 
6783     if (!(op=trans->deleteTuple(key_rec, (const char *)key_row,
6784                                 m_ndb_record,
6785                                 NULL, // row
6786                                 NULL, // mask
6787                                 poptions,
6788                                 sizeof(NdbOperation::OperationOptions))))
6789       ERR_RETURN(trans->getNdbError());
6790 
6791     no_uncommitted_rows_update(-1);
6792     m_rows_deleted++;
6793 
6794     /*
6795       Check if we can batch the delete.
6796 
6797       We don't batch deletes as part of primary key updates.
6798       We do not batch deletes on tables with no primary key. For such tables,
6799       replication uses full table scan to locate the row to delete. The
6800       problem is the following scenario when deleting 2 (or more) rows:
6801 
6802        1. Table scan to locate the first row.
6803        2. Delete the row, batched so no execute.
6804        3. Table scan to locate the second row is executed, along with the
6805           batched delete operation from step 2.
6806        4. The first row is returned from nextResult() (not deleted yet).
6807        5. The kernel deletes the row (operation from step 2).
6808        6. lockCurrentTuple() is called on the row returned in step 4. However,
6809           as that row is now deleted, the operation fails and the transaction
6810           is aborted.
6811        7. The delete of the second tuple now fails, as the transaction has
6812           been aborted.
6813     */
6814 
6815     if ( allow_batch &&
6816 	 table_share->primary_key != MAX_KEY &&
6817 	 !primary_key_update &&
6818 	 !need_flush)
6819     {
6820       DBUG_RETURN(0);
6821     }
6822   }
6823 
6824   // Execute delete operation
6825   uint ignore_count= 0;
6826   if (execute_no_commit(m_thd_ndb, trans,
6827                         m_ignore_no_key || m_read_before_write_removal_used,
6828                         &ignore_count) != 0)
6829   {
6830     no_uncommitted_rows_execute_failure();
6831     DBUG_RETURN(ndb_err(trans));
6832   }
6833   if (!primary_key_update)
6834   {
6835     if (!applying_binlog(thd))
6836     {
6837       assert(m_rows_deleted >= ignore_count);
6838       m_rows_deleted-= ignore_count;
6839       no_uncommitted_rows_update(ignore_count);
6840     }
6841   }
6842   DBUG_RETURN(0);
6843 }
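/**
  Illustrative restatement (not part of the handler) of the batching rule
  applied at the end of ndb_delete_row() above, as a hypothetical helper:
  a delete may stay in the current batch only when batching is allowed,
  the table has a real primary key, the delete is not part of a primary
  key update, and the batch does not need to be flushed.

  @code
    static inline bool ndb_delete_can_be_batched(bool allow_batch,
                                                 bool has_primary_key,
                                                 bool primary_key_update,
                                                 bool need_flush)
    {
      return allow_batch && has_primary_key &&
             !primary_key_update && !need_flush;
    }
  @endcode
*/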
6844 
6845 /**
6846   Unpack a record returned from a scan.
6847   We copy field-for-field to
6848    1. Avoid unnecessary copying for sparse rows.
   2. Properly initialize unused null bits.
6850   Note that we do not unpack all returned rows; some primary/unique key
6851   operations can read directly into the destination row.
6852 */
void ha_ndbcluster::unpack_record(uchar *dst_row, const uchar *src_row)
6854 {
6855   int res;
6856   assert(src_row != NULL);
6857 
6858   my_ptrdiff_t dst_offset= dst_row - table->record[0];
6859   my_ptrdiff_t src_offset= src_row - table->record[0];
6860 
6861   /* Initialize the NULL bitmap. */
6862   memset(dst_row, 0xff, table->s->null_bytes);
6863 
6864   uchar *blob_ptr= m_blobs_buffer;
6865 
6866   for (uint i= 0; i < table_share->fields; i++)
6867   {
6868     Field *field= table->field[i];
6869     if (bitmap_is_set(table->read_set, i))
6870     {
6871       if (field->type() == MYSQL_TYPE_BIT)
6872       {
6873         Field_bit *field_bit= static_cast<Field_bit*>(field);
6874         if (!field->is_real_null(src_offset))
6875         {
6876           field->move_field_offset(src_offset);
6877           longlong value= field_bit->val_int();
6878           field->move_field_offset(dst_offset-src_offset);
6879           field_bit->set_notnull();
6880           /* Field_bit in DBUG requires the bit set in write_set for store(). */
6881           my_bitmap_map *old_map=
6882             dbug_tmp_use_all_columns(table, table->write_set);
6883           int res = field_bit->store(value, true);
6884           assert(res == 0); NDB_IGNORE_VALUE(res);
6885           dbug_tmp_restore_column_map(table->write_set, old_map);
6886           field->move_field_offset(-dst_offset);
6887         }
6888       }
6889       else if (field->flags & BLOB_FLAG)
6890       {
6891         Field_blob *field_blob= (Field_blob *)field;
6892         NdbBlob *ndb_blob= m_value[i].blob;
        /* unpack_record is *only* called for scan result processing,
         * *while* the scan is open and the Blob is active.
         * Verify the Blob state to be certain.
         * Accessing PK/UK operation Blobs after execute() is unsafe.
         */
6898         assert(ndb_blob != 0);
6899         assert(ndb_blob->getState() == NdbBlob::Active);
6900         int isNull;
6901         res= ndb_blob->getNull(isNull);
6902         assert(res == 0);                  // Already succeeded once
6903         Uint64 len64= 0;
6904         field_blob->move_field_offset(dst_offset);
6905         if (!isNull)
6906         {
6907           res= ndb_blob->getLength(len64);
6908           assert(res == 0 && len64 <= (Uint64)0xffffffff);
6909 
6910           if(len64 > field_blob->max_data_length())
6911           {
6912             len64 = calc_ndb_blob_len(ndb_blob->getColumn()->getCharset(),
6913                                     blob_ptr, field_blob->max_data_length());
6914 
6915             // push a warning
6916             push_warning_printf(table->in_use, Sql_condition::SL_WARNING,
6917                         WARN_DATA_TRUNCATED,
6918                         "Truncated value from TEXT field \'%s\'", field_blob->field_name);
6919 
6920           }
6921           field->set_notnull();
6922         }
6923         /* Need not set_null(), as we initialized null bits to 1 above. */
6924         field_blob->set_ptr((uint32)len64, blob_ptr);
6925         field_blob->move_field_offset(-dst_offset);
6926         blob_ptr+= (len64 + 7) & ~((Uint64)7);
6927       }
6928       else
6929       {
6930         field->move_field_offset(src_offset);
6931         /* Normal field (not blob or bit type). */
6932         if (!field->is_null())
6933         {
6934           /* Only copy actually used bytes of varstrings. */
6935           uint32 actual_length= field_used_length(field);
6936           uchar *src_ptr= field->ptr;
6937           field->move_field_offset(dst_offset - src_offset);
6938           field->set_notnull();
6939           memcpy(field->ptr, src_ptr, actual_length);
6940           field->move_field_offset(-dst_offset);
6941         }
6942         else
6943           field->move_field_offset(-src_offset);
6944         /* No action needed for a NULL field. */
6945       }
6946     }
6947   }
6948 }
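/**
  Illustrative sketch (not part of the handler) of the offset trick used
  by unpack_record() above: a Field object always describes a column
  relative to table->record[0], so to read the same column from another
  row buffer the field is temporarily shifted and then shifted back.
  'field' and 'src_row' are assumed to exist; error handling is omitted.

  @code
    my_ptrdiff_t offset= src_row - table->record[0];
    field->move_field_offset(offset);        // point field at src_row
    const bool was_null= field->is_null();   // read from src_row
    field->move_field_offset(-offset);       // restore the field
  @endcode
*/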
6949 
6950 
6951 /**
6952   Get the default value of the field from default_values of the table.
6953 */
static void get_default_value(void *def_val, Field *field)
6955 {
6956   assert(field != NULL);
6957 
6958   my_ptrdiff_t src_offset= field->table->default_values_offset();
6959 
6960   {
6961     if (bitmap_is_set(field->table->read_set, field->field_index))
6962     {
6963       if (field->type() == MYSQL_TYPE_BIT)
6964       {
6965         Field_bit *field_bit= static_cast<Field_bit*>(field);
6966         if (!field->is_real_null(src_offset))
6967         {
6968           field->move_field_offset(src_offset);
6969           longlong value= field_bit->val_int();
6970           /* Map to NdbApi format - two Uint32s */
6971           Uint32 out[2];
6972           out[0] = 0;
6973           out[1] = 0;
6974           for (int b=0; b < 64; b++)
6975           {
6976             out[b >> 5] |= (value & 1) << (b & 31);
6977 
6978             value= value >> 1;
6979           }
6980           memcpy(def_val, out, sizeof(longlong));
6981           field->move_field_offset(-src_offset);
6982         }
6983       }
6984       else if (field->flags & BLOB_FLAG)
6985       {
6986         assert(false);
6987       }
6988       else
6989       {
6990         field->move_field_offset(src_offset);
6991         /* Normal field (not blob or bit type). */
6992         if (!field->is_null())
6993         {
6994           /* Only copy actually used bytes of varstrings. */
6995           uint32 actual_length= field_used_length(field);
6996           uchar *src_ptr= field->ptr;
6997           field->set_notnull();
6998           memcpy(def_val, src_ptr, actual_length);
6999         }
7000         field->move_field_offset(-src_offset);
7001         /* No action needed for a NULL field. */
7002       }
7003     }
7004   }
7005 }
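/**
  Illustrative check (not part of the handler) of the BIT column layout
  produced by get_default_value() above: bit 'b' of the 64-bit value ends
  up in word b >> 5 at position b & 31, matching the two-Uint32 NdbApi
  format.

  @code
    Uint32 out[2]= { 0, 0 };
    Uint64 value= 0x8000000000000001ULL;     // bits 0 and 63 set
    for (int b= 0; b < 64; b++)
      out[b >> 5] |= (Uint32)((value >> b) & 1) << (b & 31);
    // out[0] == 0x00000001 and out[1] == 0x80000000
  @endcode
*/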
7006 
7007 /*
7008     DBUG_EXECUTE("value", print_results(););
7009 */
7010 
void ha_ndbcluster::print_results()
7012 {
7013   DBUG_ENTER("print_results");
7014 
7015 #ifndef NDEBUG
7016 
7017   char buf_type[MAX_FIELD_WIDTH], buf_val[MAX_FIELD_WIDTH];
7018   String type(buf_type, sizeof(buf_type), &my_charset_bin);
7019   String val(buf_val, sizeof(buf_val), &my_charset_bin);
7020   for (uint f= 0; f < table_share->fields; f++)
7021   {
7022     /* Use DBUG_PRINT since DBUG_FILE cannot be filtered out */
7023     char buf[2000];
7024     Field *field;
7025     void* ptr;
7026     NdbValue value;
7027 
7028     buf[0]= 0;
7029     field= table->field[f];
7030     if (!(value= m_value[f]).ptr)
7031     {
7032       my_stpcpy(buf, "not read");
7033       goto print_value;
7034     }
7035 
7036     ptr= field->ptr;
7037 
7038     if (! (field->flags & BLOB_FLAG))
7039     {
7040       if (value.rec->isNULL())
7041       {
7042         my_stpcpy(buf, "NULL");
7043         goto print_value;
7044       }
7045       type.length(0);
7046       val.length(0);
7047       field->sql_type(type);
7048       field->val_str(&val);
7049       my_snprintf(buf, sizeof(buf), "%s %s", type.c_ptr(), val.c_ptr());
7050     }
7051     else
7052     {
7053       NdbBlob *ndb_blob= value.blob;
7054       bool isNull= TRUE;
7055       assert(ndb_blob->getState() == NdbBlob::Active);
7056       ndb_blob->getNull(isNull);
7057       if (isNull)
7058         my_stpcpy(buf, "NULL");
7059     }
7060 
7061 print_value:
7062     DBUG_PRINT("value", ("%u,%s: %s", f, field->field_name, buf));
7063   }
7064 #endif
7065   DBUG_VOID_RETURN;
7066 }
7067 
7068 
int ha_ndbcluster::index_init(uint index, bool sorted)
7070 {
7071   DBUG_ENTER("ha_ndbcluster::index_init");
7072   DBUG_PRINT("enter", ("index: %u  sorted: %d", index, sorted));
7073   active_index= index;
7074   m_sorted= sorted;
  /*
    Locks are explicitly released in scan
    unless m_lock.type == TL_READ_HIGH_PRIORITY
    and there is no subsequent call to unlock_row()
  */
7080   m_lock_tuple= FALSE;
7081 
7082   if (table_share->primary_key == MAX_KEY &&
7083       m_use_partition_pruning)
7084   {
7085     bitmap_union(table->read_set, &m_part_info->full_part_field_set);
7086   }
7087 
7088   DBUG_RETURN(0);
7089 }
7090 
7091 
int ha_ndbcluster::index_end()
7093 {
7094   DBUG_ENTER("ha_ndbcluster::index_end");
7095   DBUG_RETURN(close_scan());
7096 }
7097 
7098 /**
7099   Check if key contains null.
7100 */
7101 static
7102 int
check_null_in_key(const KEY* key_info, const uchar *key, uint key_len)
7104 {
7105   KEY_PART_INFO *curr_part, *end_part;
7106   const uchar* end_ptr= key + key_len;
7107   curr_part= key_info->key_part;
7108   end_part= curr_part + key_info->user_defined_key_parts;
7109 
7110   for (; curr_part != end_part && key < end_ptr; curr_part++)
7111   {
7112     if (curr_part->null_bit && *key)
7113       return 1;
7114 
7115     key += curr_part->store_length;
7116   }
7117   return 0;
7118 }
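/**
  Illustrative example (not part of the handler) of the key buffer layout
  that check_null_in_key() above inspects, for a hypothetical two-part
  nullable INT key holding the value (NULL, 7), assuming each part is a
  1-byte null indicator followed by the 4-byte value (store_length 5).

  @code
    const uchar key_buf[10]= { 1, 0, 0, 0, 0,     // part 1: NULL
                               0, 7, 0, 0, 0 };   // part 2: 7
    // check_null_in_key(key_info, key_buf, sizeof(key_buf)) returns 1
    // because the first part's null indicator byte is set.
  @endcode
*/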
7119 
int ha_ndbcluster::index_read(uchar *buf,
7121                               const uchar *key, uint key_len,
7122                               enum ha_rkey_function find_flag)
7123 {
7124   key_range start_key, end_key, *end_key_p=NULL;
7125   bool descending= FALSE;
7126   DBUG_ENTER("ha_ndbcluster::index_read");
7127   DBUG_PRINT("enter", ("active_index: %u, key_len: %u, find_flag: %d",
7128                        active_index, key_len, find_flag));
7129 
7130   start_key.key= key;
7131   start_key.length= key_len;
7132   start_key.flag= find_flag;
7133   switch (find_flag) {
7134   case HA_READ_KEY_EXACT:
7135     /**
7136      * Specify as a closed EQ_RANGE.
7137      * Setting HA_READ_AFTER_KEY seems odd, but this is according
7138      * to MySQL convention, see opt_range.cc.
7139      */
7140     end_key.key= key;
7141     end_key.length= key_len;
7142     end_key.flag= HA_READ_AFTER_KEY;
7143     end_key_p= &end_key;
7144     break;
7145   case HA_READ_KEY_OR_PREV:
7146   case HA_READ_BEFORE_KEY:
7147   case HA_READ_PREFIX_LAST:
7148   case HA_READ_PREFIX_LAST_OR_PREV:
7149     descending= TRUE;
7150     break;
7151   default:
7152     break;
7153   }
7154   const int error= read_range_first_to_buf(&start_key, end_key_p,
7155                                            descending,
7156                                            m_sorted, buf);
7157   table->status=error ? STATUS_NOT_FOUND: 0;
7158   DBUG_RETURN(error);
7159 }
7160 
7161 
int ha_ndbcluster::index_next(uchar *buf)
7163 {
7164   DBUG_ENTER("ha_ndbcluster::index_next");
7165   ha_statistic_increment(&SSV::ha_read_next_count);
7166   const int error= next_result(buf);
7167   table->status=error ? STATUS_NOT_FOUND: 0;
7168   DBUG_RETURN(error);
7169 }
7170 
7171 
int ha_ndbcluster::index_prev(uchar *buf)
7173 {
7174   DBUG_ENTER("ha_ndbcluster::index_prev");
7175   ha_statistic_increment(&SSV::ha_read_prev_count);
7176   const int error= next_result(buf);
7177   table->status=error ? STATUS_NOT_FOUND: 0;
7178   DBUG_RETURN(error);
7179 }
7180 
7181 
int ha_ndbcluster::index_first(uchar *buf)
7183 {
7184   DBUG_ENTER("ha_ndbcluster::index_first");
7185   ha_statistic_increment(&SSV::ha_read_first_count);
7186   // Start the ordered index scan and fetch the first row
7187 
7188   // Only HA_READ_ORDER indexes get called by index_first
7189   const int error= ordered_index_scan(0, 0, m_sorted, FALSE, buf, NULL);
7190   table->status=error ? STATUS_NOT_FOUND: 0;
7191   DBUG_RETURN(error);
7192 }
7193 
7194 
int ha_ndbcluster::index_last(uchar *buf)
7196 {
7197   DBUG_ENTER("ha_ndbcluster::index_last");
7198   ha_statistic_increment(&SSV::ha_read_last_count);
7199   const int error= ordered_index_scan(0, 0, m_sorted, TRUE, buf, NULL);
7200   table->status=error ? STATUS_NOT_FOUND: 0;
7201   DBUG_RETURN(error);
7202 }
7203 
int ha_ndbcluster::index_read_last(uchar * buf, const uchar * key, uint key_len)
7205 {
7206   DBUG_ENTER("ha_ndbcluster::index_read_last");
7207   DBUG_RETURN(index_read(buf, key, key_len, HA_READ_PREFIX_LAST));
7208 }
7209 
7210 
int ha_ndbcluster::read_range_first_to_buf(const key_range *start_key,
7212                                            const key_range *end_key,
7213                                            bool desc, bool sorted,
7214                                            uchar* buf)
7215 {
7216   part_id_range part_spec;
7217   ndb_index_type type= get_index_type(active_index);
7218   const KEY* key_info= table->key_info+active_index;
7219   int error;
7220   DBUG_ENTER("ha_ndbcluster::read_range_first_to_buf");
7221   DBUG_PRINT("info", ("desc: %d, sorted: %d", desc, sorted));
7222 
7223   if (unlikely((error= close_scan())))
7224     DBUG_RETURN(error);
7225 
7226   if (m_use_partition_pruning)
7227   {
7228     assert(m_pushed_join_operation != PUSHED_ROOT);
7229     get_partition_set(table, buf, active_index, start_key, &part_spec);
7230     DBUG_PRINT("info", ("part_spec.start_part: %u  part_spec.end_part: %u",
7231                         part_spec.start_part, part_spec.end_part));
    /*
      If partition pruning has found no partition in the set,
      we can return HA_ERR_END_OF_FILE.
      If partition pruning has found exactly one partition in the set,
      we can optimize the scan to run towards that partition only.
    */
7238     if (part_spec.start_part > part_spec.end_part)
7239     {
7240       DBUG_RETURN(HA_ERR_END_OF_FILE);
7241     }
7242 
7243     if (part_spec.start_part == part_spec.end_part)
7244     {
      /*
        Only one partition needs to be scanned. If sorted output was
        requested it is no longer needed, since the output from a single
        ordered partition index is already sorted.
      */
7250       sorted= FALSE;
7251       if (unlikely(!get_transaction_part_id(part_spec.start_part, error)))
7252       {
7253         DBUG_RETURN(error);
7254       }
7255     }
7256   }
7257 
7258   switch (type){
7259   case PRIMARY_KEY_ORDERED_INDEX:
7260   case PRIMARY_KEY_INDEX:
7261     if (start_key &&
7262         start_key->length == key_info->key_length &&
7263         start_key->flag == HA_READ_KEY_EXACT)
7264     {
7265       if (!m_thd_ndb->trans)
7266         if (unlikely(!start_transaction_key(active_index,
7267                                             start_key->key, error)))
7268           DBUG_RETURN(error);
7269       error= pk_read(start_key->key, start_key->length, buf,
7270 		  (m_use_partition_pruning)? &(part_spec.start_part) : NULL);
7271       DBUG_RETURN(error == HA_ERR_KEY_NOT_FOUND ? HA_ERR_END_OF_FILE : error);
7272     }
7273     break;
7274   case UNIQUE_ORDERED_INDEX:
7275   case UNIQUE_INDEX:
7276     if (start_key && start_key->length == key_info->key_length &&
7277         start_key->flag == HA_READ_KEY_EXACT &&
7278         !check_null_in_key(key_info, start_key->key, start_key->length))
7279     {
7280       if (!m_thd_ndb->trans)
7281         if (unlikely(!start_transaction_key(active_index,
7282                                             start_key->key, error)))
7283           DBUG_RETURN(error);
7284       error= unique_index_read(start_key->key, start_key->length, buf);
7285       DBUG_RETURN(error == HA_ERR_KEY_NOT_FOUND ? HA_ERR_END_OF_FILE : error);
7286     }
7287     else if (type == UNIQUE_INDEX)
7288       DBUG_RETURN(full_table_scan(key_info,
7289                                   start_key,
7290                                   end_key,
7291                                   buf));
7292     break;
7293   default:
7294     break;
7295   }
7296   if (!m_use_partition_pruning && !m_thd_ndb->trans)
7297   {
7298     get_partition_set(table, buf, active_index, start_key, &part_spec);
7299     if (part_spec.start_part == part_spec.end_part)
7300       if (unlikely(!start_transaction_part_id(part_spec.start_part, error)))
7301         DBUG_RETURN(error);
7302   }
7303   // Start the ordered index scan and fetch the first row
7304   DBUG_RETURN(ordered_index_scan(start_key, end_key, sorted, desc, buf,
7305 	  (m_use_partition_pruning)? &part_spec : NULL));
7306 }
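/**
  Illustrative restatement (not part of the handler) of the shortcut taken
  in read_range_first_to_buf() above, as a hypothetical helper: a range
  whose start key is a full-length HA_READ_KEY_EXACT key can be answered
  with a single primary/unique key read instead of an ordered index scan.

  @code
    static inline bool range_is_exact_key(const key_range *start_key,
                                          const KEY *key_info)
    {
      return start_key != NULL &&
             start_key->length == key_info->key_length &&
             start_key->flag == HA_READ_KEY_EXACT;
    }
  @endcode
*/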
7307 
int ha_ndbcluster::read_range_first(const key_range *start_key,
7309                                     const key_range *end_key,
7310                                     bool eq_r, bool sorted)
7311 {
7312   uchar* buf= table->record[0];
7313   DBUG_ENTER("ha_ndbcluster::read_range_first");
7314   DBUG_RETURN(read_range_first_to_buf(start_key, end_key, FALSE,
7315                                       sorted, buf));
7316 }
7317 
int ha_ndbcluster::read_range_next()
7319 {
7320   DBUG_ENTER("ha_ndbcluster::read_range_next");
7321   DBUG_RETURN(next_result(table->record[0]));
7322 }
7323 
7324 
int ha_ndbcluster::rnd_init(bool scan)
7326 {
7327   int error;
7328   DBUG_ENTER("rnd_init");
7329   DBUG_PRINT("enter", ("scan: %d", scan));
7330 
7331   if ((error= close_scan()))
7332     DBUG_RETURN(error);
7333   index_init(table_share->primary_key, 0);
7334   DBUG_RETURN(0);
7335 }
7336 
int ha_ndbcluster::close_scan()
7338 {
7339   DBUG_ENTER("close_scan");
7340 
7341   if (m_active_query)
7342   {
7343     m_active_query->close(m_thd_ndb->m_force_send);
7344     m_active_query= NULL;
7345   }
7346 
7347   NdbScanOperation *cursor= m_active_cursor;
7348   if (!cursor)
7349   {
7350     cursor = m_multi_cursor;
7351     if (!cursor)
7352       DBUG_RETURN(0);
7353   }
7354 
7355   int error;
7356   NdbTransaction *trans= m_thd_ndb->trans;
7357   if ((error= scan_handle_lock_tuple(cursor, trans)) != 0)
7358     DBUG_RETURN(error);
7359 
7360   if (m_thd_ndb->m_unsent_bytes)
7361   {
    /*
      Take over any pending operations to the
      deleting/updating transaction before closing the scan
    */
7366     DBUG_PRINT("info", ("thd_ndb->m_unsent_bytes: %ld",
7367                         (long) m_thd_ndb->m_unsent_bytes));
7368     if (execute_no_commit(m_thd_ndb, trans, m_ignore_no_key) != 0)
7369     {
7370       no_uncommitted_rows_execute_failure();
7371       DBUG_RETURN(ndb_err(trans));
7372     }
7373   }
7374 
7375   cursor->close(m_thd_ndb->m_force_send, TRUE);
7376   m_active_cursor= NULL;
7377   m_multi_cursor= NULL;
7378   DBUG_RETURN(0);
7379 }
7380 
int ha_ndbcluster::rnd_end()
7382 {
7383   DBUG_ENTER("rnd_end");
7384   DBUG_RETURN(close_scan());
7385 }
7386 
7387 
int ha_ndbcluster::rnd_next(uchar *buf)
7389 {
7390   DBUG_ENTER("rnd_next");
7391   ha_statistic_increment(&SSV::ha_read_rnd_next_count);
7392 
7393   int error;
7394   if (m_active_cursor || m_active_query)
7395     error= next_result(buf);
7396   else
7397     error= full_table_scan(NULL, NULL, NULL, buf);
7398 
7399   table->status= error ? STATUS_NOT_FOUND: 0;
7400   DBUG_RETURN(error);
7401 }
7402 
7403 
/**
  An "interesting" record has been found and its pk
  retrieved by calling position(). Now it's time to read
  the record from db once again.
*/
7409 
int ha_ndbcluster::rnd_pos(uchar *buf, uchar *pos)
7411 {
7412   DBUG_ENTER("rnd_pos");
7413   ha_statistic_increment(&SSV::ha_read_rnd_count);
7414   // The primary key for the record is stored in pos
7415   // Perform a pk_read using primary key "index"
7416   {
7417     part_id_range part_spec;
7418     uint key_length= ref_length;
7419     if (m_user_defined_partitioning)
7420     {
7421       if (table_share->primary_key == MAX_KEY)
7422       {
7423         /*
7424           The partition id has been fetched from ndb
7425           and has been stored directly after the hidden key
7426         */
7427         DBUG_DUMP("key+part", pos, key_length);
7428         key_length= ref_length - sizeof(m_part_id);
7429         part_spec.start_part= part_spec.end_part= *(uint32 *)(pos + key_length);
7430       }
7431       else
7432       {
7433         key_range key_spec;
7434         KEY *key_info= table->key_info + table_share->primary_key;
7435         key_spec.key= pos;
7436         key_spec.length= key_length;
7437         key_spec.flag= HA_READ_KEY_EXACT;
7438         get_full_part_id_from_key(table, buf, key_info,
7439                                   &key_spec, &part_spec);
7440         assert(part_spec.start_part == part_spec.end_part);
7441       }
7442       DBUG_PRINT("info", ("partition id %u", part_spec.start_part));
7443     }
7444     DBUG_DUMP("key", pos, key_length);
7445     int res= pk_read(pos, key_length, buf,
7446                      (m_user_defined_partitioning) ?
7447                      &(part_spec.start_part)
7448                      : NULL);
7449     if (res == HA_ERR_KEY_NOT_FOUND)
7450     {
      /**
       * When using rnd_pos the server first retrieves a set of records
       * (typically by scanning them) and stores a unique identifier
       * (for ndb this is the primary key), and later retrieves the
       * record again using rnd_pos and the saved primary key. For ndb,
       * since we only support committed read, the record could have
       * been deleted between the "save" and the rnd_pos.
       * Therefore we return HA_ERR_RECORD_DELETED in this case rather
       * than HA_ERR_KEY_NOT_FOUND (which would cause the statement to
       * be aborted).
       */
7463       res= HA_ERR_RECORD_DELETED;
7464     }
7465     table->status= res ? STATUS_NOT_FOUND: 0;
7466     DBUG_RETURN(res);
7467   }
7468 }
7469 
7470 
7471 /**
7472   Store the primary key of this record in ref
7473   variable, so that the row can be retrieved again later
7474   using "reference" in rnd_pos.
7475 */
7476 
void ha_ndbcluster::position(const uchar *record)
7478 {
7479   KEY *key_info;
7480   KEY_PART_INFO *key_part;
7481   KEY_PART_INFO *end;
7482   uchar *buff;
7483   uint key_length;
7484 
7485   DBUG_ENTER("position");
7486 
7487   if (table_share->primary_key != MAX_KEY)
7488   {
7489     key_length= ref_length;
7490     key_info= table->key_info + table_share->primary_key;
7491     key_part= key_info->key_part;
7492     end= key_part + key_info->user_defined_key_parts;
7493     buff= ref;
7494 
7495     for (; key_part != end; key_part++)
7496     {
7497       if (key_part->null_bit) {
7498         /* Store 0 if the key part is a NULL part */
7499         if (record[key_part->null_offset]
7500             & key_part->null_bit) {
7501           *buff++= 1;
7502           continue;
7503         }
7504         *buff++= 0;
7505       }
7506 
7507       size_t len = key_part->length;
7508       const uchar * ptr = record + key_part->offset;
7509       Field *field = key_part->field;
7510       if (field->type() ==  MYSQL_TYPE_VARCHAR)
7511       {
7512         size_t var_length;
7513         if (((Field_varstring*)field)->length_bytes == 1)
7514         {
7515           /**
7516            * Keys always use 2 bytes length
7517            */
7518           buff[0] = ptr[0];
7519           buff[1] = 0;
7520           var_length = ptr[0];
7521           assert(var_length <= len);
7522           memcpy(buff+2, ptr + 1, var_length);
7523         }
7524         else
7525         {
7526           var_length = ptr[0] + (ptr[1]*256);
7527           assert(var_length <= len);
7528           memcpy(buff, ptr, var_length + 2);
7529         }
7530         /**
7531           We have to zero-pad any unused VARCHAR buffer so that MySQL is
7532           able to use simple memcmp to compare two instances of the same
7533           unique key value to determine if they are equal.
7534           MySQL does this to compare contents of two 'ref' values.
7535           (Duplicate weedout algorithm is one such case.)
7536         */
7537         memset(buff+2+var_length, 0, len - var_length);
7538         len += 2;
7539       }
7540       else
7541       {
7542         memcpy(buff, ptr, len);
7543       }
7544       buff += len;
7545     }
7546   }
7547   else
7548   {
7549     // No primary key, get hidden key
7550     DBUG_PRINT("info", ("Getting hidden key"));
7551     // If table has user defined partition save the partition id as well
7552     if (m_user_defined_partitioning)
7553     {
7554       DBUG_PRINT("info", ("Saving partition id %u", m_part_id));
7555       key_length= ref_length - sizeof(m_part_id);
7556       memcpy(ref+key_length, (void *)&m_part_id, sizeof(m_part_id));
7557     }
7558     else
7559       key_length= ref_length;
7560 #ifndef NDEBUG
7561     int hidden_no= table->s->fields;
7562     const NDBTAB *tab= m_table;
7563     const NDBCOL *hidden_col= tab->getColumn(hidden_no);
7564     assert(hidden_col->getPrimaryKey() &&
7565            hidden_col->getAutoIncrement() &&
7566            key_length == NDB_HIDDEN_PRIMARY_KEY_LENGTH);
7567 #endif
7568     memcpy(ref, &m_ref, key_length);
7569   }
7570 #ifndef NDEBUG
7571   if (table_share->primary_key == MAX_KEY && m_user_defined_partitioning)
7572     DBUG_DUMP("key+part", ref, key_length+sizeof(m_part_id));
7573 #endif
7574   DBUG_DUMP("ref", ref, key_length);
7575   DBUG_VOID_RETURN;
7576 }
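/**
  Illustrative sketch (not part of the handler) of the 'ref' encoding that
  position() above produces for a VARCHAR(10) key part storing "abc" with
  a 1-byte length prefix: the length is widened to 2 bytes and the unused
  tail is zero-padded so that cmp_ref()/memcmp() can compare refs directly.
  The buffers below are hypothetical.

  @code
    const uchar rec[1 + 10]= { 3, 'a', 'b', 'c' };  // record format
    uchar ref_buf[2 + 10];
    ref_buf[0]= rec[0];                             // low length byte
    ref_buf[1]= 0;                                  // keys always use 2 bytes
    memcpy(ref_buf + 2, rec + 1, rec[0]);           // copy used bytes
    memset(ref_buf + 2 + rec[0], 0, 10 - rec[0]);   // zero-pad the tail
  @endcode
*/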
7577 
7578 int
ha_ndbcluster::cmp_ref(const uchar * ref1, const uchar * ref2)
7580 {
7581   DBUG_ENTER("cmp_ref");
7582 
7583   if (table_share->primary_key != MAX_KEY)
7584   {
7585     KEY *key_info= table->key_info + table_share->primary_key;
7586     KEY_PART_INFO *key_part= key_info->key_part;
7587     KEY_PART_INFO *end= key_part + key_info->user_defined_key_parts;
7588 
7589     for (; key_part != end; key_part++)
7590     {
7591       // NOTE: No need to check for null since PK is not-null
7592 
7593       Field *field= key_part->field;
7594       int result= field->key_cmp(ref1, ref2);
7595       if (result)
7596       {
7597         DBUG_RETURN(result);
7598       }
7599 
7600       if (field->type() ==  MYSQL_TYPE_VARCHAR)
7601       {
7602         ref1+= 2;
7603         ref2+= 2;
7604       }
7605 
7606       ref1+= key_part->length;
7607       ref2+= key_part->length;
7608     }
7609     DBUG_RETURN(0);
7610   }
7611   else
7612   {
7613     DBUG_RETURN(memcmp(ref1, ref2, ref_length));
7614   }
7615 }
7616 
int ha_ndbcluster::info(uint flag)
7618 {
7619   THD *thd= table->in_use;
7620   int result= 0;
7621   DBUG_ENTER("info");
7622   DBUG_PRINT("enter", ("flag: %d", flag));
7623 
7624   if (flag & HA_STATUS_POS)
7625     DBUG_PRINT("info", ("HA_STATUS_POS"));
7626   if (flag & HA_STATUS_TIME)
7627     DBUG_PRINT("info", ("HA_STATUS_TIME"));
7628   if (flag & HA_STATUS_CONST)
7629   {
7630     /*
7631       Set size required by a single record in the MRR 'HANDLER_BUFFER'.
7632       MRR buffer has both a fixed and a variable sized part.
7633       Size is calculated assuming max size of the variable part.
7634 
7635       See comments for multi_range_fixed_size() and
7636       multi_range_max_entry() regarding how the MRR buffer is organized.
7637     */
7638     stats.mrr_length_per_rec= multi_range_fixed_size(1) +
7639       multi_range_max_entry(PRIMARY_KEY_INDEX, table_share->reclength);
7640   }
7641   while (flag & HA_STATUS_VARIABLE)
7642   {
7643     if (!thd)
7644       thd= current_thd;
7645     DBUG_PRINT("info", ("HA_STATUS_VARIABLE"));
7646 
7647     if (!m_table_info)
7648     {
7649       set_my_errno(check_ndb_connection(thd));
7650       if (my_errno())
7651         DBUG_RETURN(my_errno());
7652     }
7653 
7654     /*
7655       May need to update local copy of statistics in
7656       'm_table_info', either directly from datanodes,
7657       or from shared (mutex protected) cached copy, if:
7658        1) 'use_exact_count' has been set (by config or user).
7659        2) HA_STATUS_NO_LOCK -> read from shared cached copy.
7660        3) Local copy is invalid.
7661     */
7662     bool exact_count= THDVAR(thd, use_exact_count);
7663     if (exact_count                 ||         // 1)
7664         !(flag & HA_STATUS_NO_LOCK) ||         // 2)
7665         m_table_info == NULL        ||         // 3)
7666         m_table_info->records == ~(ha_rows)0)  // 3)
7667     {
7668       result= update_stats(thd, (exact_count || !(flag & HA_STATUS_NO_LOCK)));
7669       if (result)
7670         DBUG_RETURN(result);
7671     }
7672     /* Read from local statistics, fast and fuzzy, w/o locks */
7673     else
7674     {
7675       assert(m_table_info->records != ~(ha_rows)0);
7676       stats.records= m_table_info->records +
7677                      m_table_info->no_uncommitted_rows_count;
7678     }
7679 
7680     if (thd->lex->sql_command != SQLCOM_SHOW_TABLE_STATUS &&
7681         thd->lex->sql_command != SQLCOM_SHOW_KEYS)
7682     {
7683       /*
7684         Just use whatever stats we have. However, the
7685         optimizer interprets the values 0 and 1 as EXACT,
7686         so a value < 2 should not be returned.
7687       */
7688       if (stats.records < 2)
7689         stats.records= 2;
7690     }
7691     break;
7692   }
7693   /* RPK moved to variable part */
7694   if (flag & HA_STATUS_VARIABLE)
7695   {
7696     /* No meaningful way to return error */
7697     DBUG_PRINT("info", ("rec_per_key"));
7698     set_rec_per_key();
7699   }
7700   if (flag & HA_STATUS_ERRKEY)
7701   {
7702     DBUG_PRINT("info", ("HA_STATUS_ERRKEY dupkey=%u", m_dupkey));
7703     errkey= m_dupkey;
7704   }
7705   if (flag & HA_STATUS_AUTO)
7706   {
7707     DBUG_PRINT("info", ("HA_STATUS_AUTO"));
7708     if (m_table && table->found_next_number_field)
7709     {
7710       if (!thd)
7711         thd= current_thd;
7712       set_my_errno(check_ndb_connection(thd));
7713       if (my_errno())
7714         DBUG_RETURN(my_errno());
7715       Ndb *ndb= get_ndb(thd);
7716       Ndb_tuple_id_range_guard g(m_share);
7717 
7718       Uint64 auto_increment_value64;
7719       if (ndb->readAutoIncrementValue(m_table, g.range,
7720                                       auto_increment_value64) == -1)
7721       {
7722         const NdbError err= ndb->getNdbError();
7723         sql_print_error("Error %lu in readAutoIncrementValue(): %s",
7724                         (ulong) err.code, err.message);
7725         stats.auto_increment_value= ~(ulonglong)0;
7726       }
7727       else
7728         stats.auto_increment_value= (ulonglong)auto_increment_value64;
7729     }
7730   }
7731 
7732   if(result == -1)
7733     result= HA_ERR_NO_CONNECTION;
7734 
7735   DBUG_RETURN(result);
7736 }
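
/*
  Editor's example (illustrative only) of the fast path above: if the locally
  cached m_table_info->records is 100 and this transaction has 5 uncommitted
  inserted rows (no_uncommitted_rows_count == 5), stats.records becomes 105.
  For statements other than SHOW TABLE STATUS / SHOW KEYS the value is then
  clamped to at least 2, since the optimizer treats 0 and 1 as exact counts.
*/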
7737 
7738 
7739 void ha_ndbcluster::get_dynamic_partition_info(ha_statistics *stat_info,
7740                                                ha_checksum *check_sum,
7741                                                uint part_id)
7742 {
7743   DBUG_PRINT("info", ("ha_ndbcluster::get_dynamic_partition_info"));
7744 
7745   int error = 0;
7746   THD *thd = table->in_use;
7747 
7748   if (!thd)
7749     thd = current_thd;
7750   if (!m_table_info)
7751   {
7752     if ((error = check_ndb_connection(thd)))
7753       goto err;
7754   }
7755   error = update_stats(thd, 1, part_id);
7756 
7757   if (error == 0)
7758   {
7759     stat_info->records = stats.records;
7760     stat_info->mean_rec_length = stats.mean_rec_length;
7761     stat_info->data_file_length = stats.data_file_length;
7762     stat_info->delete_length = stats.delete_length;
7763     stat_info->max_data_file_length = stats.max_data_file_length;
7764     return;
7765   }
7766 
7767 err:
7768 
7769   DBUG_PRINT("warning",
7770     ("ha_ndbcluster::get_dynamic_partition_info failed with error code %u",
7771      error));
7772 }
7773 
7774 
7775 int ha_ndbcluster::extra(enum ha_extra_function operation)
7776 {
7777   DBUG_ENTER("extra");
7778   switch (operation) {
7779   case HA_EXTRA_IGNORE_DUP_KEY:       /* Dup keys don't rollback everything*/
7780     DBUG_PRINT("info", ("HA_EXTRA_IGNORE_DUP_KEY"));
7781     DBUG_PRINT("info", ("Ignoring duplicate key"));
7782     m_ignore_dup_key= TRUE;
7783     break;
7784   case HA_EXTRA_NO_IGNORE_DUP_KEY:
7785     DBUG_PRINT("info", ("HA_EXTRA_NO_IGNORE_DUP_KEY"));
7786     m_ignore_dup_key= FALSE;
7787     break;
7788   case HA_EXTRA_IGNORE_NO_KEY:
7789     DBUG_PRINT("info", ("HA_EXTRA_IGNORE_NO_KEY"));
7790     DBUG_PRINT("info", ("Turning on AO_IgnoreError at Commit/NoCommit"));
7791     m_ignore_no_key= TRUE;
7792     break;
7793   case HA_EXTRA_NO_IGNORE_NO_KEY:
7794     DBUG_PRINT("info", ("HA_EXTRA_NO_IGNORE_NO_KEY"));
7795     DBUG_PRINT("info", ("Turning off AO_IgnoreError at Commit/NoCommit"));
7796     m_ignore_no_key= FALSE;
7797     break;
7798   case HA_EXTRA_WRITE_CAN_REPLACE:
7799     DBUG_PRINT("info", ("HA_EXTRA_WRITE_CAN_REPLACE"));
7800     if (!m_has_unique_index ||
7801         /*
7802            Always set if slave, quick fix for bug 27378
7803            or if manual binlog application, for bug 46662
7804         */
7805         applying_binlog(current_thd))
7806     {
7807       DBUG_PRINT("info", ("Turning ON use of write instead of insert"));
7808       m_use_write= TRUE;
7809     }
7810     break;
7811   case HA_EXTRA_WRITE_CANNOT_REPLACE:
7812     DBUG_PRINT("info", ("HA_EXTRA_WRITE_CANNOT_REPLACE"));
7813     DBUG_PRINT("info", ("Turning OFF use of write instead of insert"));
7814     m_use_write= FALSE;
7815     break;
7816   case HA_EXTRA_DELETE_CANNOT_BATCH:
7817     DBUG_PRINT("info", ("HA_EXTRA_DELETE_CANNOT_BATCH"));
7818     m_delete_cannot_batch= TRUE;
7819     break;
7820   case HA_EXTRA_UPDATE_CANNOT_BATCH:
7821     DBUG_PRINT("info", ("HA_EXTRA_UPDATE_CANNOT_BATCH"));
7822     m_update_cannot_batch= TRUE;
7823     break;
7824   // We don't implement 'KEYREAD'. However, KEYREAD also implies DISABLE_JOINPUSH.
7825   case HA_EXTRA_KEYREAD:
7826     DBUG_PRINT("info", ("HA_EXTRA_KEYREAD"));
7827     m_disable_pushed_join= TRUE;
7828     break;
7829   case HA_EXTRA_NO_KEYREAD:
7830     DBUG_PRINT("info", ("HA_EXTRA_NO_KEYREAD"));
7831     m_disable_pushed_join= FALSE;
7832     break;
7833   default:
7834     break;
7835   }
7836 
7837   DBUG_RETURN(0);
7838 }
7839 
7840 
7841 bool ha_ndbcluster::start_read_removal()
7842 {
7843   THD *thd= table->in_use;
7844   DBUG_ENTER("start_read_removal");
7845 
7846   if (uses_blob_value(table->write_set))
7847   {
7848     DBUG_PRINT("exit", ("No! Blob field in write_set"));
7849     DBUG_RETURN(false);
7850   }
7851 
7852   if (thd->lex->sql_command == SQLCOM_DELETE &&
7853       table_share->blob_fields)
7854   {
7855     DBUG_PRINT("exit", ("No! DELETE from table with blob(s)"));
7856     DBUG_RETURN(false);
7857   }
7858 
7859   if (table_share->primary_key == MAX_KEY)
7860   {
7861     DBUG_PRINT("exit", ("No! Table with hidden key"));
7862     DBUG_RETURN(false);
7863   }
7864 
7865   if (bitmap_is_overlapping(table->write_set, m_pk_bitmap_p))
7866   {
7867     DBUG_PRINT("exit", ("No! Updating primary key"));
7868     DBUG_RETURN(false);
7869   }
7870 
7871   if (m_has_unique_index)
7872   {
7873     for (uint i= 0; i < table_share->keys; i++)
7874     {
7875       const KEY* key= table->key_info + i;
7876       if ((key->flags & HA_NOSAME) &&
7877           bitmap_is_overlapping(table->write_set,
7878                                 m_key_fields[i]))
7879       {
7880         DBUG_PRINT("exit", ("No! Unique key %d is updated", i));
7881         DBUG_RETURN(false);
7882       }
7883     }
7884   }
7885   m_read_before_write_removal_possible= TRUE;
7886   DBUG_PRINT("exit", ("Yes, rbwr is possible!"));
7887   DBUG_RETURN(true);
7888 }
7889 
7890 
7891 ha_rows ha_ndbcluster::end_read_removal(void)
7892 {
7893   DBUG_ENTER("end_read_removal");
7894   assert(m_read_before_write_removal_possible);
7895   DBUG_PRINT("info", ("updated: %llu, deleted: %llu",
7896                       m_rows_updated, m_rows_deleted));
7897   DBUG_RETURN(m_rows_updated + m_rows_deleted);
7898 }
7899 
7900 
7901 int ha_ndbcluster::reset()
7902 {
7903   DBUG_ENTER("ha_ndbcluster::reset");
7904   if (m_cond)
7905   {
7906     m_cond->cond_clear();
7907   }
7908   assert(m_active_query == NULL);
7909   if (m_pushed_join_operation==PUSHED_ROOT)  // Root of pushed query
7910   {
7911     delete m_pushed_join_member;             // Also delete QueryDef
7912   }
7913   m_pushed_join_member= NULL;
7914   m_pushed_join_operation= -1;
7915   m_disable_pushed_join= FALSE;
7916 
7917 #if 0
7918   // Magnus, disable this "hack" until it's possible to test if
7919   // it's still needed
7920   /*
7921     Regular partition pruning will set the bitmap appropriately.
7922     Some queries like ALTER TABLE don't use partition pruning and
7923     thus the 'used_partitions' bitmap needs to be initialized
7924   */
7925   if (m_part_info)
7926     bitmap_set_all(&m_part_info->used_partitions);
7927 #endif
7928 
7929   /* reset flags set by extra calls */
7930   m_read_before_write_removal_possible= FALSE;
7931   m_read_before_write_removal_used= FALSE;
7932   m_rows_updated= m_rows_deleted= 0;
7933   m_ignore_dup_key= FALSE;
7934   m_use_write= FALSE;
7935   m_ignore_no_key= FALSE;
7936   m_rows_inserted= (ha_rows) 0;
7937   m_rows_to_insert= (ha_rows) 1;
7938   m_delete_cannot_batch= FALSE;
7939   m_update_cannot_batch= FALSE;
7940 
7941   assert(m_is_bulk_delete == false);
7942   m_is_bulk_delete = false;
7943   DBUG_RETURN(0);
7944 }
7945 
7946 
7947 /**
7948   Start of an insert; remember the number of rows to be inserted. It will
7949   be used in write_row and get_autoincrement to send an optimal number
7950   of rows in each roundtrip to the server.
7951 
7952   @param
7953    rows     number of rows to insert, 0 if unknown
7954 */
7955 
7956 int
7957 ha_ndbcluster::flush_bulk_insert(bool allow_batch)
7958 {
7959   NdbTransaction *trans= m_thd_ndb->trans;
7960   DBUG_ENTER("ha_ndbcluster::flush_bulk_insert");
7961   DBUG_PRINT("info", ("Sending inserts to NDB, rows_inserted: %d",
7962                       (int)m_rows_inserted));
7963   assert(trans);
7964 
7965 
7966   if (! (m_thd_ndb->trans_options & TNTO_TRANSACTIONS_OFF))
7967   {
7968     if (!allow_batch &&
7969         execute_no_commit(m_thd_ndb, trans, m_ignore_no_key) != 0)
7970     {
7971       no_uncommitted_rows_execute_failure();
7972       DBUG_RETURN(ndb_err(trans));
7973     }
7974   }
7975   else
7976   {
7977     /*
7978       signal that transaction has been broken up and hence cannot
7979       be rolled back
7980     */
7981     THD *thd= table->in_use;
7982     thd->get_transaction()->mark_modified_non_trans_table(Transaction_ctx::SESSION);
7983     thd->get_transaction()->mark_modified_non_trans_table(Transaction_ctx::STMT);
7984     if (execute_commit(m_thd_ndb, trans, m_thd_ndb->m_force_send,
7985                        m_ignore_no_key) != 0)
7986     {
7987       no_uncommitted_rows_execute_failure();
7988       DBUG_RETURN(ndb_err(trans));
7989     }
7990     if (trans->restart() != 0)
7991     {
7992       assert(0);
7993       DBUG_RETURN(-1);
7994     }
7995   }
7996   DBUG_RETURN(0);
7997 }
7998 
7999 void ha_ndbcluster::start_bulk_insert(ha_rows rows)
8000 {
8001   DBUG_ENTER("start_bulk_insert");
8002   DBUG_PRINT("enter", ("rows: %d", (int)rows));
8003 
8004   m_rows_inserted= (ha_rows) 0;
8005   if (!m_use_write && m_ignore_dup_key)
8006   {
8007     /*
8008       Compare this if-expression with the one in write_row:
8009       we have a situation where peek_indexed_rows() will be called,
8010       so we cannot batch
8011     */
8012     DBUG_PRINT("info", ("Batching turned off as duplicate key is "
8013                         "ignored by using peek_row"));
8014     m_rows_to_insert= 1;
8015     DBUG_VOID_RETURN;
8016   }
8017   if (rows == (ha_rows) 0)
8018   {
8019     /* We don't know how many will be inserted, guess */
8020     m_rows_to_insert=
8021       (m_autoincrement_prefetch > DEFAULT_AUTO_PREFETCH)
8022       ? m_autoincrement_prefetch
8023       : DEFAULT_AUTO_PREFETCH;
8024     m_autoincrement_prefetch= m_rows_to_insert;
8025   }
8026   else
8027   {
8028     m_rows_to_insert= rows;
8029     if (m_autoincrement_prefetch < m_rows_to_insert)
8030       m_autoincrement_prefetch= m_rows_to_insert;
8031   }
8032 
8033   DBUG_VOID_RETURN;
8034 }
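
/*
  Editor's examples (illustrative only) of the sizing above:
   - INSERT ... SELECT (rows unknown, rows == 0): m_rows_to_insert is set to
     the larger of m_autoincrement_prefetch and the DEFAULT_AUTO_PREFETCH
     floor, and m_autoincrement_prefetch is raised to match.
   - Multi-row INSERT with 500 VALUES rows: m_rows_to_insert= 500, and
     m_autoincrement_prefetch is raised to 500 if it was smaller.
   - m_ignore_dup_key without m_use_write: batching is disabled and
     m_rows_to_insert is left at 1, since peek_indexed_rows() will be used.
*/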
8035 
8036 /**
8037   End of an insert.
8038 */
8039 int ha_ndbcluster::end_bulk_insert()
8040 {
8041   int error= 0;
8042 
8043   DBUG_ENTER("end_bulk_insert");
8044   // Check if last inserts need to be flushed
8045 
8046   THD *thd= table->in_use;
8047   Thd_ndb *thd_ndb= m_thd_ndb;
8048 
8049   if (!thd_allow_batch(thd) && thd_ndb->m_unsent_bytes)
8050   {
8051     bool allow_batch= (thd_ndb->m_handler != 0);
8052     error= flush_bulk_insert(allow_batch);
8053     if (error != 0)
8054       set_my_errno(error);
8055   }
8056 
8057   m_rows_inserted= (ha_rows) 0;
8058   m_rows_to_insert= (ha_rows) 1;
8059   DBUG_RETURN(error);
8060 }
8061 
8062 
8063 int ha_ndbcluster::extra_opt(enum ha_extra_function operation, ulong cache_size)
8064 {
8065   DBUG_ENTER("extra_opt");
8066   DBUG_PRINT("enter", ("cache_size: %lu", cache_size));
8067   DBUG_RETURN(extra(operation));
8068 }
8069 
8070 static const char *ha_ndbcluster_exts[] = {
8071  ha_ndb_ext,
8072  NullS
8073 };
8074 
8075 const char** ha_ndbcluster::bas_ext() const
8076 {
8077   return ha_ndbcluster_exts;
8078 }
8079 
8080 /**
8081   How many seeks it will take to read through the table.
8082 
8083   This is to be comparable to the number returned by records_in_range so
8084   that we can decide if we should scan the table or use keys.
8085 */
8086 
8087 double ha_ndbcluster::scan_time()
8088 {
8089   DBUG_ENTER("ha_ndbcluster::scan_time()");
8090   double res= rows2double(stats.records*1000);
8091   DBUG_PRINT("exit", ("table: %s value: %f",
8092                       m_tabname, res));
8093   DBUG_RETURN(res);
8094 }
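
/*
  Editor's example (illustrative only): with stats.records == 1000 the
  estimate above is rows2double(1000 * 1000) == 1000000. The large constant
  factor makes a full scan look expensive compared to the figures returned by
  records_in_range(), presumably to bias the optimizer towards index access
  where possible.
*/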
8095 
8096 /*
8097   Convert MySQL table locks into locks supported by Ndb Cluster.
8098   Note that MySQL Cluster does not currently support distributed
8099   table locks, so to be safe one should put the cluster in Single
8100   User Mode before relying on table locks when updating tables
8101   from several MySQL servers.
8102 */
8103 
8104 THR_LOCK_DATA **ha_ndbcluster::store_lock(THD *thd,
8105                                           THR_LOCK_DATA **to,
8106                                           enum thr_lock_type lock_type)
8107 {
8108   DBUG_ENTER("store_lock");
8109   if (lock_type != TL_IGNORE && m_lock.type == TL_UNLOCK)
8110   {
8111 
8112     /* If we are not doing a LOCK TABLE, then allow multiple
8113        writers */
8114 
8115     /* Since NDB does not currently have table locks
8116        this is treated as an ordinary lock */
8117 
8118     const bool in_lock_tables = thd_in_lock_tables(thd);
8119     const uint sql_command = thd_sql_command(thd);
8120     if ((lock_type >= TL_WRITE_CONCURRENT_INSERT &&
8121          lock_type <= TL_WRITE) &&
8122         !(in_lock_tables && sql_command == SQLCOM_LOCK_TABLES))
8123       lock_type= TL_WRITE_ALLOW_WRITE;
8124 
8125     /* In queries of type INSERT INTO t1 SELECT ... FROM t2 ...
8126        MySQL would use the lock TL_READ_NO_INSERT on t2, and that
8127        would conflict with TL_WRITE_ALLOW_WRITE, blocking all inserts
8128        to t2. Convert the lock to a normal read lock to allow
8129        concurrent inserts to t2. */
8130 
8131     if (lock_type == TL_READ_NO_INSERT && !thd->in_lock_tables)
8132       lock_type= TL_READ;
8133 
8134     /**
8135      * We need locks on the source table when
8136      *   doing an offline alter.
8137      * In 5.1 this worked due to TL_WRITE_ALLOW_READ,
8138      * but that lock type was removed in 5.5, so
8139      * request TL_WRITE explicitly here.
8140      */
8141     if (sql_command == SQLCOM_ALTER_TABLE)
8142       lock_type = TL_WRITE;
8143 
8144     m_lock.type=lock_type;
8145   }
8146   *to++= &m_lock;
8147 
8148   DBUG_PRINT("exit", ("lock_type: %d", lock_type));
8149 
8150   DBUG_RETURN(to);
8151 }
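
/*
  Editor's examples (illustrative only) of the conversions above, outside of
  LOCK TABLES:

    UPDATE t1 ...                      TL_WRITE              -> TL_WRITE_ALLOW_WRITE
    INSERT INTO t1 SELECT ... FROM t2  t2: TL_READ_NO_INSERT -> TL_READ
    ALTER TABLE t1 ...                 any requested lock    -> TL_WRITE

  Under LOCK TABLES ... WRITE the requested write lock is kept as-is.
*/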
8152 
8153 /*
8154   As MySQL will execute an external lock for every new table it uses
8155   we can use this to start the transactions.
8156   If we are in auto_commit mode we just need to start a transaction
8157   for the statement, this will be stored in thd_ndb.stmt.
8158   If not, we have to start a master transaction if there doesn't exist
8159   one from before, this will be stored in thd_ndb.all
8160 
8161   When a table lock is held, one transaction will be started which holds
8162   the table lock, and for each statement a new transaction will be started.
8163   If we are locking the table then:
8164   - save the NdbDictionary::Table for easy access
8165   - save reference to table statistics
8166   - refresh list of the indexes for the table if needed (if altered)
8167  */
8168 
8169 #ifdef HAVE_NDB_BINLOG
8170 static int ndbcluster_update_apply_status(THD *thd, int do_update)
8171 {
8172   Thd_ndb *thd_ndb= get_thd_ndb(thd);
8173   Ndb *ndb= thd_ndb->ndb;
8174   NDBDICT *dict= ndb->getDictionary();
8175   const NDBTAB *ndbtab;
8176   NdbTransaction *trans= thd_ndb->trans;
8177   ndb->setDatabaseName(NDB_REP_DB);
8178   Ndb_table_guard ndbtab_g(dict, NDB_APPLY_TABLE);
8179   if (!(ndbtab= ndbtab_g.get_table()))
8180   {
8181     return -1;
8182   }
8183   NdbOperation *op= 0;
8184   int r= 0;
8185   r|= (op= trans->getNdbOperation(ndbtab)) == 0;
8186   assert(r == 0);
8187   if (do_update)
8188     r|= op->updateTuple();
8189   else
8190     r|= op->writeTuple();
8191   assert(r == 0);
8192   // server_id
8193   r|= op->equal(0u, (Uint32)thd->server_id);
8194   assert(r == 0);
8195   if (!do_update)
8196   {
8197     // epoch
8198     r|= op->setValue(1u, (Uint64)0);
8199     assert(r == 0);
8200   }
8201   const char* group_master_log_name =
8202     ndb_mi_get_group_master_log_name();
8203   const Uint64 group_master_log_pos =
8204     ndb_mi_get_group_master_log_pos();
8205   const Uint64 future_event_relay_log_pos =
8206     ndb_mi_get_future_event_relay_log_pos();
8207   const Uint64 group_relay_log_pos =
8208     ndb_mi_get_group_relay_log_pos();
8209 
8210   // log_name
8211   char tmp_buf[FN_REFLEN];
8212   ndb_pack_varchar(ndbtab->getColumn(2u), tmp_buf,
8213                    group_master_log_name, (int)strlen(group_master_log_name));
8214   r|= op->setValue(2u, tmp_buf);
8215   assert(r == 0);
8216   // start_pos
8217   r|= op->setValue(3u, group_master_log_pos);
8218   assert(r == 0);
8219   // end_pos
8220   r|= op->setValue(4u, group_master_log_pos +
8221                    (future_event_relay_log_pos - group_relay_log_pos));
8222   assert(r == 0);
8223   return 0;
8224 }
8225 #endif /* HAVE_NDB_BINLOG */
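
/*
  Editor's note (derived from the operation built above): the column indexes
  used against the NDB_APPLY_TABLE (apply status) table map as follows:
    0: server_id   1: epoch   2: log_name   3: start_pos   4: end_pos
  where end_pos is computed as
    group_master_log_pos + (future_event_relay_log_pos - group_relay_log_pos).
*/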
8226 
8227 
8228 void
8229 Thd_ndb::transaction_checks()
8230 {
8231   THD* thd = m_thd;
8232 
8233   if (thd->lex->sql_command == SQLCOM_LOAD)
8234     trans_options|= TNTO_TRANSACTIONS_OFF;
8235   else if (!thd->get_transaction()->m_flags.enabled)
8236     trans_options|= TNTO_TRANSACTIONS_OFF;
8237   else if (!THDVAR(thd, use_transactions))
8238     trans_options|= TNTO_TRANSACTIONS_OFF;
8239   m_force_send= THDVAR(thd, force_send);
8240   if (!thd->slave_thread)
8241     m_batch_size= THDVAR(thd, batch_size);
8242   else
8243   {
8244     m_batch_size= THDVAR(NULL, batch_size); /* using global value */
8245     /* Do not use hinted TC selection in slave thread */
8246     THDVAR(thd, optimized_node_selection)=
8247       THDVAR(NULL, optimized_node_selection) & 1; /* using global value */
8248   }
8249 }
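
/*
  Editor's note (derived from the checks above): TNTO_TRANSACTIONS_OFF is set
  when the statement is LOAD DATA, when the THD has transactions disabled, or
  when THDVAR(thd, use_transactions) is 0. In that mode flush_bulk_insert()
  above commits and restarts the NDB transaction for each batch instead of
  using execute-no-commit, and the statement is marked as having modified a
  non-transactional table.
*/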
8250 
8251 
8252 int ha_ndbcluster::start_statement(THD *thd,
8253                                    Thd_ndb *thd_ndb,
8254                                    uint table_count)
8255 {
8256   NdbTransaction *trans= thd_ndb->trans;
8257   int error;
8258   DBUG_ENTER("ha_ndbcluster::start_statement");
8259 
8260   m_thd_ndb= thd_ndb;
8261   m_thd_ndb->transaction_checks();
8262 
8263   if (table_count == 0)
8264   {
8265     trans_register_ha(thd, FALSE, ht, NULL);
8266     if (thd_options(thd) & (OPTION_NOT_AUTOCOMMIT | OPTION_BEGIN))
8267     {
8268       if (!trans)
8269         trans_register_ha(thd, TRUE, ht, NULL);
8270       thd_ndb->m_handler= NULL;
8271     }
8272     else
8273     {
8274       /*
8275         this is an autocommit, we may keep a reference to the
8276         handler to be used in the commit phase for optimization
8277         reasons, deferring execute
8278       */
8279       thd_ndb->m_handler= this;
8280     }
8281   }
8282   else
8283   {
8284     /*
8285       there is more than one handler involved, so execute deferral
8286       is not possible
8287     */
8288     ha_ndbcluster* handler = thd_ndb->m_handler;
8289     thd_ndb->m_handler= NULL;
8290     if (handler != NULL)
8291     {
8292       /**
8293        * If we initially believed that this could be run
8294        *  using execute deferral...but changed our mind,
8295        *  add the handler to thd_ndb->open_tables like it would
8296        *  have been done "normally"
8297        */
8298       add_handler_to_open_tables(thd, thd_ndb, handler);
8299     }
8300   }
8301   if (!trans && table_count == 0)
8302   {
8303     assert(thd_ndb->changed_tables.is_empty() == TRUE);
8304     thd_ndb->trans_options= 0;
8305 
8306     DBUG_PRINT("trans",("Possibly starting transaction"));
8307     const uint opti_node_select = THDVAR(thd, optimized_node_selection);
8308     DBUG_PRINT("enter", ("optimized_node_selection: %u", opti_node_select));
8309     if (!(opti_node_select & 2) ||
8310         thd->lex->sql_command == SQLCOM_LOAD)
8311       if (unlikely(!start_transaction(error)))
8312         DBUG_RETURN(error);
8313 
8314     thd_ndb->init_open_tables();
8315     thd_ndb->m_slow_path= FALSE;
8316     if (!(thd_options(thd) & OPTION_BIN_LOG) ||
8317         thd->variables.binlog_format == BINLOG_FORMAT_STMT)
8318     {
8319       thd_ndb->trans_options|= TNTO_NO_LOGGING;
8320       thd_ndb->m_slow_path= TRUE;
8321     }
8322     else if (thd->slave_thread)
8323       thd_ndb->m_slow_path= TRUE;
8324   }
8325   DBUG_RETURN(0);
8326 }
8327 
8328 int
8329 ha_ndbcluster::add_handler_to_open_tables(THD *thd,
8330                                           Thd_ndb *thd_ndb,
8331                                           ha_ndbcluster* handler)
8332 {
8333   DBUG_ENTER("ha_ndbcluster::add_handler_to_open_tables");
8334   DBUG_PRINT("info", ("Adding %s", handler->m_share->key_string()));
8335 
8336   /**
8337    * thd_ndb->open_tables is only used when thd_ndb->m_handler is not set
8338    */
8339   assert(thd_ndb->m_handler == NULL);
8340   const void *key= handler->m_share;
8341   HASH_SEARCH_STATE state;
8342   THD_NDB_SHARE *thd_ndb_share=
8343     (THD_NDB_SHARE*)my_hash_first(&thd_ndb->open_tables,
8344                                   (const uchar *)&key, sizeof(key),
8345                                   &state);
8346   while (thd_ndb_share && thd_ndb_share->key != key)
8347   {
8348     thd_ndb_share=
8349       (THD_NDB_SHARE*)my_hash_next(&thd_ndb->open_tables,
8350                                    (const uchar *)&key, sizeof(key),
8351                                    &state);
8352   }
8353   if (thd_ndb_share == 0)
8354   {
8355     thd_ndb_share=
8356       (THD_NDB_SHARE *) thd->get_transaction()->allocate_memory(sizeof(THD_NDB_SHARE));
8357     if (!thd_ndb_share)
8358     {
8359       mem_alloc_error(sizeof(THD_NDB_SHARE));
8360       DBUG_RETURN(1);
8361     }
8362     thd_ndb_share->key= key;
8363     thd_ndb_share->stat.last_count= thd_ndb->count;
8364     thd_ndb_share->stat.no_uncommitted_rows_count= 0;
8365     thd_ndb_share->stat.records= ~(ha_rows)0;
8366     my_hash_insert(&thd_ndb->open_tables, (uchar *)thd_ndb_share);
8367   }
8368   else if (thd_ndb_share->stat.last_count != thd_ndb->count)
8369   {
8370     thd_ndb_share->stat.last_count= thd_ndb->count;
8371     thd_ndb_share->stat.no_uncommitted_rows_count= 0;
8372     thd_ndb_share->stat.records= ~(ha_rows)0;
8373   }
8374 
8375   handler->m_table_info= &thd_ndb_share->stat;
8376   DBUG_RETURN(0);
8377 }
8378 
8379 int ha_ndbcluster::init_handler_for_statement(THD *thd)
8380 {
8381   /*
8382     This is the place to make sure this handler instance
8383     has a started transaction.
8384 
8385     The transaction is started by the first handler on which
8386     MySQL Server calls external lock
8387 
8388     Other handlers in the same stmt or transaction should use
8389     the same NDB transaction. This is done by setting up the m_thd_ndb
8390     pointer to point to the NDB transaction object.
8391    */
8392 
8393   DBUG_ENTER("ha_ndbcluster::init_handler_for_statement");
8394   Thd_ndb *thd_ndb= m_thd_ndb;
8395   assert(thd_ndb);
8396 
8397   // store thread specific data first to set the right context
8398   m_autoincrement_prefetch= THDVAR(thd, autoincrement_prefetch_sz);
8399   // Start of transaction
8400   m_rows_changed= 0;
8401   m_blobs_pending= FALSE;
8402   release_blobs_buffer();
8403   m_slow_path= m_thd_ndb->m_slow_path;
8404 #ifdef HAVE_NDB_BINLOG
8405   if (unlikely(m_slow_path))
8406   {
8407     if (m_share == ndb_apply_status_share && thd->slave_thread)
8408         m_thd_ndb->trans_options|= TNTO_INJECTED_APPLY_STATUS;
8409   }
8410 #endif
8411 
8412   int ret = 0;
8413   if (thd_ndb->m_handler == 0)
8414   {
8415     assert(m_share);
8416     ret = add_handler_to_open_tables(thd, thd_ndb, this);
8417   }
8418   else
8419   {
8420     struct Ndb_local_table_statistics &stat= m_table_info_instance;
8421     stat.last_count= thd_ndb->count;
8422     stat.no_uncommitted_rows_count= 0;
8423     stat.records= ~(ha_rows)0;
8424     m_table_info= &stat;
8425   }
8426   DBUG_RETURN(ret);
8427 }
8428 
8429 int ha_ndbcluster::external_lock(THD *thd, int lock_type)
8430 {
8431   DBUG_ENTER("external_lock");
8432   if (lock_type != F_UNLCK)
8433   {
8434     int error;
8435     /*
8436       Check that this handler instance has a connection
8437       set up to the Ndb object of thd
8438     */
8439     if (check_ndb_connection(thd))
8440       DBUG_RETURN(1);
8441     Thd_ndb *thd_ndb= get_thd_ndb(thd);
8442 
8443     DBUG_PRINT("enter", ("lock_type != F_UNLCK "
8444                          "this: 0x%lx  thd: 0x%lx  thd_ndb: %lx  "
8445                          "thd_ndb->lock_count: %d",
8446                          (long) this, (long) thd, (long) thd_ndb,
8447                          thd_ndb->lock_count));
8448 
8449     if ((error= start_statement(thd, thd_ndb,
8450                                 thd_ndb->lock_count++)))
8451     {
8452       thd_ndb->lock_count--;
8453       DBUG_RETURN(error);
8454     }
8455     if ((error= init_handler_for_statement(thd)))
8456     {
8457       thd_ndb->lock_count--;
8458       DBUG_RETURN(error);
8459     }
8460     DBUG_RETURN(0);
8461   }
8462   else
8463   {
8464     Thd_ndb *thd_ndb= m_thd_ndb;
8465     assert(thd_ndb);
8466 
8467     DBUG_PRINT("enter", ("lock_type == F_UNLCK "
8468                          "this: 0x%lx  thd: 0x%lx  thd_ndb: %lx  "
8469                          "thd_ndb->lock_count: %d",
8470                          (long) this, (long) thd, (long) thd_ndb,
8471                          thd_ndb->lock_count));
8472 
8473     if (m_rows_changed && global_system_variables.query_cache_type)
8474     {
8475       DBUG_PRINT("info", ("Rows has changed"));
8476 
8477       if (thd_ndb->trans &&
8478           thd_options(thd) & (OPTION_NOT_AUTOCOMMIT | OPTION_BEGIN))
8479       {
8480         DBUG_PRINT("info", ("Add share to list of changed tables, %p",
8481                             m_share));
8482         /* NOTE push_back allocates memory using the transaction's mem_root! */
8483         thd_ndb->changed_tables.push_back(get_share(m_share),
8484                                           thd->get_transaction()->transaction_memroot());
8485       }
8486 
8487       if (opt_ndb_cache_check_time)
8488       {
8489         native_mutex_lock(&m_share->mutex);
8490         DBUG_PRINT("info", ("Invalidating commit_count"));
8491         m_share->commit_count= 0;
8492         m_share->commit_count_lock++;
8493         native_mutex_unlock(&m_share->mutex);
8494       }
8495     }
8496 
8497     if (!--thd_ndb->lock_count)
8498     {
8499       DBUG_PRINT("trans", ("Last external_lock"));
8500 
8501       if ((!(thd_options(thd) & (OPTION_NOT_AUTOCOMMIT | OPTION_BEGIN))) &&
8502           thd_ndb->trans)
8503       {
8504         if (thd_ndb->trans)
8505         {
8506           /*
8507             Unlock is done without a transaction commit / rollback.
8508             This happens if the thread didn't update any rows.
8509             In this case we must close the transaction to release resources
8510           */
8511           DBUG_PRINT("trans",("ending non-updating transaction"));
8512           thd_ndb->ndb->closeTransaction(thd_ndb->trans);
8513           thd_ndb->trans= NULL;
8514           thd_ndb->m_handler= NULL;
8515         }
8516       }
8517     }
8518     m_table_info= NULL;
8519 
8520     /*
8521       This is the place to make sure this handler instance
8522       is no longer connected to the active transaction.
8523 
8524       And since the handler is no longer part of the transaction
8525       it can't have open cursors, ops, queries or blobs pending.
8526     */
8527     m_thd_ndb= NULL;
8528 
8529     assert(m_active_query == NULL);
8530     if (m_active_query)
8531       DBUG_PRINT("warning", ("m_active_query != NULL"));
8532     m_active_query= NULL;
8533 
8534     if (m_active_cursor)
8535       DBUG_PRINT("warning", ("m_active_cursor != NULL"));
8536     m_active_cursor= NULL;
8537 
8538     if (m_multi_cursor)
8539       DBUG_PRINT("warning", ("m_multi_cursor != NULL"));
8540     m_multi_cursor= NULL;
8541 
8542     if (m_blobs_pending)
8543       DBUG_PRINT("warning", ("blobs_pending != 0"));
8544     m_blobs_pending= 0;
8545 
8546     DBUG_RETURN(0);
8547   }
8548 }
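
/*
  Editor's sketch (illustrative only) of the lock_count bookkeeping above for
  an autocommit SELECT touching two NDB tables:

    external_lock(thd, F_RDLCK);  // table 1: lock_count 0 -> 1, start_statement
    external_lock(thd, F_RDLCK);  // table 2: lock_count 1 -> 2, joins same trans
    // ... statement executes ...
    external_lock(thd, F_UNLCK);  // lock_count 2 -> 1
    external_lock(thd, F_UNLCK);  // lock_count 1 -> 0; a still-open,
                                  // non-updating transaction is closed here
*/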
8549 
8550 /**
8551   Unlock the last row read in an open scan.
8552   Rows are unlocked by default in ndb, but
8553   for SELECT ... FOR UPDATE and SELECT ... LOCK IN SHARE MODE
8554   locks are kept if unlock_row() is not called.
8555 */
8556 
8557 void ha_ndbcluster::unlock_row()
8558 {
8559   DBUG_ENTER("unlock_row");
8560 
8561   DBUG_PRINT("info", ("Unlocking row"));
8562   m_lock_tuple= FALSE;
8563   DBUG_VOID_RETURN;
8564 }
8565 
8566 /**
8567   Start statement, used when one of the tables are locked and also when
8568   a stored function is executed.
8569 
8570   start_stmt()
8571     thd                    Thd object
8572     lock_type              Lock type on table
8573 
8574   RETURN VALUE
8575     0                      Success
8576     >0                     Error code
8577 
8578   DESCRIPTION
8579     This call indicates the start of a statement when one of the tables in
8580     the statement is locked. In this case we cannot call external_lock.
8581     It also implies that external_lock is not called at end of statement.
8582     Rather the handlerton call commit (ndbcluster_commit) is called to
8583     indicate end of transaction. Thus there are cases when the commit call
8584     actually doesn't refer to a commit but only to an end of statement.
8585 
8586     In the case of stored functions, one stored function is treated as one
8587     statement and the call to commit comes at the end of the stored function.
8588 */
8589 
8590 int ha_ndbcluster::start_stmt(THD *thd, thr_lock_type lock_type)
8591 {
8592   int error=0;
8593   Thd_ndb *thd_ndb;
8594   DBUG_ENTER("start_stmt");
8595   assert(thd == table->in_use);
8596 
8597   thd_ndb= get_thd_ndb(thd);
8598   if ((error= start_statement(thd, thd_ndb, thd_ndb->start_stmt_count++)))
8599     goto error;
8600   if ((error= init_handler_for_statement(thd)))
8601     goto error;
8602   DBUG_RETURN(0);
8603 error:
8604   thd_ndb->start_stmt_count--;
8605   DBUG_RETURN(error);
8606 }
8607 
8608 NdbTransaction *
8609 ha_ndbcluster::start_transaction_row(const NdbRecord *ndb_record,
8610                                      const uchar *record,
8611                                      int &error)
8612 {
8613   NdbTransaction *trans;
8614   DBUG_ENTER("ha_ndbcluster::start_transaction_row");
8615   assert(m_thd_ndb);
8616   assert(m_thd_ndb->trans == NULL);
8617 
8618   m_thd_ndb->transaction_checks();
8619 
8620   Ndb *ndb= m_thd_ndb->ndb;
8621 
8622   Uint64 tmp[(MAX_KEY_SIZE_IN_WORDS*MAX_XFRM_MULTIPLY) >> 1];
8623   char *buf= (char*)&tmp[0];
8624   trans= ndb->startTransaction(ndb_record,
8625                                (const char*)record,
8626                                buf, sizeof(tmp));
8627 
8628   if (trans)
8629   {
8630     m_thd_ndb->m_transaction_hint_count[trans->getConnectedNodeId()]++;
8631     DBUG_PRINT("info", ("Delayed allocation of TC"));
8632     DBUG_RETURN(m_thd_ndb->trans= trans);
8633   }
8634 
8635   ERR_SET(m_thd_ndb->ndb->getNdbError(), error);
8636   DBUG_RETURN(NULL);
8637 }
8638 
8639 NdbTransaction *
8640 ha_ndbcluster::start_transaction_key(uint inx_no,
8641                                      const uchar *key_data,
8642                                      int &error)
8643 {
8644   NdbTransaction *trans;
8645   DBUG_ENTER("ha_ndbcluster::start_transaction_key");
8646   assert(m_thd_ndb);
8647   assert(m_thd_ndb->trans == NULL);
8648 
8649   m_thd_ndb->transaction_checks();
8650 
8651   Ndb *ndb= m_thd_ndb->ndb;
8652   const NdbRecord *key_rec= m_index[inx_no].ndb_unique_record_key;
8653 
8654   Uint64 tmp[(MAX_KEY_SIZE_IN_WORDS*MAX_XFRM_MULTIPLY) >> 1];
8655   char *buf= (char*)&tmp[0];
8656   trans= ndb->startTransaction(key_rec,
8657                                (const char*)key_data,
8658                                buf, sizeof(tmp));
8659 
8660   if (trans)
8661   {
8662     m_thd_ndb->m_transaction_hint_count[trans->getConnectedNodeId()]++;
8663     DBUG_PRINT("info", ("Delayed allocation of TC"));
8664     DBUG_RETURN(m_thd_ndb->trans= trans);
8665   }
8666 
8667   ERR_SET(m_thd_ndb->ndb->getNdbError(), error);
8668   DBUG_RETURN(NULL);
8669 }
8670 
8671 NdbTransaction *
8672 ha_ndbcluster::start_transaction(int &error)
8673 {
8674   NdbTransaction *trans;
8675   DBUG_ENTER("ha_ndbcluster::start_transaction");
8676 
8677   assert(m_thd_ndb);
8678   assert(m_thd_ndb->trans == NULL);
8679 
8680   m_thd_ndb->transaction_checks();
8681 
8682   const uint opti_node_select= THDVAR(table->in_use, optimized_node_selection);
8683   m_thd_ndb->connection->set_optimized_node_selection(opti_node_select & 1);
8684   if ((trans= m_thd_ndb->ndb->startTransaction()))
8685   {
8686     m_thd_ndb->m_transaction_no_hint_count[trans->getConnectedNodeId()]++;
8687     DBUG_PRINT("info", ("Delayed allocation of TC"));
8688     DBUG_RETURN(m_thd_ndb->trans= trans);
8689   }
8690 
8691   ERR_SET(m_thd_ndb->ndb->getNdbError(), error);
8692   DBUG_RETURN(NULL);
8693 }
8694 
8695 NdbTransaction *
8696 ha_ndbcluster::start_transaction_part_id(Uint32 part_id, int &error)
8697 {
8698   NdbTransaction *trans;
8699   DBUG_ENTER("ha_ndbcluster::start_transaction_part_id");
8700 
8701   assert(m_thd_ndb);
8702   assert(m_thd_ndb->trans == NULL);
8703 
8704   m_thd_ndb->transaction_checks();
8705 
8706   if ((trans= m_thd_ndb->ndb->startTransaction(m_table, part_id)))
8707   {
8708     m_thd_ndb->m_transaction_hint_count[trans->getConnectedNodeId()]++;
8709     DBUG_PRINT("info", ("Delayed allocation of TC"));
8710     DBUG_RETURN(m_thd_ndb->trans= trans);
8711   }
8712 
8713   ERR_SET(m_thd_ndb->ndb->getNdbError(), error);
8714   DBUG_RETURN(NULL);
8715 }
8716 
8717 /**
8718   Static error print function called from static handler method
8719   ndbcluster_commit and ndbcluster_rollback.
8720 */
8721 static void
8722 ndbcluster_print_error(int error, const NdbOperation *error_op)
8723 {
8724   DBUG_ENTER("ndbcluster_print_error");
8725   TABLE_SHARE share;
8726   const char *tab_name= (error_op) ? error_op->getTableName() : "";
8727   if (tab_name == NULL)
8728   {
8729     assert(tab_name != NULL);
8730     tab_name= "";
8731   }
8732   share.db.str= (char*) "";
8733   share.db.length= 0;
8734   share.table_name.str= (char *) tab_name;
8735   share.table_name.length= strlen(tab_name);
8736   ha_ndbcluster error_handler(ndbcluster_hton, &share);
8737   error_handler.print_error(error, MYF(0));
8738   DBUG_VOID_RETURN;
8739 }
8740 
8741 
8742 /**
8743   Commit a transaction started in NDB.
8744 */
8745 
8746 int ndbcluster_commit(handlerton *hton, THD *thd, bool all)
8747 {
8748   int res= 0;
8749   Thd_ndb *thd_ndb= get_thd_ndb(thd);
8750   Ndb *ndb= thd_ndb->ndb;
8751   NdbTransaction *trans= thd_ndb->trans;
8752   bool retry_slave_trans = false;
8753   (void) retry_slave_trans;
8754 
8755   DBUG_ENTER("ndbcluster_commit");
8756   assert(ndb);
8757   DBUG_PRINT("enter", ("Commit %s", (all ? "all" : "stmt")));
8758   thd_ndb->start_stmt_count= 0;
8759   if (trans == NULL)
8760   {
8761     DBUG_PRINT("info", ("trans == NULL"));
8762     DBUG_RETURN(0);
8763   }
8764   if (!all && (thd_options(thd) & (OPTION_NOT_AUTOCOMMIT | OPTION_BEGIN)))
8765   {
8766     /*
8767       An oddity in the handler interface is that commit on handlerton
8768       is called to indicate end of statement only in cases where
8769       autocommit isn't used and the all flag isn't set.
8770 
8771       We also leave quickly when a transaction hasn't even been started;
8772       in this case we are safe that no clean up is needed. In this case
8773       the MySQL Server could handle the query without contacting the
8774       NDB kernel.
8775     */
8776     thd_ndb->save_point_count++;
8777     DBUG_PRINT("info", ("Commit before start or end-of-statement only"));
8778     DBUG_RETURN(0);
8779   }
8780   thd_ndb->save_point_count= 0;
8781 
8782 #ifdef HAVE_NDB_BINLOG
8783   if (unlikely(thd_ndb->m_slow_path))
8784   {
8785     if (thd->slave_thread)
8786       ndbcluster_update_apply_status
8787         (thd, thd_ndb->trans_options & TNTO_INJECTED_APPLY_STATUS);
8788   }
8789 #endif /* HAVE_NDB_BINLOG */
8790 
8791   if (thd->slave_thread)
8792   {
8793 #ifdef HAVE_NDB_BINLOG
8794     /* If this slave transaction has included conflict detecting ops
8795      * and some defined operations are not yet sent, then perform
8796      * an execute(NoCommit) before committing, as conflict op handling
8797      * is done by execute(NoCommit)
8798      */
8799     /* TODO : Add as function */
8800     if (g_ndb_slave_state.conflict_flags & SCS_OPS_DEFINED)
8801     {
8802       if (thd_ndb->m_unsent_bytes)
8803         res = execute_no_commit(thd_ndb, trans, TRUE);
8804     }
8805 
8806     if (likely(res == 0))
8807       res = g_ndb_slave_state.atConflictPreCommit(retry_slave_trans);
8808 #endif /* HAVE_NDB_BINLOG */
8809 
8810     if (likely(res == 0))
8811       res= execute_commit(thd_ndb, trans, 1, TRUE);
8812 
8813     update_slave_api_stats(thd_ndb->ndb);
8814   }
8815   else
8816   {
8817     if (thd_ndb->m_handler &&
8818         thd_ndb->m_handler->m_read_before_write_removal_possible)
8819     {
8820       /*
8821         This is an autocommit involving only one table and
8822         rbwr is on, thus the transaction has already been
8823         committed in exec_bulk_update() or end_bulk_delete()
8824       */
8825       DBUG_PRINT("info", ("autocommit+rbwr, transaction already committed"));
8826       const NdbTransaction::CommitStatusType commitStatus = trans->commitStatus();
8827 
8828       if(commitStatus == NdbTransaction::Committed)
8829       {
8830         /* Already committed transaction to save roundtrip */
8831         assert(get_thd_ndb(current_thd)->m_error == FALSE);
8832       }
8833       else if(commitStatus == NdbTransaction::Aborted)
8834       {
8835         /* Commit failed before transaction was started */
8836         assert(get_thd_ndb(current_thd)->m_error == TRUE);
8837       }
8838       else if(commitStatus == NdbTransaction::NeedAbort)
8839       {
8840         /* Commit attempt failed and rollback is needed */
8841         res = -1;
8842 
8843       }
8844       else
8845       {
8846         /* Commit was never attempted - this should not be possible */
8847         assert(commitStatus == NdbTransaction::Started || commitStatus == NdbTransaction::NotStarted);
8848         sql_print_error("found uncommitted autocommit+rbwr transaction, "
8849                         "commit status: %d", commitStatus);
8850         abort();
8851       }
8852     }
8853     else
8854     {
8855       const bool ignore_error= applying_binlog(thd);
8856       res= execute_commit(thd_ndb, trans,
8857                           THDVAR(thd, force_send),
8858                           ignore_error);
8859     }
8860   }
8861 
8862   if (res != 0)
8863   {
8864 #ifdef HAVE_NDB_BINLOG
8865     if (retry_slave_trans)
8866     {
8867       if (st_ndb_slave_state::MAX_RETRY_TRANS_COUNT >
8868           g_ndb_slave_state.retry_trans_count++)
8869       {
8870         /*
8871            Warning is necessary to cause retry from slave.cc
8872            exec_relay_log_event()
8873         */
8874         push_warning(thd, Sql_condition::SL_WARNING,
8875                      ER_SLAVE_SILENT_RETRY_TRANSACTION,
8876                      "Slave transaction rollback requested");
8877         /*
8878           Set retry count to zero to:
8879           1) Avoid consuming slave-temp-error retry attempts
8880           2) Ensure no inter-attempt sleep
8881 
8882           Better fix : Save + restore retry count around transactional
8883           conflict handling
8884         */
8885         ndb_mi_set_relay_log_trans_retries(0);
8886       }
8887       else
8888       {
8889         /*
8890            Too many retries, print error and exit - normal
8891            too many retries mechanism will cause exit
8892          */
8893         sql_print_error("Ndb slave retried transaction %u time(s) in vain.  Giving up.",
8894                         st_ndb_slave_state::MAX_RETRY_TRANS_COUNT);
8895       }
8896       res= ER_GET_TEMPORARY_ERRMSG;
8897     }
8898     else
8899 #endif
8900     {
8901       const NdbError err= trans->getNdbError();
8902       const NdbOperation *error_op= trans->getNdbErrorOperation();
8903       res= ndb_to_mysql_error(&err);
8904       if (res != -1)
8905         ndbcluster_print_error(res, error_op);
8906     }
8907   }
8908   else
8909   {
8910     /* Update shared statistics for tables inserted into / deleted from*/
8911     if (thd_ndb->m_handler &&      // Autocommit Txn
8912         thd_ndb->m_handler->m_share &&
8913         thd_ndb->m_handler->m_table_info)
8914     {
8915       modify_shared_stats(thd_ndb->m_handler->m_share, thd_ndb->m_handler->m_table_info);
8916     }
8917 
8918     /* Manual commit: Update all affected NDB_SHAREs found in 'open_tables' */
8919     for (uint i= 0; i<thd_ndb->open_tables.records; i++)
8920     {
8921       THD_NDB_SHARE *thd_share=
8922         (THD_NDB_SHARE*)my_hash_element(&thd_ndb->open_tables, i);
8923       modify_shared_stats((NDB_SHARE*)thd_share->key, &thd_share->stat);
8924     }
8925   }
8926 
8927   ndb->closeTransaction(trans);
8928   thd_ndb->trans= NULL;
8929   thd_ndb->m_handler= NULL;
8930 
8931   /* Clear commit_count for tables changed by transaction */
8932   NDB_SHARE* share;
8933   List_iterator_fast<NDB_SHARE> it(thd_ndb->changed_tables);
8934   while ((share= it++))
8935   {
8936     DBUG_PRINT("info", ("Remove share from list of changed tables, %p",
8937                         share));
8938     native_mutex_lock(&share->mutex);
8939     DBUG_PRINT("info", ("Invalidate commit_count for %s, share->commit_count: %lu",
8940                         share->table_name, (ulong) share->commit_count));
8941     share->commit_count= 0;
8942     share->commit_count_lock++;
8943     native_mutex_unlock(&share->mutex);
8944     free_share(&share);
8945   }
8946   thd_ndb->changed_tables.empty();
8947 
8948   DBUG_RETURN(res);
8949 }
8950 
8951 
8952 /**
8953   Rollback a transaction started in NDB.
8954 */
8955 
8956 static int ndbcluster_rollback(handlerton *hton, THD *thd, bool all)
8957 {
8958   int res= 0;
8959   Thd_ndb *thd_ndb= get_thd_ndb(thd);
8960   Ndb *ndb= thd_ndb->ndb;
8961   NdbTransaction *trans= thd_ndb->trans;
8962 
8963   DBUG_ENTER("ndbcluster_rollback");
8964   DBUG_PRINT("enter", ("all: %d  thd_ndb->save_point_count: %d",
8965                        all, thd_ndb->save_point_count));
8966   assert(ndb);
8967   thd_ndb->start_stmt_count= 0;
8968   if (trans == NULL)
8969   {
8970     /* Ignore end-of-statement until real rollback or commit is called */
8971     DBUG_PRINT("info", ("trans == NULL"));
8972     DBUG_RETURN(0);
8973   }
8974   if (!all && (thd_options(thd) & (OPTION_NOT_AUTOCOMMIT | OPTION_BEGIN)) &&
8975       (thd_ndb->save_point_count > 0))
8976   {
8977     /*
8978       Ignore end-of-statement until real rollback or commit is called
8979       as ndb does not support statement rollback
8980       - mark that rollback was unsuccessful, this will cause full rollback
8981       of the transaction
8982     */
8983     DBUG_PRINT("info", ("Rollback before start or end-of-statement only"));
8984     thd_mark_transaction_to_rollback(thd, 1);
8985     my_error(ER_WARN_ENGINE_TRANSACTION_ROLLBACK, MYF(0), "NDB");
8986     DBUG_RETURN(0);
8987   }
8988   thd_ndb->save_point_count= 0;
8989   if (thd->slave_thread)
8990     g_ndb_slave_state.atTransactionAbort();
8991   thd_ndb->m_unsent_bytes= 0;
8992   thd_ndb->m_execute_count++;
8993   DBUG_PRINT("info", ("execute_count: %u", thd_ndb->m_execute_count));
8994   if (trans->execute(NdbTransaction::Rollback) != 0)
8995   {
8996     const NdbError err= trans->getNdbError();
8997     const NdbOperation *error_op= trans->getNdbErrorOperation();
8998     res= ndb_to_mysql_error(&err);
8999     if (res != -1)
9000       ndbcluster_print_error(res, error_op);
9001   }
9002   ndb->closeTransaction(trans);
9003   thd_ndb->trans= NULL;
9004   thd_ndb->m_handler= NULL;
9005 
9006   /* Clear list of tables changed by transaction */
9007   NDB_SHARE* share;
9008   List_iterator_fast<NDB_SHARE> it(thd_ndb->changed_tables);
9009   while ((share= it++))
9010   {
9011     DBUG_PRINT("info", ("Remove share from list of changed tables, %p",
9012                         share));
9013     free_share(&share);
9014   }
9015   thd_ndb->changed_tables.empty();
9016 
9017   if (thd->slave_thread)
9018     update_slave_api_stats(thd_ndb->ndb);
9019 
9020   DBUG_RETURN(res);
9021 }
9022 
9023 /**
9024  * Support for create table/column modifiers
9025  *   by exploiting the comment field
9026  */
9027 struct NDB_Modifier
9028 {
9029   enum { M_BOOL } m_type;
9030   const char * m_name;
9031   size_t m_name_len;
9032   bool m_found;
9033   union {
9034     bool m_val_bool;
9035 #ifdef TODO__
9036     int m_val_int;
9037     struct {
9038       const char * str;
9039       size_t len;
9040     } m_val_str;
9041 #endif
9042   };
9043 };
9044 
9045 static const
9046 struct NDB_Modifier ndb_table_modifiers[] =
9047 {
9048   { NDB_Modifier::M_BOOL, STRING_WITH_LEN("NOLOGGING"), 0, {0} },
9049   { NDB_Modifier::M_BOOL, 0, 0, 0, {0} }
9050 };
9051 
9052 static const
9053 struct NDB_Modifier ndb_column_modifiers[] =
9054 {
9055   { NDB_Modifier::M_BOOL, STRING_WITH_LEN("MAX_BLOB_PART_SIZE"), 0, {0} },
9056   { NDB_Modifier::M_BOOL, 0, 0, 0, {0} }
9057 };
9058 
9059 /**
9060  * NDB_Modifiers
9061  *
9062  * This class implements a simple parser for getting modifiers out
9063  *   of a string (e.g a comment field)
9064  */
9065 class NDB_Modifiers
9066 {
9067 public:
9068   NDB_Modifiers(const NDB_Modifier modifiers[]);
9069   ~NDB_Modifiers();
9070 
9071   /**
9072    * parse a string with length (not necessarily NULL terminated)
9073    */
9074   int parse(THD* thd, const char * prefix, const char * str, size_t strlen);
9075 
9076   /**
9077    * Get modifier...returns NULL if unknown
9078    */
9079   const NDB_Modifier * get(const char * name) const;
9080 private:
9081   uint m_len;
9082   struct NDB_Modifier * m_modifiers;
9083 
9084   int parse_modifier(THD *thd, const char * prefix,
9085                      struct NDB_Modifier* m, const char * str);
9086 };
9087 
9088 static
9089 bool
9090 end_of_token(const char * str)
9091 {
9092   return str[0] == 0 || str[0] == ' ' || str[0] == ',';
9093 }
9094 
9095 NDB_Modifiers::NDB_Modifiers(const NDB_Modifier modifiers[])
9096 {
9097   for (m_len = 0; modifiers[m_len].m_name != 0; m_len++)
9098   {}
9099   m_modifiers = new NDB_Modifier[m_len];
9100   memcpy(m_modifiers, modifiers, m_len * sizeof(NDB_Modifier));
9101 }
9102 
9103 NDB_Modifiers::~NDB_Modifiers()
9104 {
9105   delete [] m_modifiers;
9106 }
9107 
9108 int
9109 NDB_Modifiers::parse_modifier(THD *thd,
9110                               const char * prefix,
9111                               struct NDB_Modifier* m,
9112                               const char * str)
9113 {
9114   if (m->m_found)
9115   {
9116     push_warning_printf(thd, Sql_condition::SL_WARNING,
9117                         ER_ILLEGAL_HA_CREATE_OPTION,
9118                         "%s : modifier %s specified twice",
9119                         prefix, m->m_name);
9120   }
9121 
9122   switch(m->m_type){
9123   case NDB_Modifier::M_BOOL:
9124     if (end_of_token(str))
9125     {
9126       m->m_val_bool = true;
9127       goto found;
9128     }
9129     if (str[0] != '=')
9130       break;
9131 
9132     str++;
9133     if (str[0] == '1' && end_of_token(str+1))
9134     {
9135       m->m_val_bool = true;
9136       goto found;
9137     }
9138 
9139     if (str[0] == '0' && end_of_token(str+1))
9140     {
9141       m->m_val_bool = false;
9142       goto found;
9143     }
9144   }
9145 
9146   {
9147     const char * end = strpbrk(str, " ,");
9148     if (end)
9149     {
9150       push_warning_printf(thd, Sql_condition::SL_WARNING,
9151                           ER_ILLEGAL_HA_CREATE_OPTION,
9152                           "%s : invalid value '%.*s' for %s",
9153                           prefix, (int)(end - str), str, m->m_name);
9154     }
9155     else
9156     {
9157       push_warning_printf(thd, Sql_condition::SL_WARNING,
9158                           ER_ILLEGAL_HA_CREATE_OPTION,
9159                           "%s : invalid value '%s' for %s",
9160                           prefix, str, m->m_name);
9161     }
9162   }
9163   return -1;
9164 found:
9165   m->m_found = true;
9166   return 0;
9167 }
9168 
9169 int
9170 NDB_Modifiers::parse(THD *thd,
9171                      const char * prefix,
9172                      const char * _source,
9173                      size_t _source_len)
9174 {
9175   if (_source == 0 || _source_len == 0)
9176     return 0;
9177 
9178   const char * source = 0;
9179 
9180   /**
9181    * Check if _source is NULL-terminated
9182    */
9183   for (size_t i = 0; i<_source_len; i++)
9184   {
9185     if (_source[i] == 0)
9186     {
9187       source = _source;
9188       break;
9189     }
9190   }
9191 
9192   if (source == 0)
9193   {
9194     /**
9195      * Make NULL terminated string so that strXXX-functions are safe
9196      */
9197     char * tmp = new char[_source_len+1];
9198     if (tmp == 0)
9199     {
9200       push_warning_printf(thd, Sql_condition::SL_WARNING,
9201                           ER_ILLEGAL_HA_CREATE_OPTION,
9202                           "%s : unable to parse due to out of memory",
9203                           prefix);
9204       return -1;
9205     }
9206     memcpy(tmp, _source, _source_len);
9207     tmp[_source_len] = 0;
9208     source = tmp;
9209   }
9210 
9211   const char * pos = source;
9212   if ((pos = strstr(pos, prefix)) == 0)
9213   {
9214     if (source != _source)
9215       delete [] source;
9216     return 0;
9217   }
9218 
9219   pos += strlen(prefix);
9220 
9221   while (pos && pos[0] != 0 && pos[0] != ' ')
9222   {
9223     const char * end = strpbrk(pos, " ,"); // end of current modifier
9224 
9225     for (uint i = 0; i < m_len; i++)
9226     {
9227       size_t l = m_modifiers[i].m_name_len;
9228       if (strncmp(pos, m_modifiers[i].m_name, l) == 0)
9229       {
9230         /**
9231          * Found modifier...
9232          */
9233 
9234         if (! (end_of_token(pos + l) || pos[l] == '='))
9235           goto unknown;
9236 
9237         pos += l;
9238         int res = parse_modifier(thd, prefix, m_modifiers+i, pos);
9239 
9240         if (res == -1)
9241         {
9242           /**
9243            * We continue parsing even if modifier had error
9244            */
9245         }
9246 
9247         goto next;
9248       }
9249     }
9250 
9251     {
9252   unknown:
9253       if (end)
9254       {
9255         push_warning_printf(thd, Sql_condition::SL_WARNING,
9256                             ER_ILLEGAL_HA_CREATE_OPTION,
9257                             "%s : unknown modifier: %.*s",
9258                             prefix, (int)(end - pos), pos);
9259       }
9260       else
9261       {
9262         push_warning_printf(thd, Sql_condition::SL_WARNING,
9263                             ER_ILLEGAL_HA_CREATE_OPTION,
9264                             "%s : unknown modifier: %s",
9265                             prefix, pos);
9266       }
9267     }
9268 
9269 next:
9270     pos = end;
9271     if (pos && pos[0] == ',')
9272       pos++;
9273   }
9274 
9275   if (source != _source)
9276     delete [] source;
9277 
9278   return 0;
9279 }
9280 
9281 const NDB_Modifier *
9282 NDB_Modifiers::get(const char * name) const
9283 {
9284   for (uint i = 0; i < m_len; i++)
9285   {
9286     if (strcmp(name, m_modifiers[i].m_name) == 0)
9287     {
9288       return m_modifiers + i;
9289     }
9290   }
9291   return 0;
9292 }
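
/*
  Editor's usage sketch (mirrors the call pattern in create_ndb_column()
  below; illustrative only): parsing a column comment such as
  "NDB_COLUMN=MAX_BLOB_PART_SIZE".

    NDB_Modifiers mods(ndb_column_modifiers);
    mods.parse(thd, "NDB_COLUMN=", field->comment.str, field->comment.length);
    const NDB_Modifier *m= mods.get("MAX_BLOB_PART_SIZE");
    if (m && m->m_found && m->m_val_bool)
    {
      // modifier present and enabled, e.g. "NDB_COLUMN=MAX_BLOB_PART_SIZE=1"
    }
*/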
9293 
9294 /**
9295   Define NDB column based on Field.
9296 
9297   Not member of ha_ndbcluster because NDBCOL cannot be declared.
9298 
9299   MySQL text types with character set "binary" are mapped to true
9300   NDB binary types without a character set.
9301 
9302   Blobs are V2 and striping from mysql level is not supported
9303   due to lack of syntax and lack of support for partitioning.
9304 
9305   @return
9306     Returns 0 or mysql error code.
9307 */
9308 
9309 static bool
9310 ndb_blob_striping()
9311 {
9312 #ifndef NDEBUG
9313   const char* p= getenv("NDB_BLOB_STRIPING");
9314   if (p != 0 && *p != 0 && *p != '0' && *p != 'n' && *p != 'N')
9315     return true;
9316 #endif
9317   return false;
9318 }
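
/*
  Editor's note (debug builds only, derived from the check above): blob
  striping can be forced on by setting the NDB_BLOB_STRIPING environment
  variable to a value that is non-empty and does not start with '0', 'n' or
  'N' (e.g. NDB_BLOB_STRIPING=1) in the mysqld environment. In release builds
  this function always returns false.
*/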
9319 
9320 #if NDB_VERSION_D < NDB_MAKE_VERSION(7,2,0)
9321 const Uint32 OLD_NDB_MAX_TUPLE_SIZE_IN_WORDS = 2013;
9322 #else
9323 const Uint32 OLD_NDB_MAX_TUPLE_SIZE_IN_WORDS = NDB_MAX_TUPLE_SIZE_IN_WORDS;
9324 #endif
9325 
9326 static int
9327 create_ndb_column(THD *thd,
9328                   NDBCOL &col,
9329                   Field *field,
9330                   HA_CREATE_INFO *create_info,
9331                   column_format_type default_format= COLUMN_FORMAT_TYPE_DEFAULT)
9332 {
9333   NDBCOL::StorageType type= NDBCOL::StorageTypeMemory;
9334   bool dynamic= FALSE;
9335 
9336   char buf[MAX_ATTR_DEFAULT_VALUE_SIZE];
9337   DBUG_ENTER("create_ndb_column");
9338   // Set name
9339   if (col.setName(field->field_name))
9340   {
9341     set_my_errno(errno);
9342     DBUG_RETURN(errno);
9343   }
9344   // Get char set
9345   CHARSET_INFO *cs= const_cast<CHARSET_INFO*>(field->charset());
9346   // Set type and sizes
9347   const enum enum_field_types mysql_type= field->real_type();
9348 
9349   NDB_Modifiers column_modifiers(ndb_column_modifiers);
9350   column_modifiers.parse(thd, "NDB_COLUMN=",
9351                          field->comment.str,
9352                          field->comment.length);
9353 
9354   const NDB_Modifier * mod_maxblob = column_modifiers.get("MAX_BLOB_PART_SIZE");
9355 
9356   {
9357     /* Clear default value (col obj is reused for whole table def) */
9358     col.setDefaultValue(NULL, 0);
9359 
9360     /* If the data nodes are capable then set native
9361      * default.
9362      */
9363     bool nativeDefaults =
9364       ! (thd &&
9365          (! ndb_native_default_support(get_thd_ndb(thd)->
9366                                        ndb->getMinDbNodeVersion())));
9367 
9368     if (likely( nativeDefaults ))
9369     {
9370       if ((!(field->flags & PRI_KEY_FLAG) ) &&
9371           type_supports_default_value(mysql_type))
9372       {
9373         if (!(field->flags & NO_DEFAULT_VALUE_FLAG))
9374         {
9375           my_ptrdiff_t src_offset= field->table->default_values_offset();
9376           if ((! field->is_real_null(src_offset)) ||
9377               ((field->flags & NOT_NULL_FLAG)))
9378           {
9379             /* Set a non-null native default */
9380             memset(buf, 0, MAX_ATTR_DEFAULT_VALUE_SIZE);
9381             get_default_value(buf, field);
9382 
9383             /* For bit columns, the default length is rounded up to the
9384                nearest word, ensuring all data is sent
9385             */
9386             Uint32 defaultLen = field_used_length(field);
9387             if(field->type() == MYSQL_TYPE_BIT)
9388               defaultLen = ((defaultLen + 3) /4) * 4;
9389             col.setDefaultValue(buf, defaultLen);
9390           }
9391         }
9392       }
9393     }
9394   }
9395   switch (mysql_type) {
9396   // Numeric types
9397   case MYSQL_TYPE_TINY:
9398     if (field->flags & UNSIGNED_FLAG)
9399       col.setType(NDBCOL::Tinyunsigned);
9400     else
9401       col.setType(NDBCOL::Tinyint);
9402     col.setLength(1);
9403     break;
9404   case MYSQL_TYPE_SHORT:
9405     if (field->flags & UNSIGNED_FLAG)
9406       col.setType(NDBCOL::Smallunsigned);
9407     else
9408       col.setType(NDBCOL::Smallint);
9409     col.setLength(1);
9410     break;
9411   case MYSQL_TYPE_LONG:
9412     if (field->flags & UNSIGNED_FLAG)
9413       col.setType(NDBCOL::Unsigned);
9414     else
9415       col.setType(NDBCOL::Int);
9416     col.setLength(1);
9417     break;
9418   case MYSQL_TYPE_INT24:
9419     if (field->flags & UNSIGNED_FLAG)
9420       col.setType(NDBCOL::Mediumunsigned);
9421     else
9422       col.setType(NDBCOL::Mediumint);
9423     col.setLength(1);
9424     break;
9425   case MYSQL_TYPE_LONGLONG:
9426     if (field->flags & UNSIGNED_FLAG)
9427       col.setType(NDBCOL::Bigunsigned);
9428     else
9429       col.setType(NDBCOL::Bigint);
9430     col.setLength(1);
9431     break;
9432   case MYSQL_TYPE_FLOAT:
9433     col.setType(NDBCOL::Float);
9434     col.setLength(1);
9435     break;
9436   case MYSQL_TYPE_DOUBLE:
9437     col.setType(NDBCOL::Double);
9438     col.setLength(1);
9439     break;
9440   case MYSQL_TYPE_DECIMAL:
9441     {
9442       Field_decimal *f= (Field_decimal*)field;
9443       uint precision= f->pack_length();
9444       uint scale= f->decimals();
9445       if (field->flags & UNSIGNED_FLAG)
9446       {
9447         col.setType(NDBCOL::Olddecimalunsigned);
9448         precision-= (scale > 0);
9449       }
9450       else
9451       {
9452         col.setType(NDBCOL::Olddecimal);
9453         precision-= 1 + (scale > 0);
9454       }
9455       col.setPrecision(precision);
9456       col.setScale(scale);
9457       col.setLength(1);
9458     }
9459     break;
9460   case MYSQL_TYPE_NEWDECIMAL:
9461     {
9462       Field_new_decimal *f= (Field_new_decimal*)field;
9463       uint precision= f->precision;
9464       uint scale= f->decimals();
9465       if (field->flags & UNSIGNED_FLAG)
9466       {
9467         col.setType(NDBCOL::Decimalunsigned);
9468       }
9469       else
9470       {
9471         col.setType(NDBCOL::Decimal);
9472       }
9473       col.setPrecision(precision);
9474       col.setScale(scale);
9475       col.setLength(1);
9476     }
9477     break;
9478   // Date types
9479   case MYSQL_TYPE_DATETIME:
9480     col.setType(NDBCOL::Datetime);
9481     col.setLength(1);
9482     break;
9483   case MYSQL_TYPE_DATETIME2:
9484     {
9485       Field_datetimef *f= (Field_datetimef*)field;
9486       uint prec= f->decimals();
9487       col.setType(NDBCOL::Datetime2);
9488       col.setLength(1);
9489       col.setPrecision(prec);
9490     }
9491     break;
9492   case MYSQL_TYPE_DATE: // ?
9493     col.setType(NDBCOL::Char);
9494     col.setLength(field->pack_length());
9495     break;
9496   case MYSQL_TYPE_NEWDATE:
9497     col.setType(NDBCOL::Date);
9498     col.setLength(1);
9499     break;
9500   case MYSQL_TYPE_TIME:
9501     col.setType(NDBCOL::Time);
9502     col.setLength(1);
9503     break;
9504   case MYSQL_TYPE_TIME2:
9505     {
9506       Field_timef *f= (Field_timef*)field;
9507       uint prec= f->decimals();
9508       col.setType(NDBCOL::Time2);
9509       col.setLength(1);
9510       col.setPrecision(prec);
9511     }
9512     break;
9513   case MYSQL_TYPE_YEAR:
9514     col.setType(NDBCOL::Year);
9515     col.setLength(1);
9516     break;
9517   case MYSQL_TYPE_TIMESTAMP:
9518     col.setType(NDBCOL::Timestamp);
9519     col.setLength(1);
9520     break;
9521   case MYSQL_TYPE_TIMESTAMP2:
9522     {
9523       Field_timestampf *f= (Field_timestampf*)field;
9524       uint prec= f->decimals();
9525       col.setType(NDBCOL::Timestamp2);
9526       col.setLength(1);
9527       col.setPrecision(prec);
9528     }
9529     break;
9530   // Char types
9531   case MYSQL_TYPE_STRING:
9532     if (field->pack_length() == 0)
9533     {
9534       col.setType(NDBCOL::Bit);
9535       col.setLength(1);
9536     }
9537     else if ((field->flags & BINARY_FLAG) && cs == &my_charset_bin)
9538     {
9539       col.setType(NDBCOL::Binary);
9540       col.setLength(field->pack_length());
9541     }
9542     else
9543     {
9544       col.setType(NDBCOL::Char);
9545       col.setCharset(cs);
9546       col.setLength(field->pack_length());
9547     }
9548     break;
9549   case MYSQL_TYPE_VAR_STRING: // ?
9550   case MYSQL_TYPE_VARCHAR:
9551     {
9552       Field_varstring* f= (Field_varstring*)field;
9553       if (f->length_bytes == 1)
9554       {
9555         if ((field->flags & BINARY_FLAG) && cs == &my_charset_bin)
9556           col.setType(NDBCOL::Varbinary);
9557         else {
9558           col.setType(NDBCOL::Varchar);
9559           col.setCharset(cs);
9560         }
9561       }
9562       else if (f->length_bytes == 2)
9563       {
9564         if ((field->flags & BINARY_FLAG) && cs == &my_charset_bin)
9565           col.setType(NDBCOL::Longvarbinary);
9566         else {
9567           col.setType(NDBCOL::Longvarchar);
9568           col.setCharset(cs);
9569         }
9570       }
9571       else
9572       {
9573         DBUG_RETURN(HA_ERR_UNSUPPORTED);
9574       }
9575       col.setLength(field->field_length);
9576     }
9577     break;
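  /*
    Illustrative examples of the VARCHAR mapping above:

      VARCHAR(80)  CHARACTER SET latin1   -> Varchar     (1 length byte)
      VARBINARY(80)                       -> Varbinary   (1 length byte)
      VARCHAR(100) CHARACTER SET utf8mb4  -> Longvarchar (2 length bytes,
                                             since 100 * 4 = 400 bytes > 255)
  */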
9578   // Blob types (all come in as MYSQL_TYPE_BLOB)
9579   mysql_type_tiny_blob:
9580   case MYSQL_TYPE_TINY_BLOB:
9581     if ((field->flags & BINARY_FLAG) && cs == &my_charset_bin)
9582       col.setType(NDBCOL::Blob);
9583     else {
9584       col.setType(NDBCOL::Text);
9585       col.setCharset(cs);
9586     }
9587     col.setInlineSize(256);
9588     // No parts
9589     col.setPartSize(0);
9590     col.setStripeSize(ndb_blob_striping() ? 0 : 0);
9591     break;
9592   //mysql_type_blob:
9593   case MYSQL_TYPE_GEOMETRY:
9594   case MYSQL_TYPE_BLOB:
9595     if ((field->flags & BINARY_FLAG) && cs == &my_charset_bin)
9596       col.setType(NDBCOL::Blob);
9597     else {
9598       col.setType(NDBCOL::Text);
9599       col.setCharset(cs);
9600     }
9601     {
9602       Field_blob *field_blob= (Field_blob *)field;
9603       /*
9604        * max_data_length is 2^8-1, 2^16-1, 2^24-1 for tiny, blob, medium.
9605        * Tinyblob gets no blob parts.  The other cases are just a crude
9606        * way to control part size and striping.
9607        *
9608        * In mysql blob(256) is promoted to blob(65535) so it does not
9609        * in fact fit "inline" in NDB.
9610        */
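      /*
        Worked example (illustrative, assuming the defaults chosen below):
        a 100 KB value in a plain BLOB column is stored as 256 bytes inline
        in the main row plus ceil((102400 - 256) / 2000) = 52 rows in the
        blob parts table.
      */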
9611       if (field_blob->max_data_length() < (1 << 8))
9612         goto mysql_type_tiny_blob;
9613       else if (field_blob->max_data_length() < (1 << 16))
9614       {
9615         col.setInlineSize(256);
9616         col.setPartSize(2000);
9617         col.setStripeSize(ndb_blob_striping() ? 16 : 0);
9618         if (mod_maxblob->m_found)
9619         {
9620           col.setPartSize(4 * (NDB_MAX_TUPLE_SIZE_IN_WORDS - /* safety */ 13));
9621         }
9622       }
9623       else if (field_blob->max_data_length() < (1 << 24))
9624         goto mysql_type_medium_blob;
9625       else
9626         goto mysql_type_long_blob;
9627     }
9628     break;
9629   mysql_type_medium_blob:
9630   case MYSQL_TYPE_MEDIUM_BLOB:
9631     if ((field->flags & BINARY_FLAG) && cs == &my_charset_bin)
9632       col.setType(NDBCOL::Blob);
9633     else {
9634       col.setType(NDBCOL::Text);
9635       col.setCharset(cs);
9636     }
9637     col.setInlineSize(256);
9638     col.setPartSize(4000);
9639     col.setStripeSize(ndb_blob_striping() ? 8 : 0);
9640     if (mod_maxblob->m_found)
9641     {
9642       col.setPartSize(4 * (NDB_MAX_TUPLE_SIZE_IN_WORDS - /* safety */ 13));
9643     }
9644     break;
9645   mysql_type_long_blob:
9646   case MYSQL_TYPE_LONG_BLOB:
9647     if ((field->flags & BINARY_FLAG) && cs == &my_charset_bin)
9648       col.setType(NDBCOL::Blob);
9649     else {
9650       col.setType(NDBCOL::Text);
9651       col.setCharset(cs);
9652     }
9653     col.setInlineSize(256);
9654     col.setPartSize(4 * (OLD_NDB_MAX_TUPLE_SIZE_IN_WORDS - /* safety */ 13));
9655     col.setStripeSize(ndb_blob_striping() ? 4 : 0);
9656     if (mod_maxblob->m_found)
9657     {
9658       col.setPartSize(4 * (NDB_MAX_TUPLE_SIZE_IN_WORDS - /* safety */ 13));
9659     }
9660     break;
9661 
9662   // MySQL 5.7 binary-encoded JSON type
9663   case MYSQL_TYPE_JSON:
9664   {
9665     /*
9666       JSON columns are just like LONG BLOB columns except for inline size
9667       and part size. Inline size is chosen to accommodate a large number
9668       of embedded json documents without spilling over to the part table.
9669       The tradeoff is that only three JSON columns can be defined in a table
9670       due to the large inline size. Part size is chosen to optimize use of
9671       pages in the part table. Note that much of the JSON functionality is
9672       available by storing JSON documents in VARCHAR columns, including
9673       extracting keys from documents to be used as indexes.
9674      */
9675     const int NDB_JSON_INLINE_SIZE = 4000;
9676     const int NDB_JSON_PART_SIZE = 8100;
9677 
9678     col.setType(NDBCOL::Blob);
9679     col.setInlineSize(NDB_JSON_INLINE_SIZE);
9680     col.setPartSize(NDB_JSON_PART_SIZE);
9681     col.setStripeSize(ndb_blob_striping() ? 16 : 0);
9682     break;
9683   }
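  /*
    Illustrative arithmetic for the limit mentioned above: with a 4000 byte
    inline size, three JSON columns already account for ~12000 bytes of the
    roughly 14000 byte (NDB_MAX_TUPLE_SIZE_IN_WORDS * 4) maximum row size,
    which is why no more than three JSON columns fit in one NDB table.
  */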
9684 
9685   // Other types
9686   case MYSQL_TYPE_ENUM:
9687     col.setType(NDBCOL::Char);
9688     col.setLength(field->pack_length());
9689     break;
9690   case MYSQL_TYPE_SET:
9691     col.setType(NDBCOL::Char);
9692     col.setLength(field->pack_length());
9693     break;
9694   case MYSQL_TYPE_BIT:
9695   {
9696     int no_of_bits= field->field_length;
9697     col.setType(NDBCOL::Bit);
9698     if (!no_of_bits)
9699       col.setLength(1);
9700     else
9701       col.setLength(no_of_bits);
9702     break;
9703   }
9704   case MYSQL_TYPE_NULL:
9705     goto mysql_type_unsupported;
9706   mysql_type_unsupported:
9707   default:
9708     DBUG_RETURN(HA_ERR_UNSUPPORTED);
9709   }
9710   // Set nullable and pk
9711   col.setNullable(field->maybe_null());
9712   col.setPrimaryKey(field->flags & PRI_KEY_FLAG);
9713   if ((field->flags & FIELD_IN_PART_FUNC_FLAG) != 0)
9714   {
9715     col.setPartitionKey(TRUE);
9716   }
9717 
9718   // Set autoincrement
9719   if (field->flags & AUTO_INCREMENT_FLAG)
9720   {
9721     col.setAutoIncrement(TRUE);
9722     ulonglong value= create_info->auto_increment_value ?
9723       create_info->auto_increment_value : (ulonglong) 1;
9724     DBUG_PRINT("info", ("Autoincrement key, initial: %llu", value));
9725     col.setAutoIncrementInitialValue(value);
9726   }
9727   else
9728     col.setAutoIncrement(FALSE);
9729 
9730   DBUG_PRINT("info", ("storage: %u  format: %u  ",
9731                       field->field_storage_type(),
9732                       field->column_format()));
9733   switch (field->field_storage_type()) {
9734   case(HA_SM_DEFAULT):
9735   default:
9736     if (create_info->storage_media == HA_SM_DISK)
9737       type= NDBCOL::StorageTypeDisk;
9738     else
9739       type= NDBCOL::StorageTypeMemory;
9740     break;
9741   case(HA_SM_DISK):
9742     type= NDBCOL::StorageTypeDisk;
9743     break;
9744   case(HA_SM_MEMORY):
9745     type= NDBCOL::StorageTypeMemory;
9746     break;
9747   }
9748 
9749   switch (field->column_format()) {
9750   case(COLUMN_FORMAT_TYPE_FIXED):
9751     dynamic= FALSE;
9752     break;
9753   case(COLUMN_FORMAT_TYPE_DYNAMIC):
9754     dynamic= TRUE;
9755     break;
9756   case(COLUMN_FORMAT_TYPE_DEFAULT):
9757   default:
9758     if (create_info->row_type == ROW_TYPE_DEFAULT)
9759       dynamic= default_format;
9760     else
9761       dynamic= (create_info->row_type == ROW_TYPE_DYNAMIC);
9762     break;
9763   }
9764   DBUG_PRINT("info", ("Column %s is declared %s", field->field_name,
9765                       (dynamic) ? "dynamic" : "static"));
9766   if (type == NDBCOL::StorageTypeDisk)
9767   {
9768     if (dynamic)
9769     {
9770       DBUG_PRINT("info", ("Dynamic disk stored column %s changed to static",
9771                           field->field_name));
9772       dynamic= false;
9773     }
9774 
9775     if (thd && field->column_format() == COLUMN_FORMAT_TYPE_DYNAMIC)
9776     {
9777       push_warning_printf(thd, Sql_condition::SL_WARNING,
9778                           ER_ILLEGAL_HA_CREATE_OPTION,
9779                           "DYNAMIC column %s with "
9780                           "STORAGE DISK is not supported, "
9781                           "column will become FIXED",
9782                           field->field_name);
9783     }
9784   }
9785 
9786   switch (create_info->row_type) {
9787   case ROW_TYPE_FIXED:
9788     if (thd && (dynamic || field_type_forces_var_part(field->type())))
9789     {
9790       push_warning_printf(thd, Sql_condition::SL_WARNING,
9791                           ER_ILLEGAL_HA_CREATE_OPTION,
9792                           "Row format FIXED incompatible with "
9793                           "dynamic attribute %s",
9794                           field->field_name);
9795     }
9796     break;
9797   case ROW_TYPE_DYNAMIC:
9798     /*
9799       Future: make columns dynamic in this case
9800     */
9801     break;
9802   default:
9803     break;
9804   }
9805 
9806   DBUG_PRINT("info", ("Format %s, Storage %s", (dynamic)?"dynamic":"fixed",(type == NDBCOL::StorageTypeDisk)?"disk":"memory"));
9807   col.setStorageType(type);
9808   col.setDynamic(dynamic);
9809 
9810   DBUG_RETURN(0);
9811 }
9812 
9813 void ha_ndbcluster::update_create_info(HA_CREATE_INFO *create_info)
9814 {
9815   DBUG_ENTER("ha_ndbcluster::update_create_info");
9816   THD *thd= current_thd;
9817   const NDBTAB *ndbtab= m_table;
9818   Ndb *ndb= check_ndb_in_thd(thd);
9819 
9820   if (!(create_info->used_fields & HA_CREATE_USED_AUTO))
9821   {
9822     /*
9823       Find any initial auto_increment value
9824     */
9825     for (uint i= 0; i < table->s->fields; i++)
9826     {
9827       Field *field= table->field[i];
9828       if (field->flags & AUTO_INCREMENT_FLAG)
9829       {
9830         ulonglong auto_value;
9831         uint retries= NDB_AUTO_INCREMENT_RETRIES;
9832         int retry_sleep= 30; /* 30 milliseconds, transaction */
9833         for (;;)
9834         {
9835           Ndb_tuple_id_range_guard g(m_share);
9836           if (ndb->readAutoIncrementValue(ndbtab, g.range, auto_value))
9837           {
9838             if (--retries && !thd->killed &&
9839                 ndb->getNdbError().status == NdbError::TemporaryError)
9840             {
9841               do_retry_sleep(retry_sleep);
9842               continue;
9843             }
9844             const NdbError err= ndb->getNdbError();
9845             sql_print_error("Error %lu in ::update_create_info(): %s",
9846                             (ulong) err.code, err.message);
9847             DBUG_VOID_RETURN;
9848           }
9849           break;
9850         }
9851         if (auto_value > 1)
9852         {
9853           create_info->auto_increment_value= auto_value;
9854         }
9855         break;
9856       }
9857     }
9858   }
9859 
9860   /*
9861     FK data is handled in get_metadata and release_metadata but
9862     for some reason it is not enough
9863   */
9864   if (1)
9865   {
9866     int error= get_fk_data(thd, ndb);
9867     if (error != 0)
9868     {
9869       sql_print_error("update_create_info: get FK data: error %d", error);
9870       DBUG_VOID_RETURN;
9871     }
9872   }
9873 
9874   DBUG_VOID_RETURN;
9875 }
9876 
9877 /*
9878   Create a table in NDB Cluster
9879  */
9880 static uint get_no_fragments(ulonglong max_rows)
9881 {
9882   ulonglong acc_row_size= 25 + /*safety margin*/ 2;
9883   ulonglong acc_fragment_size= 512*1024*1024;
9884   return uint((max_rows*acc_row_size)/acc_fragment_size)+1;
9885 }
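
/*
  Worked example (illustrative): with max_rows = 100,000,000 the formula
  above gives uint((100,000,000 * 27) / (512 * 1024 * 1024)) + 1
  = uint(2,700,000,000 / 536,870,912) + 1 = 5 + 1 = 6 fragments.
*/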
9886 
9887 
9888 /*
9889   Routine to adjust the default number of partitions to always be a multiple
9890   of the number of nodes, and never more than 4 times the number of nodes.
9891 
9892 */
9893 static
9894 bool
9895 adjusted_frag_count(Ndb* ndb,
9896                     uint requested_frags,
9897                     uint &reported_frags)
9898 {
9899   unsigned no_nodes= g_ndb_cluster_connection->no_db_nodes();
9900   unsigned no_replicas= no_nodes == 1 ? 1 : 2;
9901 
9902   unsigned no_threads= 1;
9903   const unsigned no_nodegroups= g_ndb_cluster_connection->max_nodegroup() + 1;
9904 
9905   {
9906     /**
9907      * Use SYSTAB_0 to get #replicas, and to guess #threads
9908      */
9909     char dbname[FN_HEADLEN+1];
9910     dbname[FN_HEADLEN]= 0;
9911     my_stpnmov(dbname, ndb->getDatabaseName(), sizeof(dbname) - 1);
9912     ndb->setDatabaseName("sys");
9913     Ndb_table_guard ndbtab_g(ndb->getDictionary(), "SYSTAB_0");
9914     const NdbDictionary::Table * tab = ndbtab_g.get_table();
9915     if (tab)
9916     {
9917       no_replicas= ndbtab_g.get_table()->getReplicaCount();
9918 
9919       /**
9920        * Guess #threads
9921        */
9922       {
9923         const Uint32 frags = tab->getFragmentCount();
9924         Uint32 node = 0;
9925         Uint32 cnt = 0;
9926         for (Uint32 i = 0; i<frags; i++)
9927         {
9928           Uint32 replicas[4];
9929           if (tab->getFragmentNodes(i, replicas, NDB_ARRAY_SIZE(replicas)))
9930           {
9931             if (node == replicas[0] || node == 0)
9932             {
9933               node = replicas[0];
9934               cnt ++;
9935             }
9936           }
9937         }
9938         no_threads = cnt; // Number of primary replicas on one node
9939       }
9940     }
9941     ndb->setDatabaseName(dbname);
9942   }
9943 
9944   const unsigned usable_nodes = no_replicas * no_nodegroups;
9945   const uint max_replicas = 8 * usable_nodes * no_threads;
9946 
9947   reported_frags = usable_nodes * no_threads; // Start with 1 frag per thread
9948   Uint32 replicas = reported_frags * no_replicas;
9949 
9950   /**
9951    * Loop until we reach the requested fragment count, without exceeding max_replicas
9952    */
9953   while (reported_frags < requested_frags &&
9954          (replicas + usable_nodes * no_threads * no_replicas) <= max_replicas)
9955   {
9956     reported_frags += usable_nodes * no_threads;
9957     replicas += usable_nodes * no_threads * no_replicas;
9958   }
9959 
9960   return (reported_frags < requested_frags);
9961 }
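
/*
  Worked example (illustrative): with 2 node groups, 2 replicas and 4
  guessed threads per node, usable_nodes = 4, so reported_frags starts at
  16 and grows in steps of 16 while staying within max_replicas =
  8 * 4 * 4 = 128 replicas.  A request for 40 fragments is thus rounded up
  to 48 and the function returns false (no warning pushed by the caller).
*/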
9962 
9963 
9964 extern bool ndb_fk_util_truncate_allowed(THD* thd,
9965                                          NdbDictionary::Dictionary* dict,
9966                                          const char* db,
9967                                          const NdbDictionary::Table* tab,
9968                                          bool& allow);
9969 
9970 /*
9971   Forward declaration of the utility functions used
9972   when creating partitioned tables
9973 */
9974 static int
9975 create_table_set_up_partition_info(HA_CREATE_INFO* create_info,
9976                                    partition_info *part_info,
9977                                    NdbDictionary::Table&);
9978 static int
9979 create_table_set_range_data(const partition_info* part_info,
9980                             NdbDictionary::Table&);
9981 static int
9982 create_table_set_list_data(const partition_info* part_info,
9983                            NdbDictionary::Table&);
9984 
9985 
9986 /**
9987   Create a table in NDB Cluster
9988 */
9989 
9990 int ha_ndbcluster::create(const char *name,
9991                           TABLE *form,
9992                           HA_CREATE_INFO *create_info)
9993 {
9994   THD *thd= current_thd;
9995   NDBTAB tab;
9996   NDBCOL col;
9997   size_t pack_length, length;
9998   uint i, pk_length= 0;
9999   uchar *data= NULL, *pack_data= NULL;
10000   bool create_temporary= (create_info->options & HA_LEX_CREATE_TMP_TABLE);
10001   bool create_from_engine= (create_info->table_options & HA_OPTION_CREATE_FROM_ENGINE);
10002   bool is_alter= (thd->lex->sql_command == SQLCOM_ALTER_TABLE);
10003   bool is_truncate= (thd->lex->sql_command == SQLCOM_TRUNCATE);
10004   bool use_disk= FALSE;
10005   NdbDictionary::Table::SingleUserMode single_user_mode= NdbDictionary::Table::SingleUserModeLocked;
10006   bool ndb_sys_table= FALSE;
10007   int result= 0;
10008   NdbDictionary::ObjectId objId;
10009   Ndb_fk_list fk_list_for_truncate;
10010 
10011   DBUG_ENTER("ha_ndbcluster::create");
10012   DBUG_PRINT("enter", ("name: %s", name));
10013 
10014   if (create_temporary)
10015   {
10016     /*
10017       Ndb does not support temporary tables
10018      */
10019     set_my_errno(ER_ILLEGAL_HA_CREATE_OPTION);
10020     DBUG_PRINT("info", ("Ndb doesn't support temporary tables"));
10021     push_warning_printf(thd, Sql_condition::SL_WARNING,
10022                         ER_ILLEGAL_HA_CREATE_OPTION,
10023                         "Ndb doesn't support temporary tables");
10024     DBUG_RETURN(my_errno());
10025   }
10026 
10027   assert(*fn_rext((char*)name) == 0);
10028   set_dbname(name);
10029   set_tabname(name);
10030 
10031   /*
10032     Check that database name and table name will fit within limits
10033   */
10034   if (strlen(m_dbname) > NDB_MAX_DDL_NAME_BYTESIZE ||
10035       strlen(m_tabname) > NDB_MAX_DDL_NAME_BYTESIZE)
10036   {
10037     char *invalid_identifier=
10038         (strlen(m_dbname) > NDB_MAX_DDL_NAME_BYTESIZE)?m_dbname:m_tabname;
10039     push_warning_printf(thd, Sql_condition::SL_WARNING,
10040                         ER_TOO_LONG_IDENT,
10041                         "Ndb has an internal limit of %u bytes on the size of schema identifiers",
10042                         NDB_MAX_DDL_NAME_BYTESIZE);
10043     my_error(ER_TOO_LONG_IDENT, MYF(0), invalid_identifier);
10044     DBUG_RETURN(HA_WRONG_CREATE_OPTION);
10045   }
10046 
10047   set_my_errno(check_ndb_connection(thd));
10048   if (my_errno())
10049     DBUG_RETURN(my_errno());
10050 
10051   Ndb *ndb= get_ndb(thd);
10052   NDBDICT *dict= ndb->getDictionary();
10053 
10054   table= form;
10055   if (create_from_engine)
10056   {
10057     /*
10058       Table already exists in NDB and frm file has been created by
10059       caller.
10060       Do Ndb specific stuff, such as create a .ndb file
10061     */
10062     set_my_errno(write_ndb_file(name));
10063     if (my_errno())
10064       DBUG_RETURN(my_errno());
10065 
10066     ndbcluster_create_binlog_setup(thd, ndb, name, (uint)strlen(name),
10067                                    m_dbname, m_tabname, form);
10068     if (my_errno() == HA_ERR_TABLE_EXIST)
10069     {
10070       push_warning_printf(thd, Sql_condition::SL_WARNING,
10071                           ER_TABLE_EXISTS_ERROR,
10072                           "Failed to setup replication of table %s.%s",
10073                           m_dbname, m_tabname);
10074       set_my_errno(0);
10075     }
10076 
10077 
10078     DBUG_RETURN(my_errno());
10079   }
10080 
10081   Thd_ndb *thd_ndb= get_thd_ndb(thd);
10082 
10083   if (!((thd_ndb->options & TNO_NO_LOCK_SCHEMA_OP) ||
10084         thd_ndb->has_required_global_schema_lock("ha_ndbcluster::create")))
10085 
10086     DBUG_RETURN(HA_ERR_NO_CONNECTION);
10087 
10088 
10089   if (!ndb_schema_dist_is_ready())
10090   {
10091     /*
10092       Don't allow table creation unless schema distribution is ready
10093       ( unless it is a creation of the schema dist table itself )
10094     */
10095     if (!(strcmp(m_dbname, NDB_REP_DB) == 0 &&
10096           strcmp(m_tabname, NDB_SCHEMA_TABLE) == 0))
10097     {
10098       DBUG_PRINT("info", ("Schema distribution table not setup"));
10099       DBUG_RETURN(HA_ERR_NO_CONNECTION);
10100     }
10101     single_user_mode = NdbDictionary::Table::SingleUserModeReadWrite;
10102     ndb_sys_table= TRUE;
10103   }
10104 
10105   if (!ndb_apply_status_share)
10106   {
10107     if ((strcmp(m_dbname, NDB_REP_DB) == 0 &&
10108          strcmp(m_tabname, NDB_APPLY_TABLE) == 0))
10109     {
10110       ndb_sys_table= TRUE;
10111     }
10112   }
10113 
10114   if (is_truncate)
10115   {
10116     Ndb_table_guard ndbtab_g(dict);
10117     ndbtab_g.init(m_tabname);
10118     if (!ndbtab_g.get_table())
10119       ERR_RETURN(dict->getNdbError());
10120 
10121     /*
10122       Don't allow truncate on table which is foreign key parent.
10123       This is kind of a kludge to get legacy compatibility behaviour
10124       but it also reduces the complexity involved in rewriting
10125       fks during this "recreate".
10126      */
10127     bool allow;
10128     if (!ndb_fk_util_truncate_allowed(thd, dict, m_dbname,
10129                                       ndbtab_g.get_table(), allow))
10130     {
10131       DBUG_RETURN(HA_ERR_NO_CONNECTION);
10132     }
10133     if (!allow)
10134     {
10135       my_error(ER_TRUNCATE_ILLEGAL_FK, MYF(0), "");
10136       DBUG_RETURN(1);
10137     }
10138 
10139     /* save the foreign key information in fk_list */
10140     int err;
10141     if ((err= get_fk_data_for_truncate(dict, ndbtab_g.get_table(),
10142                                        fk_list_for_truncate)))
10143       DBUG_RETURN(err);
10144 
10145     DBUG_PRINT("info", ("Dropping and re-creating table for TRUNCATE"));
10146     if ((err= delete_table(name)))
10147       DBUG_RETURN(err);
10148     ndbtab_g.reinit();
10149   }
10150 
10151   NDB_Modifiers table_modifiers(ndb_table_modifiers);
10152   table_modifiers.parse(thd, "NDB_TABLE=", create_info->comment.str,
10153                         create_info->comment.length);
10154   const NDB_Modifier * mod_nologging = table_modifiers.get("NOLOGGING");
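
  /*
    Illustrative: the NOLOGGING modifier parsed above comes from the table
    comment, e.g. something like

      CREATE TABLE t1 (a INT PRIMARY KEY) ENGINE=NDB
        COMMENT="NDB_TABLE=NOLOGGING=1";

    which turns off logging for the table further down where
    mod_nologging->m_val_bool is checked.
  */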
10155 
10156 #ifdef HAVE_NDB_BINLOG
10157   /* Read ndb_replication entry for this table, if any */
10158   Uint32 binlog_flags;
10159   const st_conflict_fn_def* conflict_fn= NULL;
10160   st_conflict_fn_arg args[MAX_CONFLICT_ARGS];
10161   Uint32 num_args = MAX_CONFLICT_ARGS;
10162 
10163   int rep_read_rc= ndbcluster_get_binlog_replication_info(thd,
10164                                                           ndb,
10165                                                           m_dbname,
10166                                                           m_tabname,
10167                                                           ::server_id,
10168                                                           &binlog_flags,
10169                                                           &conflict_fn,
10170                                                           args,
10171                                                           &num_args);
10172   if (rep_read_rc != 0)
10173   {
10174     DBUG_RETURN(rep_read_rc);
10175   }
10176 
10177   /* Reset database name */
10178   ndb->setDatabaseName(m_dbname);
10179 
10180   /* TODO : Add as per conflict function 'virtual' */
10181   /* Use ndb_replication information as required */
10182   if (conflict_fn != NULL)
10183   {
10184     switch(conflict_fn->type)
10185     {
10186     case CFT_NDB_EPOCH:
10187     case CFT_NDB_EPOCH_TRANS:
10188     case CFT_NDB_EPOCH2:
10189     case CFT_NDB_EPOCH2_TRANS:
10190     {
10191       /* Default 6 extra Gci bits allows 2^6 == 64
10192        * epochs / saveGCP, a comfortable default
10193        */
10194       Uint32 numExtraGciBits = 6;
10195       Uint32 numExtraAuthorBits = 1;
10196 
10197       if ((num_args == 1) &&
10198           (args[0].type == CFAT_EXTRA_GCI_BITS))
10199       {
10200         numExtraGciBits = args[0].extraGciBits;
10201       }
10202       DBUG_PRINT("info", ("Setting ExtraRowGciBits to %u, "
10203                           "ExtraAuthorBits to %u",
10204                           numExtraGciBits,
10205                           numExtraAuthorBits));
10206 
10207       tab.setExtraRowGciBits(numExtraGciBits);
10208       tab.setExtraRowAuthorBits(numExtraAuthorBits);
10209     }
10210     default:
10211       break;
10212     }
10213   }
10214 #endif
10215 
10216   if ((dict->beginSchemaTrans() == -1))
10217   {
10218     DBUG_PRINT("info", ("Failed to start schema transaction"));
10219     goto err_return;
10220   }
10221   DBUG_PRINT("info", ("Started schema transaction"));
10222 
10223   DBUG_PRINT("table", ("name: %s", m_tabname));
10224   if (tab.setName(m_tabname))
10225   {
10226     set_my_errno(errno);
10227     goto abort;
10228   }
10229   if (!ndb_sys_table)
10230   {
10231     if (THDVAR(thd, table_temporary))
10232     {
10233 #ifdef DOES_NOT_WORK_CURRENTLY
10234       tab.setTemporary(TRUE);
10235 #endif
10236       tab.setLogging(FALSE);
10237     }
10238     else if (THDVAR(thd, table_no_logging))
10239     {
10240       tab.setLogging(FALSE);
10241     }
10242 
10243     if (mod_nologging->m_found)
10244     {
10245       tab.setLogging(!mod_nologging->m_val_bool);
10246     }
10247   }
10248   tab.setSingleUserMode(single_user_mode);
10249 
10250   // Save frm data for this table
10251   if (readfrm(name, &data, &length))
10252   {
10253     result= 1;
10254     goto abort_return;
10255   }
10256   if (packfrm(data, length, &pack_data, &pack_length))
10257   {
10258     my_free((char*)data, MYF(0));
10259     result= 2;
10260     goto abort_return;
10261   }
10262   DBUG_PRINT("info",
10263              ("setFrm data: 0x%lx  len: %lu", (long) pack_data,
10264               (ulong) pack_length));
10265   tab.setFrm(pack_data, Uint32(pack_length));
10266   my_free((char*)data, MYF(0));
10267   my_free((char*)pack_data, MYF(0));
10268 
10269   /*
10270     Handle table row type
10271 
10272     Default is to let table rows have var part reference so that online
10273     add column can be performed in the future.  Explicitly setting row
10274     type to fixed will omit var part reference, which will save data
10275     memory in ndb, but at the cost of not being able to online add
10276     column to this table
10277   */
10278   switch (create_info->row_type) {
10279   case ROW_TYPE_FIXED:
10280     tab.setForceVarPart(FALSE);
10281     break;
10282   case ROW_TYPE_DYNAMIC:
10283     /* fall through, treat as default */
10284   default:
10285     /* fall through, treat as default */
10286   case ROW_TYPE_DEFAULT:
10287     tab.setForceVarPart(TRUE);
10288     break;
10289   }
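
  /*
    Illustrative: the switch above means that e.g.

      CREATE TABLE t1 (a INT PRIMARY KEY, b INT) ENGINE=NDB ROW_FORMAT=FIXED;

    omits the var part reference (saving data memory but preventing online
    ADD COLUMN later), while ROW_FORMAT=DEFAULT or DYNAMIC keeps it.
  */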
10290 
10291   /*
10292     Setup columns
10293   */
10294   my_bitmap_map *old_map;
10295   {
10296     restore_record(form, s->default_values);
10297     old_map= tmp_use_all_columns(form, form->read_set);
10298   }
10299 
10300   for (i= 0; i < form->s->fields; i++)
10301   {
10302     Field *field= form->field[i];
10303     DBUG_PRINT("info", ("name: %s, type: %u, pack_length: %d",
10304                         field->field_name, field->real_type(),
10305                         field->pack_length()));
10306     set_my_errno(create_ndb_column(thd, col, field, create_info));
10307     if (my_errno())
10308       goto abort;
10309 
10310     if (!use_disk &&
10311         col.getStorageType() == NDBCOL::StorageTypeDisk)
10312       use_disk= TRUE;
10313 
10314     if (tab.addColumn(col))
10315     {
10316       set_my_errno(errno);
10317       goto abort;
10318     }
10319     if (col.getPrimaryKey())
10320       pk_length += (field->pack_length() + 3) / 4;
10321   }
10322 
10323   tmp_restore_column_map(form->read_set, old_map);
10324   if (use_disk)
10325   {
10326     tab.setLogging(TRUE);
10327     tab.setTemporary(FALSE);
10328     if (create_info->tablespace)
10329       tab.setTablespaceName(create_info->tablespace);
10330     else
10331       tab.setTablespaceName("DEFAULT-TS");
10332   }
10333 
10334   // Save the table level storage media setting
10335   switch(create_info->storage_media)
10336   {
10337     case HA_SM_DISK:
10338       tab.setStorageType(NdbDictionary::Column::StorageTypeDisk);
10339       break;
10340     case HA_SM_DEFAULT:
10341       tab.setStorageType(NdbDictionary::Column::StorageTypeDefault);
10342       break;
10343     case HA_SM_MEMORY:
10344       tab.setStorageType(NdbDictionary::Column::StorageTypeMemory);
10345       break;
10346   }
10347 
10348   DBUG_PRINT("info", ("Table %s is %s stored with tablespace %s",
10349                       m_tabname,
10350                       (use_disk) ? "disk" : "memory",
10351                       (use_disk) ? tab.getTablespaceName() : "N/A"));
10352 
10353   KEY* key_info;
10354   for (i= 0, key_info= form->key_info; i < form->s->keys; i++, key_info++)
10355   {
10356     KEY_PART_INFO *key_part= key_info->key_part;
10357     KEY_PART_INFO *end= key_part + key_info->user_defined_key_parts;
10358     for (; key_part != end; key_part++)
10359     {
10360       if (key_part->field->field_storage_type() == HA_SM_DISK)
10361       {
10362         push_warning_printf(thd, Sql_condition::SL_WARNING,
10363                             ER_ILLEGAL_HA_CREATE_OPTION,
10364                             ER(ER_ILLEGAL_HA_CREATE_OPTION),
10365                             ndbcluster_hton_name,
10366                             "Index on field "
10367                             "declared with "
10368                             "STORAGE DISK is not supported");
10369         result= HA_ERR_UNSUPPORTED;
10370         goto abort_return;
10371       }
10372       tab.getColumn(key_part->fieldnr-1)->setStorageType(
10373                              NdbDictionary::Column::StorageTypeMemory);
10374     }
10375   }
10376 
10377   // No primary key, create shadow key as 64 bit, auto increment
10378   if (form->s->primary_key == MAX_KEY)
10379   {
10380     DBUG_PRINT("info", ("Generating shadow key"));
10381     if (col.setName("$PK"))
10382     {
10383       set_my_errno(errno);
10384       goto abort;
10385     }
10386     col.setType(NdbDictionary::Column::Bigunsigned);
10387     col.setLength(1);
10388     col.setNullable(FALSE);
10389     col.setPrimaryKey(TRUE);
10390     col.setAutoIncrement(TRUE);
10391     col.setDefaultValue(NULL, 0);
10392     if (tab.addColumn(col))
10393     {
10394       set_my_errno(errno);
10395       goto abort;
10396     }
10397     pk_length += 2;
10398   }
10399 
10400   // Make sure that blob tables don't have too big part size
10401   for (i= 0; i < form->s->fields; i++)
10402   {
10403     /**
10404      * The extra +7 consists of:
10405      * 2 - words from pk in blob table
10406      * 5 - from extra words added by tup/dict??
10407      */
10408 
10409     // To be upgrade/downgrade safe...we currently use
10410     // old NDB_MAX_TUPLE_SIZE_IN_WORDS, unless MAX_BLOB_PART_SIZE is set
10411     switch (form->field[i]->real_type()) {
10412     case MYSQL_TYPE_GEOMETRY:
10413     case MYSQL_TYPE_BLOB:
10414     case MYSQL_TYPE_MEDIUM_BLOB:
10415     case MYSQL_TYPE_LONG_BLOB:
10416     case MYSQL_TYPE_JSON:
10417     {
10418       NdbDictionary::Column * column= tab.getColumn(i);
10419       unsigned size= pk_length + (column->getPartSize()+3)/4 + 7;
10420       unsigned ndb_max= OLD_NDB_MAX_TUPLE_SIZE_IN_WORDS;
10421       if (column->getPartSize() > (int)(4 * ndb_max))
10422         ndb_max= NDB_MAX_TUPLE_SIZE_IN_WORDS; // MAX_BLOB_PART_SIZE
10423 
10424       if (size > ndb_max &&
10425           (pk_length+7) < ndb_max)
10426       {
10427         size= ndb_max - pk_length - 7;
10428         column->setPartSize(4*size);
10429       }
10430       /**
10431        * If size > NDB_MAX and pk_length+7 >= NDB_MAX
10432        *   then the table can't be created anyway, so skip
10433        *   changing part size, and have error later
10434        */
10435     }
10436     default:
10437       break;
10438     }
10439   }
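
  /*
    Worked example (illustrative, assuming a 7.2+ build where
    OLD_NDB_MAX_TUPLE_SIZE_IN_WORDS == NDB_MAX_TUPLE_SIZE_IN_WORDS == 3500,
    i.e. 14000 byte rows): a LONGBLOB column gets a default part size of
    4 * (3500 - 13) = 13948 bytes = 3487 words.  With a 30 word primary key,
    size = 30 + 3487 + 7 = 3524 > 3500, so the loop above shrinks the part
    size to 4 * (3500 - 30 - 7) = 13852 bytes.
  */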
10440 
10441   // Assume that table_share->max/min_rows equals create_info->max/min_rows
10442   // (although this is a create, so create_info should be the one used)
10443   assert(create_info->max_rows == table_share->max_rows);
10444   assert(create_info->min_rows == table_share->min_rows);
10445 
10446   // Check partition info
10447   set_my_errno(create_table_set_up_partition_info(create_info,
10448                                                   form->part_info,
10449                                                   tab));
10450   if (my_errno())
10451     goto abort;
10452 
10453   if (tab.getFragmentType() == NDBTAB::HashMapPartition &&
10454       tab.getDefaultNoPartitionsFlag() &&
10455       (create_info->max_rows != 0 || create_info->min_rows != 0))
10456   {
10457     ulonglong rows= create_info->max_rows >= create_info->min_rows ?
10458       create_info->max_rows :
10459       create_info->min_rows;
10460     uint no_fragments= get_no_fragments(rows);
10461     uint reported_frags= no_fragments;
10462     if (adjusted_frag_count(ndb, no_fragments, reported_frags))
10463     {
10464       push_warning(current_thd,
10465                    Sql_condition::SL_WARNING, ER_UNKNOWN_ERROR,
10466                    "Ndb might have problems storing the max amount "
10467                    "of rows specified");
10468     }
10469     tab.setFragmentCount(reported_frags);
10470     tab.setDefaultNoPartitionsFlag(false);
10471     tab.setFragmentData(0, 0);
10472   }
10473 
10474   // Check for HashMap
10475   if (tab.getFragmentType() == NDBTAB::HashMapPartition &&
10476       tab.getDefaultNoPartitionsFlag())
10477   {
10478     tab.setFragmentCount(0);
10479     tab.setFragmentData(0, 0);
10480   }
10481   else if (tab.getFragmentType() == NDBTAB::HashMapPartition)
10482   {
10483     NdbDictionary::HashMap hm;
10484     int res= dict->getDefaultHashMap(hm, tab.getFragmentCount());
10485     if (res == -1)
10486     {
10487       res= dict->initDefaultHashMap(hm, tab.getFragmentCount());
10488       if (res == -1)
10489       {
10490         const NdbError err= dict->getNdbError();
10491         set_my_errno(ndb_to_mysql_error(&err));
10492         goto abort;
10493       }
10494 
10495       res= dict->createHashMap(hm);
10496       if (res == -1)
10497       {
10498         const NdbError err= dict->getNdbError();
10499         set_my_errno(ndb_to_mysql_error(&err));
10500         goto abort;
10501       }
10502     }
10503   }
10504 
10505   // Create the table in NDB
10506   if (dict->createTable(tab, &objId) != 0)
10507   {
10508     const NdbError err= dict->getNdbError();
10509     set_my_errno(ndb_to_mysql_error(&err));
10510     goto abort;
10511   }
10512 
10513   DBUG_PRINT("info", ("Table %s/%s created successfully",
10514                       m_dbname, m_tabname));
10515 
10516   // Create secondary indexes
10517   tab.assignObjId(objId);
10518   m_table= &tab;
10519   set_my_errno(create_indexes(thd, ndb, form));
10520 
10521   if (!is_truncate && my_errno() == 0)
10522   {
10523     set_my_errno(create_fks(thd, ndb));
10524   }
10525 
10526   if (is_alter && my_errno() == 0)
10527   {
10528     /**
10529      * MySQL doesn't know/care about FKs here,
10530      *   so we need to copy the old ones ourselves
10531      */
10532     set_my_errno(copy_fk_for_offline_alter(thd, ndb, &tab));
10533   }
10534 
10535   if (!fk_list_for_truncate.is_empty() && my_errno() == 0)
10536   {
10537     /*
10538      Create FKs for the new table from the list obtained from the old
10539      table (for TRUNCATE TABLE).
10540      */
10541     set_my_errno(recreate_fk_for_truncate(thd, ndb, tab.getName(),
10542                                           fk_list_for_truncate));
10543   }
10544 
10545   m_table= 0;
10546 
10547   if (!my_errno())
10548   {
10549     /*
10550      * All steps have succeeded, try and commit schema transaction
10551      */
10552     if (dict->endSchemaTrans() == -1)
10553       goto err_return;
10554     set_my_errno(write_ndb_file(name));
10555   }
10556   else
10557   {
10558 abort:
10559 /*
10560  *  Some step during table creation failed, abort schema transaction
10561  */
10562     DBUG_PRINT("info", ("Aborting schema transaction due to error %i",
10563                         my_errno()));
10564     if (dict->endSchemaTrans(NdbDictionary::Dictionary::SchemaTransAbort)
10565         == -1)
10566       DBUG_PRINT("info", ("Failed to abort schema transaction, %i",
10567                           dict->getNdbError().code));
10568     m_table= 0;
10569 
10570     {
10571       // Flush the table out of ndbapi's dictionary cache
10572       Ndb_table_guard ndbtab_g(dict);
10573       ndbtab_g.init(m_tabname);
10574       ndbtab_g.invalidate();
10575     }
10576 
10577     DBUG_RETURN(my_errno());
10578 abort_return:
10579     DBUG_PRINT("info", ("Aborting schema transaction"));
10580     if (dict->endSchemaTrans(NdbDictionary::Dictionary::SchemaTransAbort)
10581         == -1)
10582       DBUG_PRINT("info", ("Failed to abort schema transaction, %i",
10583                           dict->getNdbError().code));
10584     DBUG_RETURN(result);
10585 err_return:
10586     m_table= 0;
10587     ERR_RETURN(dict->getNdbError());
10588   }
10589 
10590   /**
10591    * createTable/index schema transaction OK
10592    */
10593   Ndb_table_guard ndbtab_g(dict, m_tabname);
10594   m_table= ndbtab_g.get_table();
10595 
10596   if (my_errno())
10597   {
10598     /*
10599       Failed to create an index,
10600       drop the table (and all its indexes)
10601     */
10602     while (!thd->killed)
10603     {
10604       if (dict->beginSchemaTrans() == -1)
10605         goto cleanup_failed;
10606       if (dict->dropTableGlobal(*m_table))
10607       {
10608         switch (dict->getNdbError().status)
10609         {
10610         case NdbError::TemporaryError:
10611           if (!thd->killed)
10612           {
10613             if (dict->endSchemaTrans(NdbDictionary::Dictionary::SchemaTransAbort)
10614                 == -1)
10615               DBUG_PRINT("info", ("Failed to abort schema transaction, %i",
10616                                   dict->getNdbError().code));
10617             goto cleanup_failed;
10618           }
10619           break;
10620         default:
10621           break;
10622         }
10623       }
10624       if (dict->endSchemaTrans() == -1)
10625       {
10626 cleanup_failed:
10627         DBUG_PRINT("info", ("Could not cleanup failed create %i",
10628                           dict->getNdbError().code));
10629         continue; // retry indefinitely
10630       }
10631       break;
10632     }
10633     m_table = 0;
10634     DBUG_RETURN(my_errno());
10635   }
10636   else // if (!my_errno)
10637   {
10638     NDB_SHARE *share= 0;
10639     native_mutex_lock(&ndbcluster_mutex);
10640     /*
10641       First make sure we get a "fresh" share here, not an old trailing one...
10642     */
10643     {
10644       uint length= (uint) strlen(name);
10645       if ((share= (NDB_SHARE*) my_hash_search(&ndbcluster_open_tables,
10646                                               (const uchar*) name, length)))
10647         handle_trailing_share(thd, share);
10648     }
10649     /*
10650       get a new share
10651     */
10652 
10653     /* ndb_share reference create */
10654     if (!(share= get_share(name, form, TRUE, TRUE)))
10655     {
10656       sql_print_error("NDB: allocating table share for %s failed", name);
10657       /* my_errno is set */
10658     }
10659     else
10660     {
10661       DBUG_PRINT("NDB_SHARE", ("%s binlog create  use_count: %u",
10662                                share->key_string(), share->use_count));
10663     }
10664     native_mutex_unlock(&ndbcluster_mutex);
10665 
10666     while (!IS_TMP_PREFIX(m_tabname))
10667     {
10668 #ifdef HAVE_NDB_BINLOG
10669       if (share)
10670       {
10671         /* Set the Binlogging information we retrieved above */
10672         ndbcluster_apply_binlog_replication_info(thd,
10673                                                  share,
10674                                                  m_table,
10675                                                  conflict_fn,
10676                                                  args,
10677                                                  num_args,
10678                                                  TRUE, /* Do set binlog flags */
10679                                                  binlog_flags);
10680       }
10681 #endif
10682       String event_name(INJECTOR_EVENT_LEN);
10683       ndb_rep_event_name(&event_name, m_dbname, m_tabname,
10684                          get_binlog_full(share));
10685       int do_event_op= ndb_binlog_running;
10686 
10687       if (!ndb_schema_dist_is_ready() &&
10688           strcmp(share->db, NDB_REP_DB) == 0 &&
10689           strcmp(share->table_name, NDB_SCHEMA_TABLE) == 0)
10690         do_event_op= 1;
10691 
10692       /*
10693         Always create an event for the table, as other mysql servers
10694         expect it to be there.
10695       */
10696       if (!Ndb_dist_priv_util::is_distributed_priv_table(m_dbname, m_tabname) &&
10697           !ndbcluster_create_event(thd, ndb, m_table, event_name.c_ptr(), share,
10698                                    do_event_op ? 2 : 1/* push warning */))
10699       {
10700         if (opt_ndb_extra_logging)
10701           sql_print_information("NDB Binlog: CREATE TABLE Event: %s",
10702                                 event_name.c_ptr());
10703 
10704         if (ndbcluster_create_event_ops(thd, share,
10705                                         m_table, event_name.c_ptr()))
10706         {
10707           sql_print_error("NDB Binlog: FAILED CREATE TABLE event operations."
10708                           " Event: %s", name);
10709           /* a warning has been issued to the client */
10710         }
10711       }
10712       /*
10713         warning has been issued if ndbcluster_create_event failed
10714         and (share && do_event_op)
10715       */
10716       if (share && !do_event_op)
10717         set_binlog_nologging(share);
10718       ndbcluster_log_schema_op(thd,
10719                                thd->query().str, thd->query().length,
10720                                share->db, share->table_name,
10721                                m_table->getObjectId(),
10722                                m_table->getObjectVersion(),
10723                                (is_truncate) ?
10724 			       SOT_TRUNCATE_TABLE : SOT_CREATE_TABLE,
10725 			       NULL, NULL);
10726       break;
10727     }
10728   }
10729 
10730   m_table= 0;
10731   DBUG_RETURN(my_errno());
10732 }
10733 
10734 
10735 int ha_ndbcluster::create_index(THD *thd, const char *name, KEY *key_info,
10736                                 NDB_INDEX_TYPE idx_type, uint idx_no) const
10737 {
10738   int error= 0;
10739   char unique_name[FN_LEN + 1];
10740   static const char* unique_suffix= "$unique";
10741   DBUG_ENTER("ha_ndbcluster::create_index");
10742   DBUG_PRINT("info", ("Creating index %u: %s", idx_no, name));
10743 
10744   if (idx_type == UNIQUE_ORDERED_INDEX || idx_type == UNIQUE_INDEX)
10745   {
10746     strxnmov(unique_name, FN_LEN, name, unique_suffix, NullS);
10747     DBUG_PRINT("info", ("Created unique index name \'%s\' for index %d",
10748                         unique_name, idx_no));
10749   }
10750 
10751   switch (idx_type){
10752   case PRIMARY_KEY_INDEX:
10753     // Do nothing, already created
10754     break;
10755   case PRIMARY_KEY_ORDERED_INDEX:
10756     error= create_ordered_index(thd, name, key_info);
10757     break;
10758   case UNIQUE_ORDERED_INDEX:
10759     if (!(error= create_ordered_index(thd, name, key_info)))
10760       error= create_unique_index(thd, unique_name, key_info);
10761     break;
10762   case UNIQUE_INDEX:
10763     if (check_index_fields_not_null(key_info))
10764     {
10765       push_warning_printf(thd, Sql_condition::SL_WARNING,
10766 			  ER_NULL_COLUMN_IN_INDEX,
10767 			  "Ndb does not support unique index on NULL valued attributes, index access with NULL value will become full table scan");
10768     }
10769     error= create_unique_index(thd, unique_name, key_info);
10770     break;
10771   case ORDERED_INDEX:
10772     if (key_info->algorithm == HA_KEY_ALG_HASH)
10773     {
10774       push_warning_printf(thd, Sql_condition::SL_WARNING,
10775 			  ER_ILLEGAL_HA_CREATE_OPTION,
10776 			  ER(ER_ILLEGAL_HA_CREATE_OPTION),
10777 			  ndbcluster_hton_name,
10778 			  "Ndb does not support non-unique "
10779 			  "hash based indexes");
10780       error= HA_ERR_UNSUPPORTED;
10781       break;
10782     }
10783     error= create_ordered_index(thd, name, key_info);
10784     break;
10785   default:
10786     assert(FALSE);
10787     break;
10788   }
10789 
10790   DBUG_RETURN(error);
10791 }
10792 
10793 int ha_ndbcluster::create_ordered_index(THD *thd, const char *name,
10794                                         KEY *key_info) const
10795 {
10796   DBUG_ENTER("ha_ndbcluster::create_ordered_index");
10797   DBUG_RETURN(create_ndb_index(thd, name, key_info, FALSE));
10798 }
10799 
10800 int ha_ndbcluster::create_unique_index(THD *thd, const char *name,
10801                                        KEY *key_info) const
10802 {
10803 
10804   DBUG_ENTER("ha_ndbcluster::create_unique_index");
10805   DBUG_RETURN(create_ndb_index(thd, name, key_info, TRUE));
10806 }
10807 
10808 
10809 /**
10810   Create an index in NDB Cluster.
10811 
10812   @todo
10813     Only temporary ordered indexes supported
10814 */
10815 
10816 int ha_ndbcluster::create_ndb_index(THD *thd, const char *name,
10817                                     KEY *key_info,
10818                                     bool unique) const
10819 {
10820   char index_name[FN_LEN + 1];
10821   Ndb *ndb= get_ndb(thd);
10822   NdbDictionary::Dictionary *dict= ndb->getDictionary();
10823   KEY_PART_INFO *key_part= key_info->key_part;
10824   KEY_PART_INFO *end= key_part + key_info->user_defined_key_parts;
10825 
10826   DBUG_ENTER("ha_ndbcluster::create_ndb_index");
10827   DBUG_PRINT("enter", ("name: %s ", name));
10828 
10829   ndb_protect_char(name, index_name, sizeof(index_name) - 1, '/');
10830   DBUG_PRINT("info", ("index name: %s ", index_name));
10831 
10832   NdbDictionary::Index ndb_index(index_name);
10833   if (unique)
10834     ndb_index.setType(NdbDictionary::Index::UniqueHashIndex);
10835   else
10836   {
10837     ndb_index.setType(NdbDictionary::Index::OrderedIndex);
10838     // TODO Only temporary ordered indexes supported
10839     ndb_index.setLogging(FALSE);
10840   }
10841   if (!m_table->getLogging())
10842     ndb_index.setLogging(FALSE);
10843   if (((NDBTAB*)m_table)->getTemporary())
10844     ndb_index.setTemporary(TRUE);
10845   if (ndb_index.setTable(m_tabname))
10846   {
10847     set_my_errno(errno);
10848     DBUG_RETURN(errno);
10849   }
10850 
10851   for (; key_part != end; key_part++)
10852   {
10853     Field *field= key_part->field;
10854     if (field->field_storage_type() == HA_SM_DISK)
10855     {
10856       push_warning_printf(thd, Sql_condition::SL_WARNING,
10857                           ER_ILLEGAL_HA_CREATE_OPTION,
10858                           ER(ER_ILLEGAL_HA_CREATE_OPTION),
10859                           ndbcluster_hton_name,
10860                           "Index on field "
10861                           "declared with "
10862                           "STORAGE DISK is not supported");
10863       DBUG_RETURN(HA_ERR_UNSUPPORTED);
10864     }
10865     DBUG_PRINT("info", ("attr: %s", field->field_name));
10866     if (ndb_index.addColumnName(field->field_name))
10867     {
10868       set_my_errno(errno);
10869       DBUG_RETURN(errno);
10870     }
10871   }
10872 
10873   if (dict->createIndex(ndb_index, *m_table))
10874     ERR_RETURN(dict->getNdbError());
10875 
10876   // Success
10877   DBUG_PRINT("info", ("Created index %s", name));
10878   DBUG_RETURN(0);
10879 }
10880 
10881 /*
10882  Prepare for an on-line alter table
10883 */
10884 void ha_ndbcluster::prepare_for_alter()
10885 {
10886   /* ndb_share reference schema */
10887   ndbcluster_get_share(m_share); // Increase ref_count
10888   DBUG_PRINT("NDB_SHARE", ("%s binlog schema  use_count: %u",
10889                            m_share->key_string(), m_share->use_count));
10890   set_ndb_share_state(m_share, NSS_ALTERED);
10891 }
10892 
10893 /*
10894   Add an index on-line to a table
10895 */
10896 /*
10897 int ha_ndbcluster::add_index(TABLE *table_arg,
10898                              KEY *key_info, uint num_of_keys,
10899                              handler_add_index **add)
10900 {
10901   // TODO: As we don't yet implement ::final_add_index(),
10902   // we don't need a handler_add_index object either..?
10903   *add= NULL; // new handler_add_index(table_arg, key_info, num_of_keys);
10904   return add_index_impl(current_thd, table_arg, key_info, num_of_keys);
10905 }
10906 */
10907 
10908 int ha_ndbcluster::add_index_impl(THD *thd, TABLE *table_arg,
10909                                   KEY *key_info, uint num_of_keys)
10910 {
10911   int error= 0;
10912   uint idx;
10913   DBUG_ENTER("ha_ndbcluster::add_index");
10914   DBUG_PRINT("enter", ("table %s", table_arg->s->table_name.str));
10915   assert(m_share->state == NSS_ALTERED);
10916 
10917   for (idx= 0; idx < num_of_keys; idx++)
10918   {
10919     KEY *key= key_info + idx;
10920     KEY_PART_INFO *key_part= key->key_part;
10921     KEY_PART_INFO *end= key_part + key->user_defined_key_parts;
10922     NDB_INDEX_TYPE idx_type= get_index_type_from_key(idx, key_info, false);
10923     DBUG_PRINT("info", ("Adding index: '%s'", key_info[idx].name));
10924     // Add fields to key_part struct
10925     for (; key_part != end; key_part++)
10926       key_part->field= table->field[key_part->fieldnr];
10927     // Check index type
10928     // Create index in ndb
10929     if((error= create_index(thd, key_info[idx].name, key, idx_type, idx)))
10930       break;
10931   }
10932   DBUG_RETURN(error);
10933 }
10934 
10935 /*
10936   Mark one or several indexes for deletion and
10937   renumber the remaining indexes
10938 */
10939 int ha_ndbcluster::prepare_drop_index(TABLE *table_arg,
10940                                       uint *key_num, uint num_of_keys)
10941 {
10942   DBUG_ENTER("ha_ndbcluster::prepare_drop_index");
10943   assert(m_share->state == NSS_ALTERED);
10944   // Mark indexes for deletion
10945   uint idx;
10946   for (idx= 0; idx < num_of_keys; idx++)
10947   {
10948     DBUG_PRINT("info", ("ha_ndbcluster::prepare_drop_index %u", *key_num));
10949     uint i = *key_num++;
10950     m_index[i].status= TO_BE_DROPPED;
10951     // Prepare delete of index stat entry
10952     if (m_index[i].type == PRIMARY_KEY_ORDERED_INDEX ||
10953         m_index[i].type == UNIQUE_ORDERED_INDEX ||
10954         m_index[i].type == ORDERED_INDEX)
10955     {
10956       const NdbDictionary::Index *index= m_index[i].index;
10957       if (index) // safety
10958       {
10959         int index_id= index->getObjectId();
10960         int index_version= index->getObjectVersion();
10961         ndb_index_stat_free(m_share, index_id, index_version);
10962       }
10963     }
10964   }
10965   // Renumber indexes
10966   THD *thd= current_thd;
10967   Thd_ndb *thd_ndb= get_thd_ndb(thd);
10968   Ndb *ndb= thd_ndb->ndb;
10969   renumber_indexes(ndb, table_arg);
10970   DBUG_RETURN(0);
10971 }
10972 
10973 /*
10974   Really drop all indexes marked for deletion
10975 */
10976 int ha_ndbcluster::final_drop_index(TABLE *table_arg)
10977 {
10978   int error;
10979   DBUG_ENTER("ha_ndbcluster::final_drop_index");
10980   // Really drop indexes
10981   THD *thd= current_thd;
10982   Thd_ndb *thd_ndb= get_thd_ndb(thd);
10983   Ndb *ndb= thd_ndb->ndb;
10984   error= drop_indexes(ndb, table_arg);
10985   DBUG_RETURN(error);
10986 }
10987 
10988 
10989 extern void ndb_fk_util_resolve_mock_tables(THD* thd,
10990                                             NdbDictionary::Dictionary* dict,
10991                                             const char* new_parent_db,
10992                                             const char* new_parent_name);
10993 
10994 
10995 int
10996 ha_ndbcluster::rename_table_impl(THD* thd, Ndb* ndb,
10997                                  const NdbDictionary::Table* orig_tab,
10998                                  const char* from, const char* to,
10999                                  const char* old_dbname,
11000                                  const char* old_tabname,
11001                                  const char* new_dbname,
11002                                  const char* new_tabname,
11003                                  bool real_rename,
11004                                  const char* real_rename_db,
11005                                  const char* real_rename_name,
11006                                  bool real_rename_log_on_participant,
11007                                  bool drop_events,
11008                                  bool create_events,
11009                                  bool commit_alter)
11010 {
11011   DBUG_ENTER("ha_ndbcluster::rename_table_impl");
11012   DBUG_PRINT("info", ("real_rename: %d", real_rename));
11013   DBUG_PRINT("info", ("real_rename_db: '%s'", real_rename_db));
11014   DBUG_PRINT("info", ("real_rename_name: '%s'", real_rename_name));
11015   DBUG_PRINT("info", ("real_rename_log_on_participant: %d",
11016                       real_rename_log_on_participant));
11017   // Verify default values of real_rename related parameters
11018   assert(real_rename ||
11019          (real_rename_db == NULL &&
11020           real_rename_name == NULL &&
11021           real_rename_log_on_participant == false));
11022 
11023   DBUG_PRINT("info", ("drop_events: %d", drop_events));
11024   DBUG_PRINT("info", ("create_events: %d", create_events));
11025   DBUG_PRINT("info", ("commit_alter: %d", commit_alter));
11026 
11027   NDBDICT* dict = ndb->getDictionary();
11028   NDBDICT::List index_list;
11029   if (my_strcasecmp(system_charset_info, new_dbname, old_dbname))
11030   {
11031     // When moving tables between databases the indexes need to be
11032     // recreated; save the list of indexes before the rename so that
11033     // they can be recreated afterwards
11034     dict->listIndexes(index_list, *orig_tab);
11035   }
11036 
11037   // Change current database to that of target table
11038   if (ndb->setDatabaseName(new_dbname))
11039   {
11040     ERR_RETURN(ndb->getNdbError());
11041   }
11042 
11043   const int ndb_table_id= orig_tab->getObjectId();
11044   const int ndb_table_version= orig_tab->getObjectVersion();
11045 
11046   Ndb_share_temp_ref share(from);
11047   if (real_rename)
11048   {
11049     /*
11050       Prepare the rename on the participant, i.e. make the participant
11051       save the final table name in the NDB_SHARE of the table to be renamed.
11052 
11053       NOTE! The tricky thing here is that the NDB_SHARE hasn't yet been
11054       renamed on the participant and thus you have to use the original
11055       table name when communicating with the participant, otherwise it
11056       will not find the share in which to stash the final table name.
11057 
11058       Also note that the main reason for doing this prepare phase
11059       (which the participant can't refuse) is the lack of placeholders
11060       in the schema dist protocol. There are simply not enough
11061       placeholders available to transfer all required parameters
11062       at once.
11063    */
11064     ndbcluster_log_schema_op(thd, to, (int)strlen(to),
11065                              real_rename_db, real_rename_name,
11066                              ndb_table_id, ndb_table_version,
11067                              SOT_RENAME_TABLE_PREPARE,
11068                              new_dbname /* unused */,
11069                              new_tabname /* unused */);
11070   }
11071   NDB_SHARE_KEY* old_key = share->key; // Save current key
11072   NDB_SHARE_KEY* new_key = NDB_SHARE::create_key(to);
11073   (void)ndbcluster_rename_share(thd, share, new_key);
11074 
11075   NdbDictionary::Table new_tab= *orig_tab;
11076   new_tab.setName(new_tabname);
11077   if (dict->alterTableGlobal(*orig_tab, new_tab) != 0)
11078   {
11079     const NdbError ndb_error= dict->getNdbError();
11080     // Rename the share back to old_key
11081     (void)ndbcluster_rename_share(thd, share, old_key);
11082     // Release the unused new_key
11083     NDB_SHARE::free_key(new_key);
11084     ERR_RETURN(ndb_error);
11085   }
11086   // Release the unused old_key
11087   NDB_SHARE::free_key(old_key);
11088 
11089   ndb_fk_util_resolve_mock_tables(thd, ndb->getDictionary(),
11090                                   new_dbname, new_tabname);
11091 
11092   {
11093     // Rename .ndb file
11094     int result;
11095     if ((result= handler::rename_table(from, to)))
11096     {
11097       // ToDo in 4.1 should rollback alter table...
11098 
11099       DBUG_RETURN(result);
11100     }
11101   }
11102 
11103   /* handle old table */
11104   if (drop_events)
11105   {
11106     ndbcluster_drop_event(thd, ndb, share,
11107                           old_dbname, old_tabname);
11108   }
11109 
11110   if (create_events)
11111   {
11112     Ndb_table_guard ndbtab_g2(dict, new_tabname);
11113     const NDBTAB *ndbtab= ndbtab_g2.get_table();
11114 #ifdef HAVE_NDB_BINLOG
11115     ndbcluster_read_binlog_replication(thd, ndb, share, ndbtab,
11116                                        ::server_id, TRUE);
11117 #endif
11118     /* always create an event for the table */
11119     String event_name(INJECTOR_EVENT_LEN);
11120     ndb_rep_event_name(&event_name, new_dbname, new_tabname,
11121                        get_binlog_full(share));
11122 
11123     if (!Ndb_dist_priv_util::is_distributed_priv_table(new_dbname,
11124                                                        new_tabname) &&
11125         !ndbcluster_create_event(thd, ndb, ndbtab, event_name.c_ptr(), share,
11126                                  ndb_binlog_running ? 2 : 1/* push warning */))
11127     {
11128       if (opt_ndb_extra_logging)
11129         sql_print_information("NDB Binlog: RENAME Event: %s",
11130                               event_name.c_ptr());
11131       if (share->op == 0 &&
11132           ndbcluster_create_event_ops(thd, share, ndbtab, event_name.c_ptr()))
11133       {
11134         sql_print_error("NDB Binlog: FAILED create event operations "
11135                         "during RENAME. Event %s", event_name.c_ptr());
11136         /* a warning has been issued to the client */
11137       }
11138     }
11139     /*
11140       A warning has been issued if ndbcluster_create_event failed
11141       and ndb_binlog_running is set
11142     */
11143   }
11144 
11145   if (real_rename)
11146   {
11147     /*
11148       Commit of "real" rename table on the participant, i.e. make the
11149       participant extract the original table name which it got in prepare.
11150 
11151       NOTE! The tricky thing also here is that the NDB_SHARE hasn't yet been
11152       renamed on the participant and thus you have to use the original
11153       table name when communicating with the participant, otherwise it
11154       will not find the share where the final table name has been stashed.
11155 
11156       Also note the special flag which controls whether or not this
11157       query is written to the binlog on the participants.
11158     */
11159     ndbcluster_log_schema_op(thd, thd->query().str, thd->query().length,
11160                              real_rename_db, real_rename_name,
11161                              ndb_table_id, ndb_table_version,
11162                              SOT_RENAME_TABLE,
11163                              new_dbname, new_tabname,
11164                              real_rename_log_on_participant);
11165   }
11166 
11167   if (commit_alter)
11168   {
11169     /* final phase of offline alter table */
11170     ndbcluster_log_schema_op(thd, thd->query().str, thd->query().length,
11171                              new_dbname, new_tabname,
11172                              ndb_table_id, ndb_table_version,
11173                              SOT_ALTER_TABLE_COMMIT,
11174                              NULL, NULL);
11175   }
11176 
11177   for (unsigned i = 0; i < index_list.count; i++)
11178   {
11179     NDBDICT::List::Element& index_el = index_list.elements[i];
11180     // Recreate any indexes not stored in the system database
11181     if (my_strcasecmp(system_charset_info,
11182                       index_el.database, NDB_SYSTEM_DATABASE))
11183     {
11184       // Get old index
11185       ndb->setDatabaseName(old_dbname);
11186       const NDBINDEX * index= dict->getIndexGlobal(index_el.name,  new_tab);
11187       DBUG_PRINT("info", ("Creating index %s/%s",
11188                           index_el.database, index->getName()));
11189       // Create the same "old" index on new tab
11190       dict->createIndex(*index, new_tab);
11191       DBUG_PRINT("info", ("Dropping index %s/%s",
11192                           index_el.database, index->getName()));
11193       // Drop old index
11194       ndb->setDatabaseName(old_dbname);
11195       dict->dropIndexGlobal(*index);
11196     }
11197   }
11198   DBUG_RETURN(0);
11199 }
11200 
11201 
11202 /**
11203   Rename a table in NDB and on the participating mysqld(s)
11204 */
11205 
11206 int ha_ndbcluster::rename_table(const char *from, const char *to)
11207 {
11208   THD *thd= current_thd;
11209   char old_dbname[FN_HEADLEN];
11210   char new_dbname[FN_HEADLEN];
11211   char new_tabname[FN_HEADLEN];
11212 
11213   DBUG_ENTER("ha_ndbcluster::rename_table");
11214   DBUG_PRINT("info", ("Renaming %s to %s", from, to));
11215 
11216   /*
11217     ALTER ... RENAME combined with other changes is currently not
11218     supported by Ndb due to
11219     Bug #16021021 ALTER ... RENAME FAILS TO RENAME ON PARTICIPANT MYSQLD
11220 
11221     Reject the command if it is an ALTER with RENAME and any other
11222     alter_flag besides ALTER_RENAME set.
11223   */
11224   if (thd->lex->sql_command == SQLCOM_ALTER_TABLE)
11225   {
11226     Alter_info *alter_info= &(thd->lex->alter_info);
11227     uint flags= alter_info->flags;
11228 
11229     if (flags & Alter_info::ALTER_RENAME && flags & ~Alter_info::ALTER_RENAME)
11230     {
11231       my_error(ER_NOT_SUPPORTED_YET, MYF(0), thd->query().str);
11232       DBUG_RETURN(ER_NOT_SUPPORTED_YET);
11233     }
11234   }
11235 
11236   set_dbname(from, old_dbname);
11237   set_dbname(to, new_dbname);
11238   set_tabname(from);
11239   set_tabname(to, new_tabname);
11240 
11241   DBUG_PRINT("info", ("old_tabname: '%s'", m_tabname));
11242   DBUG_PRINT("info", ("new_tabname: '%s'", new_tabname));
11243 
11244   /* Check that the new table or database name does not exceed max limit */
11245   if (strlen(new_dbname) > NDB_MAX_DDL_NAME_BYTESIZE ||
11246        strlen(new_tabname) > NDB_MAX_DDL_NAME_BYTESIZE)
11247   {
11248     char *invalid_identifier=
11249         (strlen(new_dbname) > NDB_MAX_DDL_NAME_BYTESIZE) ?
11250           new_dbname : new_tabname;
11251     push_warning_printf(thd, Sql_condition::SL_WARNING,
11252                         ER_TOO_LONG_IDENT,
11253                         "Ndb has an internal limit of %u bytes on the "\
11254                         "size of schema identifiers",
11255                         NDB_MAX_DDL_NAME_BYTESIZE);
11256     my_error(ER_TOO_LONG_IDENT, MYF(0), invalid_identifier);
11257     DBUG_RETURN(HA_WRONG_CREATE_OPTION);
11258   }
11259 
11260   if (check_ndb_connection(thd))
11261     DBUG_RETURN(HA_ERR_NO_CONNECTION);
11262 
11263   Thd_ndb *thd_ndb= thd_get_thd_ndb(thd);
11264   if (!thd_ndb->has_required_global_schema_lock("ha_ndbcluster::rename_table"))
11265     DBUG_RETURN(HA_ERR_NO_CONNECTION);
11266 
11267   // Open the table which is to be renamed(aka. the old)
11268   Ndb *ndb= get_ndb(thd);
11269   ndb->setDatabaseName(old_dbname);
11270   NDBDICT *dict= ndb->getDictionary();
11271   Ndb_table_guard ndbtab_g(dict, m_tabname);
11272   const NDBTAB *orig_tab;
11273   if (!(orig_tab= ndbtab_g.get_table()))
11274     ERR_RETURN(dict->getNdbError());
11275   DBUG_PRINT("info", ("NDB table name: '%s'", orig_tab->getName()));
11276 
11277   // Magically detect if this is a rename or some form of alter
11278   // and decide which actions need to be performed
11279   const bool old_is_temp = IS_TMP_PREFIX(m_tabname);
11280   const bool new_is_temp = IS_TMP_PREFIX(new_tabname);
11281   switch (thd_sql_command(thd))
11282   {
11283   case SQLCOM_DROP_INDEX:
11284   case SQLCOM_CREATE_INDEX:
11285     DBUG_PRINT("info", ("CREATE or DROP INDEX as copying ALTER"));
11286     // fallthrough
11287   case SQLCOM_ALTER_TABLE:
11288     DBUG_PRINT("info", ("SQLCOM_ALTER_TABLE"));
11289 
11290     if (!new_is_temp && !old_is_temp)
11291     {
11292       /*
11293         This is a rename directly from real to real which occurs:
11294         1) when the ALTER is "simple" RENAME i.e only consists of RENAME
11295            and/or enable/disable indexes
11296         2) as part of inplace ALTER .. RENAME
11297        */
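      /*
        For example (statement is illustrative only):

          ALTER TABLE t1 RENAME TO t2;

        arrives here with both names being real (non temporary) names,
        so the rename is performed directly and distributed to the
        participants via rename_table_impl() below.
      */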
11298       DBUG_PRINT("info", ("simple rename detected"));
11299       DBUG_RETURN(rename_table_impl(thd, ndb, orig_tab, from, to,
11300                                     old_dbname, m_tabname,
11301                                     new_dbname, new_tabname,
11302                                     true, // real_rename
11303                                     old_dbname, // real_rename_db
11304                                     m_tabname, // real_rename_name
11305                                     true, // real_rename_log_on_participants
11306                                     true, // drop_events
11307                                     true, // create events
11308                                     false)); // commit_alter
11309     }
11310 
11311     // Make sure that inplace was not requested
11312     assert(thd->lex->alter_info.requested_algorithm !=
11313            Alter_info::ALTER_TABLE_ALGORITHM_INPLACE);
11314 
11315     /*
11316       This is a copying alter table which is implemented as
11317       1) Create destination table with temporary name
11318           -> ha_ndbcluster::create_table('#sql_75636-87')
11319           There are now the source table and one with temporary name:
11320              [t1] + [#sql_75636-87]
11321       2) Copy data from source table to destination table.
11322       3) Backup the source table by renaming it to another temporary name.
11323           -> ha_ndbcluster::rename_table('t1', '#sql_86545-98')
11324           There are now two temporary named tables:
11325             [#sql_86545-98] + [#sql_75636-87]
11326       4) Rename the destination table to its real name.
11327           ->  ha_ndbcluster::rename_table('#sql_75636-87', 't1')
11328       5) Drop the source table
11329 
11330 
11331     */
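    /*
      Illustrative example of the above (temporary names are made up):

        ALTER TABLE t1 ADD COLUMN b INT, ALGORITHM=COPY;

      leads to two calls of this function, roughly

        rename_table('./test/t1', './test/#sql_86545-98')    -- step 3)
        rename_table('./test/#sql_75636-87', './test/t1')    -- step 4)

      The first call takes the "new_is_temp" branch below, the second
      the "old_is_temp" branch.
    */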
11332 
11333     if (new_is_temp)
11334     {
11335       /*
11336         This is an alter table which renames the real name to a temp name,
11337         i.e. step 3) per above, and is the first of
11338         two rename_table() calls. Drop events from the table.
11339       */
11340       DBUG_PRINT("info", ("real -> temp"));
11341       DBUG_RETURN(rename_table_impl(thd, ndb, orig_tab, from, to,
11342                                     old_dbname, m_tabname,
11343                                     new_dbname, new_tabname,
11344                                     false, // real_rename
11345                                     NULL, // real_rename_db
11346                                     NULL, // real_rename_name
11347                                     false, // real_rename_log_on_participants
11348                                     true, // drop_events
11349                                     false, // create events
11350                                     false)); // commit_alter
11351     }
11352 
11353     if (old_is_temp)
11354     {
11355       /*
11356         This is an alter table which renames a temp name to the real name,
11357         i.e. step 4) per above, and is the second call to rename_table().
11358         Create new events and commit the alter so that participants are
11359         made aware that the table changed and can reopen the table.
11360       */
11361       DBUG_PRINT("info", ("temp -> real"));
11362 
11363       /*
11364         Detect if this is the special case which occurs when
11365         the table is both altered and renamed.
11366 
11367         Important here is to remember to rename the table also
11368         on all participants so they will find the table when
11369         the alter is completed. This is slightly problematic since
11370         their table is renamed directly from real to real name, while
11371         the mysqld which performs the alter renames from temp to real
11372         name. Fortunately it's possible to look up the original table
11373         name via THD.
11374       */
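      /*
        Illustrative example (names are made up): for

          ALTER TABLE t1 RENAME TO t2, ADD COLUMN b INT;

        this mysqld renames '#sql_xxx' to 't2' here, while the
        participants must rename 't1' to 't2'. The original name 't1'
        is therefore looked up from the THD below and sent to them.
      */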
11375       const char* orig_name = thd->lex->select_lex->table_list.first->table_name;
11376       const char* orig_db = thd->lex->select_lex->table_list.first->db;
11377       if (thd->lex->alter_info.flags & Alter_info::ALTER_RENAME &&
11378           (my_strcasecmp(system_charset_info, orig_db, new_dbname) ||
11379            my_strcasecmp(system_charset_info, orig_name, new_tabname)))
11380       {
11381         DBUG_PRINT("info", ("ALTER with RENAME detected"));
11382         /*
11383           Use the original table name when communicating with participant
11384         */
11385         const char* real_rename_db = orig_db;
11386         const char* real_rename_name = orig_name;
11387 
11388         /*
11389           Don't log the rename query on participant since that would
11390           cause both an ALTER TABLE RENAME and RENAME TABLE to appear in
11391           the binlog
11392         */
11393         const bool real_rename_log_on_participant = false;
11394         DBUG_RETURN(rename_table_impl(thd, ndb, orig_tab,from,to,
11395                                       old_dbname, m_tabname,
11396                                       new_dbname, new_tabname,
11397                                       true, // real_rename
11398                                       real_rename_db,
11399                                       real_rename_name,
11400                                       real_rename_log_on_participant,
11401                                       false, // drop_events
11402                                       true, // create events
11403                                       true)); // commit_alter
11404       }
11405 
11406       DBUG_RETURN(rename_table_impl(thd, ndb, orig_tab,from,to,
11407                                     old_dbname, m_tabname,
11408                                     new_dbname, new_tabname,
11409                                     false, // real_rename
11410                                     NULL, // real_rename_db
11411                                     NULL, // real_rename_name
11412                                     false, // real_rename_log_on_participants
11413                                     false, // drop_events
11414                                     true, // create events
11415                                     true)); // commit_alter
11416     }
11417     break;
11418 
11419   case SQLCOM_RENAME_TABLE:
11420     DBUG_PRINT("info", ("SQLCOM_RENAME_TABLE"));
11421 
11422     DBUG_RETURN(rename_table_impl(thd, ndb, orig_tab, from, to,
11423                                   old_dbname, m_tabname,
11424                                   new_dbname, new_tabname,
11425                                   true, // real_rename
11426                                   old_dbname, // real_rename_db
11427                                   m_tabname, // real_rename_name
11428                                   true, // real_rename_log_on_participants
11429                                   true, // drop_events
11430                                   true, // create events
11431                                   false)); // commit_alter
11432     break;
11433 
11434   default:
11435     sql_print_error("Unexpected rename case detected, sql_command: %d",
11436                     thd_sql_command(thd));
11437     abort();
11438     break;
11439   }
11440 
11441   // Never reached
11442   DBUG_RETURN(HA_ERR_UNSUPPORTED);
11443 }
11444 
11445 
11446 /**
11447   Delete table from NDB Cluster.
11448 */
11449 
11450 static
11451 void
11452 delete_table_drop_share(NDB_SHARE* share, const char * path)
11453 {
11454   DBUG_ENTER("delete_table_drop_share");
11455   if (share)
11456   {
11457     native_mutex_lock(&ndbcluster_mutex);
11458 do_drop:
11459     if (share->state != NSS_DROPPED)
11460     {
11461       /*
11462         The share kept by the server has not been freed, free it
11463       */
11464       ndbcluster_mark_share_dropped(share);
11465       /* ndb_share reference create free */
11466       DBUG_PRINT("NDB_SHARE", ("%s create free  use_count: %u",
11467                                share->key_string(), share->use_count));
11468       free_share(&share, TRUE);
11469     }
11470     /* ndb_share reference temporary free */
11471     DBUG_PRINT("NDB_SHARE", ("%s temporary free  use_count: %u",
11472                              share->key_string(), share->use_count));
11473     free_share(&share, TRUE);
11474     native_mutex_unlock(&ndbcluster_mutex);
11475   }
11476   else if (path)
11477   {
11478     native_mutex_lock(&ndbcluster_mutex);
11479     share= get_share(path, 0, FALSE, TRUE);
11480     if (share)
11481     {
11482       goto do_drop;
11483     }
11484     native_mutex_unlock(&ndbcluster_mutex);
11485   }
11486   DBUG_VOID_RETURN;
11487 }
11488 
11489 
11490 // Declare adapter functions for Dummy_table_util function
11491 extern bool ndb_fk_util_build_list(THD*, NdbDictionary::Dictionary*,
11492                                    const NdbDictionary::Table*, List<char>&);
11493 extern void ndb_fk_util_drop_list(THD*, Ndb* ndb, NdbDictionary::Dictionary*, List<char>&);
11494 extern bool ndb_fk_util_drop_table(THD*, Ndb* ndb, NdbDictionary::Dictionary*,
11495                                    const NdbDictionary::Table*);
11496 extern bool ndb_fk_util_is_mock_name(const char* table_name);
11497 
11498 bool
11499 ha_ndbcluster::drop_table_and_related(THD* thd, Ndb* ndb, NdbDictionary::Dictionary* dict,
11500                                       const NdbDictionary::Table* table,
11501                                       int drop_flags, bool skip_related)
11502 {
11503   DBUG_ENTER("drop_table_and_related");
11504   DBUG_PRINT("enter", ("cascade_constraints: %d dropdb: %d skip_related: %d",
11505                        MY_TEST(drop_flags & NDBDICT::DropTableCascadeConstraints),
11506                        MY_TEST(drop_flags & NDBDICT::DropTableCascadeConstraintsDropDB),
11507                        skip_related));
11508 
11509   /*
11510     Build list of objects which should be dropped after the table,
11511     unless the caller asks to skip dropping related objects
11512   */
11513   List<char> drop_list;
11514   if (!skip_related &&
11515       !ndb_fk_util_build_list(thd, dict, table, drop_list))
11516   {
11517     DBUG_RETURN(false);
11518   }
11519 
11520   // Drop the table
11521   if (dict->dropTableGlobal(*table, drop_flags) != 0)
11522   {
11523     const NdbError& ndb_err = dict->getNdbError();
11524     if (ndb_err.code == 21080 &&
11525         thd_test_options(thd, OPTION_NO_FOREIGN_KEY_CHECKS))
11526     {
11527       /*
11528         Drop was not allowed because table is still referenced by
11529         foreign key(s). Since foreign_key_checks=0 the problem is
11530         worked around by creating a mock table, recreating the foreign
11531         key(s) to point at the mock table and finally dropping
11532         the requested table.
11533       */
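      /*
        Illustrative example of when this branch is taken:

          SET foreign_key_checks = 0;
          DROP TABLE parent;  -- 'child' still has a FK referencing it

        The drop of 'parent' fails with error 21080, the foreign key is
        repointed to a mock table and 'parent' is then dropped.
      */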
11534       if (!ndb_fk_util_drop_table(thd, ndb, dict, table))
11535       {
11536         DBUG_RETURN(false);
11537       }
11538     }
11539     else
11540     {
11541       DBUG_RETURN(false);
11542     }
11543   }
11544 
11545   // Drop objects which should be dropped after table
11546   ndb_fk_util_drop_list(thd, ndb, dict, drop_list);
11547 
11548   DBUG_RETURN(true);
11549 }
11550 
11551 
11552 /* static version which does not need a handler */
11553 
11554 int
11555 ha_ndbcluster::drop_table_impl(THD *thd, ha_ndbcluster *h, Ndb *ndb,
11556                                const char *path,
11557                                const char *db,
11558                                const char *table_name)
11559 {
11560   DBUG_ENTER("ha_ndbcluster::drop_table_impl");
11561   NDBDICT *dict= ndb->getDictionary();
11562   int ndb_table_id= 0;
11563   int ndb_table_version= 0;
11564 
11565   if (!ndb_schema_dist_is_ready())
11566   {
11567     /* Don't allow drop table unless schema distribution is ready */
11568     DBUG_RETURN(HA_ERR_NO_CONNECTION);
11569   }
11570   /* ndb_share reference temporary */
11571   NDB_SHARE *share= get_share(path, 0, FALSE);
11572   if (share)
11573   {
11574     DBUG_PRINT("NDB_SHARE", ("%s temporary  use_count: %u",
11575                              share->key_string(), share->use_count));
11576   }
11577 
11578   bool skip_related= false;
11579   int drop_flags = 0;
11580   /* Copying alter can leave #sql table which is parent of old FKs */
11581   if (thd->lex->sql_command == SQLCOM_ALTER_TABLE &&
11582       strncmp(table_name, "#sql", 4) == 0)
11583   {
11584     DBUG_PRINT("info", ("Using cascade constraints for ALTER of temp table"));
11585     drop_flags |= NDBDICT::DropTableCascadeConstraints;
11586     // Cascade constraint is used and related will be dropped anyway
11587     skip_related = true;
11588   }
11589 
11590   if (thd->lex->sql_command == SQLCOM_DROP_DB)
11591   {
11592     DBUG_PRINT("info", ("Using cascade constraints DB for drop database"));
11593     drop_flags |= NDBDICT::DropTableCascadeConstraintsDropDB;
11594   }
11595 
11596   if (thd->lex->sql_command == SQLCOM_TRUNCATE)
11597   {
11598     DBUG_PRINT("info", ("Deleting table for TRUNCATE, skip dropping related"));
11599     skip_related= true;
11600   }
11601 
11602   /* Drop the table from NDB */
11603   int res= 0;
11604   if (h && h->m_table)
11605   {
11606 retry_temporary_error1:
11607     if (drop_table_and_related(thd, ndb, dict, h->m_table,
11608                                drop_flags, skip_related))
11609     {
11610       ndb_table_id= h->m_table->getObjectId();
11611       ndb_table_version= h->m_table->getObjectVersion();
11612       DBUG_PRINT("info", ("success 1"));
11613     }
11614     else
11615     {
11616       switch (dict->getNdbError().status)
11617       {
11618         case NdbError::TemporaryError:
11619           if (!thd->killed)
11620             goto retry_temporary_error1; // retry indefinitely
11621           break;
11622         default:
11623           break;
11624       }
11625       res= ndb_to_mysql_error(&dict->getNdbError());
11626       DBUG_PRINT("info", ("error(1) %u", res));
11627     }
11628     h->release_metadata(thd, ndb);
11629   }
11630   else
11631   {
11632     ndb->setDatabaseName(db);
11633     while (1)
11634     {
11635       Ndb_table_guard ndbtab_g(dict, table_name);
11636       if (ndbtab_g.get_table())
11637       {
11638     retry_temporary_error2:
11639         if (drop_table_and_related(thd, ndb, dict, ndbtab_g.get_table(),
11640                                    drop_flags, skip_related))
11641         {
11642           ndb_table_id= ndbtab_g.get_table()->getObjectId();
11643           ndb_table_version= ndbtab_g.get_table()->getObjectVersion();
11644           DBUG_PRINT("info", ("success 2"));
11645           break;
11646         }
11647         else
11648         {
11649           switch (dict->getNdbError().status)
11650           {
11651             case NdbError::TemporaryError:
11652               if (!thd->killed)
11653                 goto retry_temporary_error2; // retry indefinitely
11654               break;
11655             default:
11656               if (dict->getNdbError().code == NDB_INVALID_SCHEMA_OBJECT)
11657               {
11658                 ndbtab_g.invalidate();
11659                 continue;
11660               }
11661               break;
11662           }
11663         }
11664       }
11665       res= ndb_to_mysql_error(&dict->getNdbError());
11666       DBUG_PRINT("info", ("error(2) %u", res));
11667       break;
11668     }
11669   }
11670 
11671   if (res)
11672   {
11673     /* the drop table failed for some reason, drop the share anyways */
11674     delete_table_drop_share(share, 0);
11675     DBUG_RETURN(res);
11676   }
11677 
11678   /* stop the logging of the dropped table, and cleanup */
11679 
11680   /*
11681     Drop table is successful even if the table does not exist in ndb.
11682     In case the table was not actually dropped there is no need
11683     to force a gcp, and setting the event_name to null will indicate
11684     that there is no event to be dropped.
11685   */
11686   int table_dropped= dict->getNdbError().code != 709;
11687 
11688   {
11689     if (table_dropped)
11690     {
11691       ndbcluster_handle_drop_table(thd, ndb, share, "delete table",
11692                                    db, table_name);
11693     }
11694     else
11695     {
11696       /**
11697        * Setting 0,0 will cause ndbcluster_drop_event *not* to be called
11698        */
11699       ndbcluster_handle_drop_table(thd, ndb, share, "delete table",
11700                                    0, 0);
11701     }
11702   }
11703 
11704   if (!IS_TMP_PREFIX(table_name) && share &&
11705       thd->lex->sql_command != SQLCOM_TRUNCATE)
11706   {
11707     ndbcluster_log_schema_op(thd,
11708                              thd->query().str, thd->query().length,
11709                              share->db, share->table_name,
11710                              ndb_table_id, ndb_table_version,
11711                              SOT_DROP_TABLE, NULL, NULL);
11712   }
11713 
11714   delete_table_drop_share(share, 0);
11715   DBUG_RETURN(0);
11716 }
11717 
11718 int ha_ndbcluster::delete_table(const char *name)
11719 {
11720   THD *thd= current_thd;
11721 
11722   DBUG_ENTER("ha_ndbcluster::delete_table");
11723   DBUG_PRINT("enter", ("name: %s", name));
11724 
11725   if (thd == injector_thd)
11726   {
11727     /*
11728       Table was dropped remotely and is already
11729       dropped inside ndb.
11730       Just drop local files.
11731     */
11732     DBUG_PRINT("info", ("Table is already dropped in NDB"));
11733     delete_table_drop_share(0, name);
11734     DBUG_RETURN(handler::delete_table(name));
11735   }
11736 
11737   set_dbname(name);
11738   set_tabname(name);
11739 
11740   if (!ndb_schema_dist_is_ready())
11741   {
11742     /* Don't allow drop table unless schema distribution is ready */
11743     DBUG_RETURN(HA_ERR_NO_CONNECTION);
11744   }
11745 
11746   if (check_ndb_connection(thd))
11747   {
11748     DBUG_RETURN(HA_ERR_NO_CONNECTION);
11749   }
11750 
11751   Thd_ndb *thd_ndb= get_thd_ndb(thd);
11752   if (!thd_ndb->has_required_global_schema_lock("ha_ndbcluster::delete_table"))
11753   {
11754     DBUG_RETURN(HA_ERR_NO_CONNECTION);
11755   }
11756 
11757   /*
11758     Drop table in ndb.
11759     If it was already gone it might have been dropped
11760     remotely, give a warning and then drop .ndb file.
11761    */
11762   int error;
11763   Ndb* ndb= thd_ndb->ndb;
11764   if (!(error= drop_table_impl(thd, this, ndb, name,
11765                                m_dbname, m_tabname)) ||
11766       error == HA_ERR_NO_SUCH_TABLE)
11767   {
11768     /* Call ancestor function to delete .ndb file */
11769     int error1= handler::delete_table(name);
11770     if (!error)
11771       error= error1;
11772   }
11773 
11774   DBUG_RETURN(error);
11775 }
11776 
11777 
11778 void ha_ndbcluster::get_auto_increment(ulonglong offset, ulonglong increment,
11779                                        ulonglong nb_desired_values,
11780                                        ulonglong *first_value,
11781                                        ulonglong *nb_reserved_values)
11782 {
11783   Uint64 auto_value;
11784   THD *thd= current_thd;
11785   DBUG_ENTER("get_auto_increment");
11786   DBUG_PRINT("enter", ("m_tabname: %s", m_tabname));
11787   Ndb *ndb= get_ndb(table->in_use);
11788   uint retries= NDB_AUTO_INCREMENT_RETRIES;
11789   int retry_sleep= 30; /* 30 milliseconds, transaction */
11790   for (;;)
11791   {
11792     Ndb_tuple_id_range_guard g(m_share);
11793     if ((m_skip_auto_increment &&
11794          ndb->readAutoIncrementValue(m_table, g.range, auto_value)) ||
11795         ndb->getAutoIncrementValue(m_table, g.range, auto_value,
11796                                    Uint32(m_autoincrement_prefetch),
11797                                    increment, offset))
11798     {
11799       if (--retries && !thd->killed &&
11800           ndb->getNdbError().status == NdbError::TemporaryError)
11801       {
11802         do_retry_sleep(retry_sleep);
11803         continue;
11804       }
11805       const NdbError err= ndb->getNdbError();
11806       sql_print_error("Error %lu in ::get_auto_increment(): %s",
11807                       (ulong) err.code, err.message);
11808       *first_value= ~(ulonglong) 0;
11809       DBUG_VOID_RETURN;
11810     }
11811     break;
11812   }
11813   *first_value= (longlong)auto_value;
11814   /* From the point of view of MySQL, NDB reserves one row at a time */
11815   *nb_reserved_values= 1;
11816   DBUG_VOID_RETURN;
11817 }
11818 
11819 
11820 /**
11821   Constructor for the NDB Cluster table handler.
11822 */
11823 
11824 ha_ndbcluster::ha_ndbcluster(handlerton *hton, TABLE_SHARE *table_arg):
11825   handler(hton, table_arg),
11826   m_thd_ndb(NULL),
11827   m_active_cursor(NULL),
11828   m_table(NULL),
11829   m_ndb_record(0),
11830   m_ndb_hidden_key_record(0),
11831   m_table_info(NULL),
11832   m_share(0),
11833   m_key_fields(NULL),
11834   m_part_info(NULL),
11835   m_user_defined_partitioning(FALSE),
11836   m_use_partition_pruning(FALSE),
11837   m_sorted(FALSE),
11838   m_use_write(FALSE),
11839   m_ignore_dup_key(FALSE),
11840   m_has_unique_index(FALSE),
11841   m_ignore_no_key(FALSE),
11842   m_read_before_write_removal_possible(FALSE),
11843   m_read_before_write_removal_used(FALSE),
11844   m_rows_updated(0),
11845   m_rows_deleted(0),
11846   m_rows_to_insert((ha_rows) 1),
11847   m_rows_inserted((ha_rows) 0),
11848   m_rows_changed((ha_rows) 0),
11849   m_delete_cannot_batch(FALSE),
11850   m_update_cannot_batch(FALSE),
11851   m_skip_auto_increment(TRUE),
11852   m_blobs_pending(0),
11853   m_is_bulk_delete(false),
11854   m_blobs_row_total_size(0),
11855   m_blobs_buffer(0),
11856   m_blobs_buffer_size(0),
11857   m_dupkey((uint) -1),
11858   m_autoincrement_prefetch(DEFAULT_AUTO_PREFETCH),
11859   m_pushed_join_member(NULL),
11860   m_pushed_join_operation(-1),
11861   m_disable_pushed_join(FALSE),
11862   m_active_query(NULL),
11863   m_pushed_operation(NULL),
11864   m_cond(NULL),
11865   m_multi_cursor(NULL)
11866 {
11867   uint i;
11868 
11869   DBUG_ENTER("ha_ndbcluster");
11870 
11871   m_tabname[0]= '\0';
11872   m_dbname[0]= '\0';
11873 
11874   stats.records= ~(ha_rows)0; // uninitialized
11875   stats.block_size= 1024;
11876 
11877   for (i= 0; i < MAX_KEY; i++)
11878     ndb_init_index(m_index[i]);
11879 
11880   // make sure the fk mem_root is initialized
11881   init_alloc_root(PSI_INSTRUMENT_ME, &m_fk_mem_root, fk_root_block_size, 0);
11882   m_fk_data= NULL;
11883 
11884   DBUG_VOID_RETURN;
11885 }
11886 
11887 
11888 /**
11889   Destructor for NDB Cluster table handler.
11890 */
11891 
11892 ha_ndbcluster::~ha_ndbcluster()
11893 {
11894   THD *thd= current_thd;
11895   Ndb *ndb= thd ? check_ndb_in_thd(thd) : g_ndb;
11896   DBUG_ENTER("~ha_ndbcluster");
11897 
11898   if (m_share)
11899   {
11900     /* ndb_share reference handler free */
11901     DBUG_PRINT("NDB_SHARE", ("%s handler free  use_count: %u",
11902                              m_share->key_string(), m_share->use_count));
11903     free_share(&m_share);
11904   }
11905   release_metadata(thd, ndb);
11906   release_blobs_buffer();
11907 
11908   // Check for open cursor/transaction
11909   assert(m_thd_ndb == NULL);
11910 
11911   // Discard any generated condition
11912   DBUG_PRINT("info", ("Deleting generated condition"));
11913   if (m_cond)
11914   {
11915     delete m_cond;
11916     m_cond= NULL;
11917   }
11918   DBUG_PRINT("info", ("Deleting pushed joins"));
11919   assert(m_active_query == NULL);
11920   assert(m_active_cursor == NULL);
11921   if (m_pushed_join_operation==PUSHED_ROOT)
11922   {
11923     delete m_pushed_join_member;             // Also delete QueryDef
11924   }
11925   m_pushed_join_member= NULL;
11926 
11927   // make sure the fk mem_root is released
11928   free_root(&m_fk_mem_root, 0);
11929   m_fk_data= NULL;
11930   DBUG_VOID_RETURN;
11931 }
11932 
11933 
11934 /**
11935   Open a table for further use
11936   - fetch metadata for this table from NDB
11937   - check that table exists
11938 
11939   @retval
11940     0    ok
11941   @retval
11942     < 0  Table has changed
11943 */
11944 
11945 int ha_ndbcluster::open(const char *name, int mode, uint test_if_locked)
11946 {
11947   THD *thd= current_thd;
11948   int res;
11949   KEY *key;
11950   KEY_PART_INFO *key_part_info;
11951   uint key_parts, i, j;
11952   DBUG_ENTER("ha_ndbcluster::open");
11953   DBUG_PRINT("enter", ("name: %s  mode: %d  test_if_locked: %d",
11954                        name, mode, test_if_locked));
11955 
11956   if (table_share->primary_key != MAX_KEY)
11957   {
11958     /*
11959       Setup ref_length to make room for the whole
11960       primary key to be written in the ref variable
11961     */
11962     key= table->key_info+table_share->primary_key;
11963     ref_length= key->key_length;
11964   }
11965   else
11966   {
11967     if (m_user_defined_partitioning)
11968     {
11969       /* Add space for partid in ref */
11970       ref_length+= sizeof(m_part_id);
11971     }
11972   }
11973   DBUG_PRINT("info", ("ref_length: %d", ref_length));
11974 
11975   {
11976     char* bitmap_array;
11977     uint extra_hidden_keys= table_share->primary_key != MAX_KEY ? 0 : 1;
11978     uint n_keys= table_share->keys + extra_hidden_keys;
11979     uint ptr_size= sizeof(MY_BITMAP*) * (n_keys + 1 /* null termination */);
11980     uint map_size= sizeof(MY_BITMAP) * n_keys;
11981     m_key_fields= (MY_BITMAP**)my_malloc(PSI_INSTRUMENT_ME,
11982                                          ptr_size + map_size,
11983                                          MYF(MY_WME + MY_ZEROFILL));
11984     if (!m_key_fields)
11985     {
11986       local_close(thd, FALSE);
11987       DBUG_RETURN(1);
11988     }
11989     bitmap_array= ((char*)m_key_fields) + ptr_size;
11990     for (i= 0; i < n_keys; i++)
11991     {
11992       my_bitmap_map *bitbuf= NULL;
11993       bool is_hidden_key= (i == table_share->keys);
11994       m_key_fields[i]= (MY_BITMAP*)bitmap_array;
11995       if (is_hidden_key || (i == table_share->primary_key))
11996       {
11997         m_pk_bitmap_p= m_key_fields[i];
11998         bitbuf= m_pk_bitmap_buf;
11999       }
12000       if (bitmap_init(m_key_fields[i], bitbuf,
12001                       table_share->fields, FALSE))
12002       {
12003         m_key_fields[i]= NULL;
12004         local_close(thd, FALSE);
12005         DBUG_RETURN(1);
12006       }
12007       if (!is_hidden_key)
12008       {
12009         key= table->key_info + i;
12010         key_part_info= key->key_part;
12011         key_parts= key->user_defined_key_parts;
12012         for (j= 0; j < key_parts; j++, key_part_info++)
12013           bitmap_set_bit(m_key_fields[i], key_part_info->fieldnr-1);
12014       }
12015       else
12016       {
12017         uint field_no= table_share->fields;
12018         ((uchar *)m_pk_bitmap_buf)[field_no>>3]|= (1 << (field_no & 7));
12019       }
12020       bitmap_array+= sizeof(MY_BITMAP);
12021     }
12022     m_key_fields[i]= NULL;
12023   }
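  /*
    Illustrative example of the m_key_fields layout built above
    (table definition is an example only):

      CREATE TABLE t (a INT PRIMARY KEY, b INT, c INT, KEY bc (b, c))
        ENGINE=NDB;

    m_key_fields[0] has the bit for 'a' set and doubles as m_pk_bitmap_p,
    m_key_fields[1] has the bits for 'b' and 'c' set, and the array is
    NULL terminated. For a table without a primary key an extra hidden
    key bitmap is appended, with a single bit set at position
    table_share->fields.
  */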
12024 
12025   set_dbname(name);
12026   set_tabname(name);
12027 
12028   if ((res= check_ndb_connection(thd)) != 0)
12029   {
12030     local_close(thd, FALSE);
12031     DBUG_RETURN(res);
12032   }
12033 
12034   // Init table lock structure
12035   /* ndb_share reference handler */
12036   if ((m_share=get_share(name, table, FALSE)) == 0)
12037   {
12038     /**
12039      * No share present...we must create one
12040      */
12041     if (opt_ndb_extra_logging > 19)
12042     {
12043       sql_print_information("Calling ndbcluster_create_binlog_setup(%s) in ::open",
12044                             name);
12045     }
12046     Ndb* ndb= check_ndb_in_thd(thd);
12047     ndbcluster_create_binlog_setup(thd, ndb, name, (uint)strlen(name),
12048                                    m_dbname, m_tabname, table);
12049     if ((m_share=get_share(name, table, FALSE)) == 0)
12050     {
12051       local_close(thd, FALSE);
12052       DBUG_RETURN(1);
12053     }
12054   }
12055 
12056   DBUG_PRINT("NDB_SHARE", ("%s handler  use_count: %u",
12057                            m_share->key_string(), m_share->use_count));
12058   thr_lock_data_init(&m_share->lock,&m_lock,(void*) 0);
12059 
12060   if ((res= get_metadata(thd, name)))
12061   {
12062     local_close(thd, FALSE);
12063     DBUG_RETURN(res);
12064   }
12065 
12066   if ((res= update_stats(thd, 1)) ||
12067       (res= info(HA_STATUS_CONST)))
12068   {
12069     local_close(thd, TRUE);
12070     DBUG_RETURN(res);
12071   }
12072   if (ndb_binlog_is_read_only())
12073   {
12074     table->db_stat|= HA_READ_ONLY;
12075     sql_print_information("table '%s' opened read only", name);
12076   }
12077   DBUG_RETURN(0);
12078 }
12079 
12080 /*
12081  * Support for OPTIMIZE TABLE:
12082  * reclaims unused space of deleted rows
12083  * and updates index statistics.
12084  */
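/*
 * Illustrative usage, assuming the session variable is exposed as
 * ndb_optimization_delay (values are examples only):
 *
 *   SET SESSION ndb_optimization_delay = 10;  -- ms between row batches
 *   OPTIMIZE TABLE t1;
 *
 * optimize() below first runs ndb_optimize_table(), sleeping
 * 'optimization_delay' milliseconds between batches, then refreshes
 * statistics via update_stats().
 */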
12085 int ha_ndbcluster::optimize(THD* thd, HA_CHECK_OPT* check_opt)
12086 {
12087   ulong error, stats_error= 0;
12088   const uint delay= (uint)THDVAR(thd, optimization_delay);
12089 
12090   error= ndb_optimize_table(thd, delay);
12091   stats_error= update_stats(thd, 1);
12092   return (error) ? error : stats_error;
12093 }
12094 
12095 int ha_ndbcluster::ndb_optimize_table(THD* thd, uint delay)
12096 {
12097   Thd_ndb *thd_ndb= get_thd_ndb(thd);
12098   Ndb *ndb= thd_ndb->ndb;
12099   NDBDICT *dict= ndb->getDictionary();
12100   int result=0, error= 0;
12101   uint i;
12102   NdbDictionary::OptimizeTableHandle th;
12103   NdbDictionary::OptimizeIndexHandle ih;
12104 
12105   DBUG_ENTER("ndb_optimize_table");
12106   if ((error= dict->optimizeTable(*m_table, th)))
12107   {
12108     DBUG_PRINT("info",
12109                ("Optimize table %s returned %d", m_tabname, error));
12110     ERR_RETURN(ndb->getNdbError());
12111   }
12112   while((result= th.next()) == 1)
12113   {
12114     if (thd->killed)
12115       DBUG_RETURN(-1);
12116     my_sleep(1000*delay);
12117   }
12118   if (result == -1 || th.close() == -1)
12119   {
12120     DBUG_PRINT("info",
12121                ("Optimize table %s did not complete", m_tabname));
12122     ERR_RETURN(ndb->getNdbError());
12123   };
12124   for (i= 0; i < MAX_KEY; i++)
12125   {
12126     if (thd->killed)
12127       DBUG_RETURN(-1);
12128     if (m_index[i].status == ACTIVE)
12129     {
12130       const NdbDictionary::Index *index= m_index[i].index;
12131       const NdbDictionary::Index *unique_index= m_index[i].unique_index;
12132 
12133       if (index)
12134       {
12135         if ((error= dict->optimizeIndex(*index, ih)))
12136         {
12137           DBUG_PRINT("info",
12138                      ("Optimize index %s returned %d",
12139                       index->getName(), error));
12140           ERR_RETURN(ndb->getNdbError());
12141 
12142         }
12143         while((result= ih.next()) == 1)
12144         {
12145           if (thd->killed)
12146             DBUG_RETURN(-1);
12147           my_sleep(1000*delay);
12148         }
12149         if (result == -1 || ih.close() == -1)
12150         {
12151           DBUG_PRINT("info",
12152                      ("Optimize index %s did not complete", index->getName()));
12153           ERR_RETURN(ndb->getNdbError());
12154         }
12155       }
12156       if (unique_index)
12157       {
12158         if ((error= dict->optimizeIndex(*unique_index, ih)))
12159         {
12160           DBUG_PRINT("info",
12161                      ("Optimize unique index %s returned %d",
12162                       unique_index->getName(), error));
12163           ERR_RETURN(ndb->getNdbError());
12164         }
12165         while((result= ih.next()) == 1)
12166         {
12167           if (thd->killed)
12168             DBUG_RETURN(-1);
12169           my_sleep(1000*delay);
12170         }
12171         if (result == -1 || ih.close() == -1)
12172         {
12173           DBUG_PRINT("info",
12174                      ("Optimize unique index %s did not complete", unique_index->getName()));
12175           ERR_RETURN(ndb->getNdbError());
12176         }
12177       }
12178     }
12179   }
12180   DBUG_RETURN(0);
12181 }
12182 
12183 int ha_ndbcluster::analyze(THD* thd, HA_CHECK_OPT* check_opt)
12184 {
12185   int err;
12186   if ((err= update_stats(thd, 1)) != 0)
12187     return err;
12188   const bool index_stat_enable= THDVAR(NULL, index_stat_enable) &&
12189                                 THDVAR(thd, index_stat_enable);
12190   if (index_stat_enable)
12191   {
12192     if ((err= analyze_index(thd)) != 0)
12193       return err;
12194   }
12195   return 0;
12196 }
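/*
  Illustrative usage, assuming the setting is exposed as
  ndb_index_stat_enable: index statistics are only analyzed when both
  the global and the session value are enabled, e.g.

    SET GLOBAL ndb_index_stat_enable = ON;
    SET SESSION ndb_index_stat_enable = ON;
    ANALYZE TABLE t1;

  analyze() above first refreshes table stats via update_stats() and
  then runs analyze_index() on the table's ordered indexes.
*/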
12197 
12198 int
12199 ha_ndbcluster::analyze_index(THD *thd)
12200 {
12201   DBUG_ENTER("ha_ndbcluster::analyze_index");
12202 
12203   Thd_ndb *thd_ndb= get_thd_ndb(thd);
12204   Ndb *ndb= thd_ndb->ndb;
12205 
12206   uint inx_list[MAX_INDEXES];
12207   uint inx_count= 0;
12208 
12209   uint inx;
12210   for (inx= 0; inx < table_share->keys; inx++)
12211   {
12212     NDB_INDEX_TYPE idx_type= get_index_type(inx);
12213 
12214     if ((idx_type == PRIMARY_KEY_ORDERED_INDEX ||
12215          idx_type == UNIQUE_ORDERED_INDEX ||
12216          idx_type == ORDERED_INDEX))
12217     {
12218       if (inx_count < MAX_INDEXES)
12219         inx_list[inx_count++]= inx;
12220     }
12221   }
12222 
12223   if (inx_count != 0)
12224   {
12225     int err= ndb_index_stat_analyze(ndb, inx_list, inx_count);
12226     if (err != 0)
12227       DBUG_RETURN(err);
12228   }
12229   DBUG_RETURN(0);
12230 }
12231 
12232 /*
12233   Set partition info
12234 
12235   SYNOPSIS
12236     set_part_info()
12237     part_info
12238 
12239   RETURN VALUE
12240     NONE
12241 
12242   DESCRIPTION
12243     Set up partition info when handler object created
12244 */
12245 
12246 void ha_ndbcluster::set_part_info(partition_info *part_info, bool early)
12247 {
12248   DBUG_ENTER("ha_ndbcluster::set_part_info");
12249   m_part_info= part_info;
12250   if (!early)
12251   {
12252     m_use_partition_pruning= FALSE;
12253     if (!(m_part_info->part_type == HASH_PARTITION &&
12254           m_part_info->list_of_part_fields &&
12255           !m_part_info->is_sub_partitioned()))
12256     {
12257       /*
12258         PARTITION BY HASH, RANGE and LIST plus all subpartitioning variants
12259         all use MySQL defined partitioning. PARTITION BY KEY uses NDB native
12260         partitioning scheme.
12261       */
12262       m_use_partition_pruning= TRUE;
12263       m_user_defined_partitioning= TRUE;
12264     }
12265     if (m_part_info->part_type == HASH_PARTITION &&
12266         m_part_info->list_of_part_fields &&
12267         m_part_info->num_full_part_fields == 0)
12268     {
12269       /*
12270         CREATE TABLE t (....) ENGINE NDB PARTITION BY KEY();
12271         where no primary key is defined uses a hidden key as partition field
12272         and this makes it impossible to use any partition pruning. Partition
12273         pruning requires partitioning based on real fields. Also, the lack of
12274         a primary key means that all accesses to tables are based on either
12275         full table scans or index scans, and those scans can never be pruned
12276         since the hidden key is unknown. In write_row, update_row,
12277         and delete_row the normal hidden key handling will fix things.
12278       */
12279       m_use_partition_pruning= FALSE;
12280     }
12281     DBUG_PRINT("info", ("m_use_partition_pruning = %d",
12282                          m_use_partition_pruning));
12283   }
12284   DBUG_VOID_RETURN;
12285 }
12286 
12287 /**
12288   Close the table
12289   - release resources setup by open()
12290  */
12291 
12292 void ha_ndbcluster::local_close(THD *thd, bool release_metadata_flag)
12293 {
12294   Ndb *ndb;
12295   DBUG_ENTER("ha_ndbcluster::local_close");
12296   if (m_key_fields)
12297   {
12298     MY_BITMAP **inx_bitmap;
12299     for (inx_bitmap= m_key_fields;
12300          (inx_bitmap != NULL) && ((*inx_bitmap) != NULL);
12301          inx_bitmap++)
12302       if ((*inx_bitmap)->bitmap != m_pk_bitmap_buf)
12303         bitmap_free(*inx_bitmap);
12304     my_free((char*)m_key_fields, MYF(0));
12305     m_key_fields= NULL;
12306   }
12307   if (m_share)
12308   {
12309     /* ndb_share reference handler free */
12310     DBUG_PRINT("NDB_SHARE", ("%s handler free  use_count: %u",
12311                              m_share->key_string(), m_share->use_count));
12312     free_share(&m_share);
12313   }
12314   m_share= 0;
12315   if (release_metadata_flag)
12316   {
12317     ndb= thd ? check_ndb_in_thd(thd) : g_ndb;
12318     release_metadata(thd, ndb);
12319   }
12320   DBUG_VOID_RETURN;
12321 }
12322 
12323 int ha_ndbcluster::close(void)
12324 {
12325   DBUG_ENTER("close");
12326   THD *thd= table->in_use;
12327   local_close(thd, TRUE);
12328   DBUG_RETURN(0);
12329 }
12330 
12331 
12332 int ha_ndbcluster::check_ndb_connection(THD* thd) const
12333 {
12334   Ndb *ndb;
12335   DBUG_ENTER("check_ndb_connection");
12336 
12337   if (!(ndb= check_ndb_in_thd(thd, true)))
12338     DBUG_RETURN(HA_ERR_NO_CONNECTION);
12339   if (ndb->setDatabaseName(m_dbname))
12340   {
12341     ERR_RETURN(ndb->getNdbError());
12342   }
12343   DBUG_RETURN(0);
12344 }
12345 
12346 
12347 static int ndbcluster_close_connection(handlerton *hton, THD *thd)
12348 {
12349   Thd_ndb *thd_ndb= get_thd_ndb(thd);
12350   DBUG_ENTER("ndbcluster_close_connection");
12351   if (thd_ndb)
12352   {
12353     Thd_ndb::release(thd_ndb);
12354     thd_set_thd_ndb(thd, NULL);
12355   }
12356   DBUG_RETURN(0);
12357 }
12358 
12359 
12360 /**
12361   Try to discover one table from NDB.
12362 */
12363 static
12364 int ndbcluster_discover(handlerton *hton, THD* thd, const char *db,
12365                         const char *name,
12366                         uchar **frmblob,
12367                         size_t *frmlen)
12368 {
12369   int error= 0;
12370   NdbError ndb_error;
12371   size_t len;
12372   uchar* data= NULL;
12373   Ndb* ndb;
12374   char key[FN_REFLEN + 1];
12375   DBUG_ENTER("ndbcluster_discover");
12376   DBUG_PRINT("enter", ("db: %s, name: %s", db, name));
12377 
12378   // Check if the database directory for the table to discover exists
12379   // as otherwise there is no place to put the discovered .frm file.
12380   build_table_filename(key, sizeof(key) - 1, db, "", "", 0);
12381   const int database_exists= !my_access(key, F_OK);
12382   if (!database_exists)
12383   {
12384     sql_print_information("NDB: Could not find database directory '%s' "
12385                           "while trying to discover table '%s'", db, name);
12386     // Can't discover table when database directory does not exist
12387     DBUG_RETURN(1);
12388   }
12389 
12390   if (!(ndb= check_ndb_in_thd(thd)))
12391     DBUG_RETURN(HA_ERR_NO_CONNECTION);
12392   if (ndb->setDatabaseName(db))
12393   {
12394     ERR_RETURN(ndb->getNdbError());
12395   }
12396 
12397   build_table_filename(key, sizeof(key) - 1, db, name, "", 0);
12398   /* ndb_share reference temporary */
12399   NDB_SHARE* share= get_share(key, 0, FALSE);
12400   if (share)
12401   {
12402     DBUG_PRINT("NDB_SHARE", ("%s temporary  use_count: %u",
12403                              share->key_string(), share->use_count));
12404   }
12405   if (share && get_ndb_share_state(share) == NSS_ALTERED)
12406   {
12407     // Frm has been altered on disk, but not yet written to ndb
12408     if (readfrm(key, &data, &len))
12409     {
12410       DBUG_PRINT("error", ("Could not read frm"));
12411       error= 1;
12412       goto err;
12413     }
12414   }
12415   else
12416   {
12417     NDBDICT* dict= ndb->getDictionary();
12418     Ndb_table_guard ndbtab_g(dict, name);
12419     const NDBTAB *tab= ndbtab_g.get_table();
12420     if (!tab)
12421     {
12422       const NdbError err= dict->getNdbError();
12423       if (err.code == 709 || err.code == 723)
12424       {
12425         error= -1;
12426         DBUG_PRINT("info", ("ndb_error.code: %u", ndb_error.code));
12427       }
12428       else
12429       {
12430         error= -1;
12431         ndb_error= err;
12432         DBUG_PRINT("info", ("ndb_error.code: %u", ndb_error.code));
12433       }
12434       goto err;
12435     }
12436     DBUG_PRINT("info", ("Found table %s", tab->getName()));
12437 
12438     len= tab->getFrmLength();
12439     if (len == 0 || tab->getFrmData() == NULL)
12440     {
12441       DBUG_PRINT("error", ("No frm data found."));
12442       error= 1;
12443       goto err;
12444     }
12445 
12446     if (unpackfrm(&data, &len, (uchar*) tab->getFrmData()))
12447     {
12448       DBUG_PRINT("error", ("Could not unpack table"));
12449       error= 1;
12450       goto err;
12451     }
12452   }
12453 #ifdef HAVE_NDB_BINLOG
12454   if (ndbcluster_check_if_local_table(db, name) &&
12455       !Ndb_dist_priv_util::is_distributed_priv_table(db, name))
12456   {
12457     DBUG_PRINT("info", ("ndbcluster_discover: Skipping locally defined table '%s.%s'",
12458                         db, name));
12459     sql_print_error("ndbcluster_discover: Skipping locally defined table '%s.%s'",
12460                     db, name);
12461     error= 1;
12462     goto err;
12463   }
12464 #endif
12465   *frmlen= len;
12466   *frmblob= data;
12467 
12468   if (share)
12469   {
12470     /* ndb_share reference temporary free */
12471     DBUG_PRINT("NDB_SHARE", ("%s temporary free  use_count: %u",
12472                              share->key_string(), share->use_count));
12473     free_share(&share);
12474   }
12475 
12476   DBUG_RETURN(0);
12477 err:
12478   my_free((char*)data, MYF(MY_ALLOW_ZERO_PTR));
12479   if (share)
12480   {
12481     /* ndb_share reference temporary free */
12482     DBUG_PRINT("NDB_SHARE", ("%s temporary free  use_count: %u",
12483                              share->key_string(), share->use_count));
12484     free_share(&share);
12485   }
12486 
12487   if (ndb_error.code)
12488   {
12489     ERR_RETURN(ndb_error);
12490   }
12491   DBUG_RETURN(error);
12492 }
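
/*
  Informal sketch (not part of the handler API contract): the server usually
  reaches ndbcluster_discover() via ha_create_table_from_engine() when a
  table exists in NDB but no local .frm file does. Roughly, a hypothetical
  caller (names and values for illustration only) looks like:

    uchar* frmblob;
    size_t frmlen;
    if (ndbcluster_discover(hton, thd, "test", "t1", &frmblob, &frmlen) == 0)
    {
      // the server writes frmblob back as test/t1.frm and opens the table
      my_free((char*)frmblob, MYF(MY_ALLOW_ZERO_PTR));
    }

  The blob returned is the packed .frm stored in the NDB dictionary (see
  unpackfrm() above); on failure any NDB error is mapped via ERR_RETURN().
*/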
12493 
12494 /**
12495   Check if a table exists in NDB.
12496 */
12497 static
12498 int ndbcluster_table_exists_in_engine(handlerton *hton, THD* thd,
12499                                       const char *db,
12500                                       const char *name)
12501 {
12502   Ndb* ndb;
12503   DBUG_ENTER("ndbcluster_table_exists_in_engine");
12504   DBUG_PRINT("enter", ("db: %s  name: %s", db, name));
12505 
12506   if (!(ndb= check_ndb_in_thd(thd)))
12507     DBUG_RETURN(HA_ERR_NO_CONNECTION);
12508   NDBDICT* dict= ndb->getDictionary();
12509   NdbDictionary::Dictionary::List list;
12510   if (dict->listObjects(list, NdbDictionary::Object::UserTable) != 0)
12511   {
12512     ERR_RETURN(dict->getNdbError());
12513   }
12514   for (uint i= 0 ; i < list.count ; i++)
12515   {
12516     NdbDictionary::Dictionary::List::Element& elmt= list.elements[i];
12517     if (my_strcasecmp(table_alias_charset, elmt.database, db))
12518       continue;
12519     if (my_strcasecmp(table_alias_charset, elmt.name, name))
12520       continue;
12521     DBUG_PRINT("info", ("Found table"));
12522     DBUG_RETURN(HA_ERR_TABLE_EXIST);
12523   }
12524   DBUG_RETURN(HA_ERR_NO_SUCH_TABLE);
12525 }
12526 
12527 
12528 extern "C" uchar* tables_get_key(const char *entry, size_t *length,
12529                                 my_bool not_used MY_ATTRIBUTE((unused)))
12530 {
12531   *length= strlen(entry);
12532   return (uchar*) entry;
12533 }
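
/*
  tables_get_key() is the my_hash_get_key callback used below in
  ndbcluster_find_files() when initializing the 'ndb_tables' and 'ok_tables'
  hashes; each hash element is simply a NUL-terminated table name, so the
  key is the string itself and its length.
*/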
12534 
12535 
12536 /**
12537   Drop a database in NDB Cluster
12538 
12539   @note
12540     The handlerton 'drop_database' hook returns void instead of int, so the real work is done here and a thin void wrapper (ndbcluster_drop_database) is registered with the handlerton.
12541 */
12542 int ndbcluster_drop_database_impl(THD *thd, const char *path)
12543 {
12544   DBUG_ENTER("ndbcluster_drop_database_impl");
12545   char dbname[FN_HEADLEN];
12546   Ndb* ndb;
12547   NdbDictionary::Dictionary::List list;
12548   uint i;
12549   char *tabname;
12550   List<char> drop_list;
12551   int ret= 0;
12552   ha_ndbcluster::set_dbname(path, (char *)&dbname);
12553   DBUG_PRINT("enter", ("db: %s", dbname));
12554 
12555   if (!(ndb= check_ndb_in_thd(thd)))
12556     DBUG_RETURN(-1);
12557 
12558   // List tables in NDB
12559   NDBDICT *dict= ndb->getDictionary();
12560   if (dict->listObjects(list,
12561                         NdbDictionary::Object::UserTable) != 0)
12562     DBUG_RETURN(-1);
12563   for (i= 0 ; i < list.count ; i++)
12564   {
12565     NdbDictionary::Dictionary::List::Element& elmt= list.elements[i];
12566     DBUG_PRINT("info", ("Found %s/%s in NDB", elmt.database, elmt.name));
12567 
12568     // Add only tables that belong to db
12569     // Ignore Blob part tables - they are deleted when their table
12570     // is deleted.
12571     if (my_strcasecmp(system_charset_info, elmt.database, dbname) ||
12572         IS_NDB_BLOB_PREFIX(elmt.name) ||
12573         ndb_fk_util_is_mock_name(elmt.name))
12574       continue;
12575     DBUG_PRINT("info", ("%s must be dropped", elmt.name));
12576     drop_list.push_back(thd->mem_strdup(elmt.name));
12577   }
12578   // Drop any tables belonging to database
12579   char full_path[FN_REFLEN + 1];
12580   char *tmp= full_path +
12581     build_table_filename(full_path, sizeof(full_path) - 1, dbname, "", "", 0);
12582   if (ndb->setDatabaseName(dbname))
12583   {
12584     ERR_RETURN(ndb->getNdbError());
12585   }
12586   List_iterator_fast<char> it(drop_list);
12587   while ((tabname=it++))
12588   {
12589     tablename_to_filename(tabname, tmp, (uint)(FN_REFLEN - (tmp - full_path)-1));
12590     if (ha_ndbcluster::drop_table_impl(thd, 0, ndb, full_path, dbname, tabname))
12591     {
12592       const NdbError err= dict->getNdbError();
12593       if (err.code != 709 && err.code != 723)
12594       {
12595         ret= ndb_to_mysql_error(&err);
12596       }
12597     }
12598   }
12599 
12600   dict->invalidateDbGlobal(dbname);
12601   DBUG_RETURN(ret);
12602 }
12603 
12604 
12605 /**
12606    @brief Check the given directory for any remaining NDB related
12607           leftovers and try to remove them.
12608 
12609    @param path The path of the directory to check
12610 
12611    @note This function is called only when all tables which mysqld or NDB
12612          knew about have been removed. Thus anything left behind can be
12613          safely removed.
12614 */
12615 
12616 static void
12617 ndbcluster_drop_database_leftovers(const char* path)
12618 {
12619   DBUG_ENTER("ndbcluster_drop_database_leftovers");
12620   MY_DIR* dirp;
12621   if (!(dirp= my_dir(path,MYF(MY_DONT_SORT))))
12622   {
12623     // The database directory didn't exist, crash in debug since
12624     // something is obviously wrong
12625     assert(false);
12626     DBUG_VOID_RETURN;
12627   }
12628 
12629   for (uint i= 0; i < dirp->number_off_files; i++)
12630   {
12631     FILEINFO* file= dirp->dir_entry + i;
12632     DBUG_PRINT("info", ("found: '%s'", file->name));
12633 
12634     char* extension= fn_ext(file->name);
12635     DBUG_PRINT("info", ("extension: '%s'", extension));
12636     if (strcmp(extension, ha_ndb_ext))
12637       continue;
12638 
12639     char file_path[FN_REFLEN];
12640     strxmov(file_path, path, "/", file->name, NullS);
12641     DBUG_PRINT("info", ("Found leftover .ndb file '%s'! Try to delete it.",
12642                         file_path));
12643     if (my_delete_with_symlink(file_path, MYF(0)))
12644     {
12645       // Failed to delete the file. Ignore it since the DROP DATABASE
12646       // will report an error later when it tries to delete the directory
12647       DBUG_PRINT("error", ("Delete of '%s' failed, my_errno: %d",
12648                            file_path, my_errno()));
12649     }
12650   }
12651 
12652   my_dirend(dirp);
12653   DBUG_VOID_RETURN;
12654 }
12655 
12656 
12657 static void ndbcluster_drop_database(handlerton *hton, char *path)
12658 {
12659   THD *thd= current_thd;
12660   DBUG_ENTER("ndbcluster_drop_database");
12661 
12662   if (!ndb_schema_dist_is_ready())
12663   {
12664     /* Don't allow drop database unless schema distribution is ready */
12665     DBUG_VOID_RETURN;
12666   }
12667 
12668   ndbcluster_drop_database_impl(thd, path);
12669 
12670   /*
12671     At this point the mysqld has looped over all the tables it knew
12672     about in the database and dropped them one by one. The above call
12673     to 'ndbcluster_drop_database_impl' has dropped any NDB tables in
12674     the database which mysqld didn't know about (this could potentially
12675     happen if there was a "local" table with same name). This means that
12676     the database directory should be free of anything NDB related.
12677     Double check to make sure nothing is left behind and remove any
12678     leftovers (which, according to BUG#44529, could happen after e.g.
12679     a failed ALTER TABLE).
12680   */
12681   ndbcluster_drop_database_leftovers(path);
12682 
12683   char db[FN_REFLEN];
12684   ha_ndbcluster::set_dbname(path, db);
12685   uint32 table_id= 0, table_version= 0;
12686   /*
12687     Since databases aren't real NDB schema objects
12688     they don't have any id/version.
12689 
12690     But since that id/version is used to make sure that events on SCHEMA_TABLE
12691     are correct, we set random numbers
12692   */
12693   table_id = (uint32)rand();
12694   table_version = (uint32)rand();
12695   ndbcluster_log_schema_op(thd,
12696                            thd->query().str, thd->query().length,
12697                            db, "", table_id, table_version,
12698                            SOT_DROP_DB, NULL, NULL);
12699   DBUG_VOID_RETURN;
12700 }
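
/*
  Note on the ndbcluster_log_schema_op() call above: roughly, the DROP
  DATABASE statement is written to the ndb_schema table so that the other
  mysqld servers connected to the same cluster replay it and drop their
  local copies of the database as well. Since a database has no NDB
  dictionary object of its own, a random id/version pair is used purely to
  satisfy the schema event bookkeeping.
*/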
12701 
12702 int ndb_create_table_from_engine(THD *thd, const char *db,
12703                                  const char *table_name)
12704 {
12705   // Copy db and table_name to stack buffers since functions used by
12706   // ha_create_table_from_engine may convert to lowercase on some platforms
12707   char db_buf[FN_REFLEN + 1];
12708   char table_name_buf[FN_REFLEN + 1];
12709   my_stpnmov(db_buf, db, sizeof(db_buf));
12710   my_stpnmov(table_name_buf, table_name, sizeof(table_name_buf));
12711 
12712   LEX *old_lex= thd->lex, newlex;
12713   thd->lex= &newlex;
12714   newlex.set_current_select(NULL);
12715   lex_start(thd);
12716   int res= ha_create_table_from_engine(thd, db_buf, table_name_buf);
12717   thd->lex= old_lex;
12718   return res;
12719 }
12720 
12721 
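/*
  ndbcluster_find_files() reconciles three sources of truth for each table:
  the NDB dictionary, the .frm file on disk and the .ndb marker file on
  disk. An informal summary of the cases handled below:

    in NDB | .frm | .ndb | action
    -------+------+------+-------------------------------------------------
      yes  |  no  |  -   | discover (recreate the .frm from NDB)
      yes  |  yes |  no  | local table shadows the NDB table, warn and skip
      yes  |  yes |  yes | ok, set up binlogging
      no   |  yes |  yes | stale local files, remove them (never from the
           |      |      | binlog injector thread)
*/
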
12722 static int
12723 ndbcluster_find_files(handlerton *hton, THD *thd,
12724                       const char *db, const char *path,
12725                       const char *wild, bool dir, List<LEX_STRING> *files)
12726 {
12727   DBUG_ENTER("ndbcluster_find_files");
12728   DBUG_PRINT("enter", ("db: %s", db));
12729   { // extra bracket to avoid gcc 2.95.3 warning
12730   uint i;
12731   Thd_ndb *thd_ndb;
12732   Ndb* ndb;
12733   char name[FN_REFLEN + 1];
12734   HASH ndb_tables, ok_tables;
12735   NDBDICT::List list;
12736 
12737   if (!(ndb= check_ndb_in_thd(thd)))
12738     DBUG_RETURN(HA_ERR_NO_CONNECTION);
12739   thd_ndb= get_thd_ndb(thd);
12740 
12741   if (dir)
12742     DBUG_RETURN(0); // Discover of databases not yet supported
12743 
12744   Ndb_global_schema_lock_guard ndb_global_schema_lock_guard(thd);
12745   if (ndb_global_schema_lock_guard.lock())
12746     DBUG_RETURN(HA_ERR_NO_CONNECTION);
12747 
12748   // List tables in NDB
12749   NDBDICT *dict= ndb->getDictionary();
12750   if (dict->listObjects(list,
12751                         NdbDictionary::Object::UserTable) != 0)
12752     ERR_RETURN(dict->getNdbError());
12753 
12754   if (my_hash_init(&ndb_tables, table_alias_charset,list.count,0,0,
12755                    (my_hash_get_key)tables_get_key,0,0,
12756                    PSI_INSTRUMENT_ME))
12757   {
12758     DBUG_PRINT("error", ("Failed to init HASH ndb_tables"));
12759     DBUG_RETURN(-1);
12760   }
12761 
12762   if (my_hash_init(&ok_tables, system_charset_info,32,0,0,
12763                    (my_hash_get_key)tables_get_key,0,0,
12764                    PSI_INSTRUMENT_ME))
12765   {
12766     DBUG_PRINT("error", ("Failed to init HASH ok_tables"));
12767     my_hash_free(&ndb_tables);
12768     DBUG_RETURN(-1);
12769   }
12770 
12771   for (i= 0 ; i < list.count ; i++)
12772   {
12773     NDBDICT::List::Element& elmt= list.elements[i];
12774     if (IS_TMP_PREFIX(elmt.name) || IS_NDB_BLOB_PREFIX(elmt.name))
12775     {
12776       DBUG_PRINT("info", ("Skipping %s.%s in NDB", elmt.database, elmt.name));
12777       continue;
12778     }
12779     DBUG_PRINT("info", ("Found %s/%s in NDB", elmt.database, elmt.name));
12780 
12781     // Add only tables that belong to db
12782     if (my_strcasecmp(system_charset_info, elmt.database, db))
12783       continue;
12784 
12785     // Apply wildcard to list of tables in NDB
12786     if (wild)
12787     {
12788       if (lower_case_table_names)
12789       {
12790         if (wild_case_compare(files_charset_info, elmt.name, wild))
12791           continue;
12792       }
12793       else if (wild_compare(elmt.name,wild,0))
12794         continue;
12795     }
12796     DBUG_PRINT("info", ("Inserting %s into ndb_tables hash", elmt.name));
12797     my_hash_insert(&ndb_tables, (uchar*)thd->mem_strdup(elmt.name));
12798   }
12799 
12800   LEX_STRING *file_name;
12801   List_iterator<LEX_STRING> it(*files);
12802   List<char> delete_list;
12803   char *file_name_str;
12804   while ((file_name=it++))
12805   {
12806     bool file_on_disk= FALSE;
12807     DBUG_PRINT("info", ("File : %s", file_name->str));
12808     if (my_hash_search(&ndb_tables,
12809                        (const uchar*)file_name->str, file_name->length))
12810     {
12811       build_table_filename(name, sizeof(name) - 1, db,
12812                            file_name->str, reg_ext, 0);
12813       if (my_access(name, F_OK))
12814       {
12815         /* No frm for database, table name combination, but
12816          * Cluster says the table with that combination exists.
12817          * Assume frm was deleted, re-discover from engine.
12818          */
12819         DBUG_PRINT("info", ("Table %s listed and need discovery",
12820                             file_name->str));
12821         if (ndb_create_table_from_engine(thd, db, file_name->str))
12822         {
12823           push_warning_printf(thd, Sql_condition::SL_WARNING,
12824                               ER_TABLE_EXISTS_ERROR,
12825                               "Discover of table %s.%s failed",
12826                               db, file_name->str);
12827           continue;
12828         }
12829       }
12830       DBUG_PRINT("info", ("%s existed in NDB _and_ on disk ", file_name->str));
12831       file_on_disk= TRUE;
12832     }
12833 
12834     // Check for .ndb file with this name
12835     build_table_filename(name, sizeof(name) - 1, db,
12836                          file_name->str, ha_ndb_ext, 0);
12837     DBUG_PRINT("info", ("Check access for %s", name));
12838     if (my_access(name, F_OK))
12839     {
12840       DBUG_PRINT("info", ("%s did not exist on disk", name));
12841       // .ndb file did not exist on disk, another table type
12842       if (file_on_disk)
12843       {
12844         // Cluster table and an frm file exist, but no .ndb file
12845         // Assume this means the frm is for a local table, and is
12846         // hiding the cluster table in its shadow.
12847 	// Ignore this ndb table
12848  	uchar *record= my_hash_search(&ndb_tables,
12849                                       (const uchar*) file_name->str,
12850                                       file_name->length);
12851 	assert(record);
12852 	my_hash_delete(&ndb_tables, record);
12853         push_warning_printf(thd, Sql_condition::SL_WARNING,
12854 			    ER_TABLE_EXISTS_ERROR,
12855 			    "Local table %s.%s shadows ndb table",
12856 			    db, file_name->str);
12857       }
12858       continue;
12859     }
12860 
12861     /* .ndb file exists */
12862     if (file_on_disk)
12863     {
12864       // File existed in Cluster and has both frm and .ndb files,
12865       // Put in ok_tables list
12866       my_hash_insert(&ok_tables, (uchar*) file_name->str);
12867       continue;
12868     }
12869     DBUG_PRINT("info", ("%s existed on disk", name));
12870     // The .ndb file exists on disk, but it's not in list of tables in cluster
12871     // Verify that handler agrees table is gone.
12872     if (ndbcluster_table_exists_in_engine(hton, thd, db, file_name->str) ==
12873         HA_ERR_NO_SUCH_TABLE)
12874     {
12875       DBUG_PRINT("info", ("NDB says %s does not exist", file_name->str));
12876       it.remove();
12877       if (thd == injector_thd)
12878       {
12879 	/*
12880 	  Don't delete anything when called from
12881 	  the binlog thread. This is a kludge to avoid
12882 	  deleting something when "Ndb schema dist"
12883 	  uses find_files() to check for "local tables in db"
12884 	*/
12885       }
12886       else
12887 	// Put in list of tables to remove from disk
12888 	delete_list.push_back(thd->mem_strdup(file_name->str));
12889     }
12890   }
12891 
12892   if (!thd_ndb->skip_binlog_setup_in_find_files())
12893   {
12894     /* setup logging to binlog for all discovered tables */
12895     char *end, *end1= name +
12896       build_table_filename(name, sizeof(name) - 1, db, "", "", 0);
12897     for (i= 0; i < ok_tables.records; i++)
12898     {
12899       file_name_str= (char*)my_hash_element(&ok_tables, i);
12900       end= end1 +
12901         tablename_to_filename(file_name_str, end1, (uint)(sizeof(name) - (end1 - name)));
12902       ndbcluster_create_binlog_setup(thd, ndb, name, (uint)(end-name),
12903                                      db, file_name_str, 0);
12904     }
12905   }
12906 
12907   // Check for new files to discover
12908   DBUG_PRINT("info", ("Checking for new files to discover"));
12909   List<char> create_list;
12910   for (i= 0 ; i < ndb_tables.records ; i++)
12911   {
12912     file_name_str= (char*) my_hash_element(&ndb_tables, i);
12913     if (!my_hash_search(&ok_tables,
12914                         (const uchar*) file_name_str, strlen(file_name_str)))
12915     {
12916       /* Table in Cluster did not have frm or .ndb */
12917       build_table_filename(name, sizeof(name) - 1,
12918                            db, file_name_str, reg_ext, 0);
12919       if (my_access(name, F_OK))
12920       {
12921         DBUG_PRINT("info", ("%s must be discovered", file_name_str));
12922         // File is in list of ndb tables and not in ok_tables.
12923         // It is missing an frm file.
12924         // This table needs to be created
12925         create_list.push_back(thd->mem_strdup(file_name_str));
12926       }
12927     }
12928   }
12929 
12930   if (thd == injector_thd)
12931   {
12932     /*
12933       Don't delete anything when called from
12934       the binlog thread. This is a kludge to avoid
12935       deleting something when "Ndb schema dist"
12936       uses find_files() to check for "local tables in db"
12937     */
12938   }
12939   else
12940   {
12941     /*
12942       Delete old files
12943       (.frm files with a corresponding .ndb file that no longer exist in NDB)
12944     */
12945     List_iterator_fast<char> it3(delete_list);
12946     while ((file_name_str= it3++))
12947     {
12948       DBUG_PRINT("info", ("Deleting local files for table '%s.%s'",
12949                           db, file_name_str));
12950 
12951       // Delete the table and its related files from disk
12952       Ndb_local_schema::Table local_table(thd, db, file_name_str);
12953       local_table.remove_table();
12954 
12955       // Flush the table out of ndbapi's dictionary cache
12956       Ndb_table_guard ndbtab_g(ndb->getDictionary(), file_name_str);
12957       ndbtab_g.invalidate();
12958 
12959       // Flush the table from table def. cache.
12960       ndb_tdc_close_cached_table(thd, db, file_name_str);
12961 
12962       assert(!thd->is_error());
12963     }
12964   }
12965 
12966   // Create new files
12967   List_iterator_fast<char> it2(create_list);
12968   while ((file_name_str=it2++))
12969   {
12970     DBUG_PRINT("info", ("Table %s need discovery", file_name_str));
12971     if (ndb_create_table_from_engine(thd, db, file_name_str) == 0)
12972     {
12973       LEX_STRING *tmp_file_name= 0;
12974       tmp_file_name= thd->make_lex_string(tmp_file_name, file_name_str,
12975                                           (uint)strlen(file_name_str), TRUE);
12976       files->push_back(tmp_file_name);
12977     }
12978   }
12979 
12980   my_hash_free(&ok_tables);
12981   my_hash_free(&ndb_tables);
12982 
12983   /* Hide mysql.ndb_schema table */
12984   if (!strcmp(db, NDB_REP_DB))
12985   {
12986     LEX_STRING* file_name;
12987     List_iterator<LEX_STRING> it(*files);
12988     while ((file_name= it++))
12989     {
12990       if (!strcmp(file_name->str, NDB_SCHEMA_TABLE))
12991       {
12992         DBUG_PRINT("info", ("Hiding table '%s.%s'", db, file_name->str));
12993         it.remove();
12994       }
12995     }
12996   }
12997   } // extra bracket to avoid gcc 2.95.3 warning
12998   DBUG_RETURN(0);
12999 }
13000 
13001 
13002 /**
13003   Check if the given table is a system table which is
13004   supported to be stored in NDB
13005 
13006 */
13007 static bool is_supported_system_table(const char *db,
13008                                       const char *table_name,
13009                                       bool is_sql_layer_system_table)
13010 {
13011   if (!is_sql_layer_system_table)
13012   {
13013     // No need to check tables which MySQL Server does not
13014     // consider as system tables
13015     return false;
13016   }
13017 
13018   if (Ndb_dist_priv_util::is_distributed_priv_table(db, table_name))
13019   {
13020     // Table is supported as distributed system table and should be allowed
13021     // to be stored in NDB
13022     return true;
13023   }
13024 
13025   return false;
13026 }
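
/*
  Example: with the distributed privilege tables feature enabled, tables
  such as mysql.user or mysql.db (the exact set is defined by
  Ndb_dist_priv_util) may be stored in NDB, while any other SQL layer
  system table is rejected by the check above.
*/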
13027 
13028 
13029 /* Call back after cluster connect */
13030 static int connect_callback()
13031 {
13032   native_mutex_lock(&ndb_util_thread.LOCK);
13033   update_status_variables(NULL, &g_ndb_status,
13034                           g_ndb_cluster_connection);
13035   native_cond_broadcast(&ndb_util_thread.COND);
13036   native_mutex_unlock(&ndb_util_thread.LOCK);
13037   return 0;
13038 }
13039 
13040 /**
13041  * Components
13042  */
13043 Ndb_util_thread ndb_util_thread;
13044 Ndb_index_stat_thread ndb_index_stat_thread;
13045 
13046 extern THD * ndb_create_thd(char * stackptr);
13047 
13048 #ifndef NDB_NO_WAIT_SETUP
13049 static int ndb_wait_setup_func_impl(ulong max_wait)
13050 {
13051   DBUG_ENTER("ndb_wait_setup_func_impl");
13052 
13053   native_mutex_lock(&ndbcluster_mutex);
13054 
13055   struct timespec abstime;
13056   set_timespec(&abstime, 1);
13057 
13058   while (max_wait &&
13059          (!ndb_setup_complete || !ndb_index_stat_thread.is_setup_complete()))
13060   {
13061     int rc= native_cond_timedwait(&COND_ndb_setup_complete,
13062                                   &ndbcluster_mutex,
13063                                   &abstime);
13064     if (rc)
13065     {
13066       if (rc == ETIMEDOUT)
13067       {
13068         DBUG_PRINT("info", ("1s elapsed waiting"));
13069         max_wait--;
13070         set_timespec(&abstime, 1); /* 1 second from now*/
13071       }
13072       else
13073       {
13074         DBUG_PRINT("info", ("Bad native_cond_timedwait rc : %u",
13075                             rc));
13076         assert(false);
13077         break;
13078       }
13079     }
13080   }
13081 
13082   native_mutex_unlock(&ndbcluster_mutex);
13083 
13084 #ifndef NDB_WITHOUT_DIST_PRIV
13085   do
13086   {
13087     /**
13088      * Check if we (might) need a flush privileges
13089      */
13090     THD* thd= current_thd;
13091     bool own_thd= thd == NULL;
13092     if (own_thd)
13093     {
13094       thd= ndb_create_thd((char*)&thd);
13095       if (thd == 0)
13096         break;
13097     }
13098 
13099     if (Ndb_dist_priv_util::priv_tables_are_in_ndb(thd))
13100     {
13101       Ndb_local_connection mysqld(thd);
13102       mysqld.raw_run_query("FLUSH PRIVILEGES", sizeof("FLUSH PRIVILEGES"), 0);
13103     }
13104 
13105     if (own_thd)
13106     {
13107       // TLS variables should not point to thd anymore.
13108       thd->restore_globals();
13109       delete thd;
13110     }
13111   } while (0);
13112 #endif
13113 
13114   DBUG_RETURN((ndb_setup_complete == 1)? 0 : 1);
13115 }
13116 
13117 int(*ndb_wait_setup_func)(ulong) = 0;
13118 #endif
13119 
13120 static int
13121 ndbcluster_make_pushed_join(handlerton *, THD*, const AQP::Join_plan*);
13122 
13123 /* Version in composite numerical format */
13124 static Uint32 ndb_version = NDB_VERSION_D;
13125 static MYSQL_SYSVAR_UINT(
13126   version,                          /* name */
13127   ndb_version,                      /* var */
13128   PLUGIN_VAR_NOCMDOPT | PLUGIN_VAR_READONLY,
13129   "Compile version for ndbcluster",
13130   NULL,                             /* check func. */
13131   NULL,                             /* update func. */
13132   0,                                /* default */
13133   0,                                /* min */
13134   0,                                /* max */
13135   0                                 /* block */
13136 );
13137 
13138 /* Version in ndb-Y.Y.Y[-status] format */
13139 static char* ndb_version_string = (char*)NDB_NDB_VERSION_STRING;
13140 static MYSQL_SYSVAR_STR(
13141   version_string,                  /* name */
13142   ndb_version_string,              /* var */
13143   PLUGIN_VAR_NOCMDOPT | PLUGIN_VAR_READONLY,
13144   "Compile version string for ndbcluster",
13145   NULL,                             /* check func. */
13146   NULL,                             /* update func. */
13147   NULL                              /* default */
13148 );
13149 
13150 extern int ndb_dictionary_is_mysqld;
13151 
13152 Uint32 recv_thread_num_cpus;
13153 static int ndb_recv_thread_cpu_mask_check_str(const char *str);
13154 static void ndb_recv_thread_cpu_mask_update();
13155 handlerton* ndbcluster_hton;
13156 
13157 
13158 /*
13159   Handle failure from ndbcluster_init() by printing error
13160   message(s) and exit the MySQL Server.
13161 
13162   NOTE! This is done to avoid the current undefined behaviour which occurs
13163   when an error return code from plugin's init() function just disables
13164   the plugin.
13165 */
13166 
13167 static
13168 void ndbcluster_init_abort(const char* error)
13169 {
13170   ndb_log_error("%s", error);
13171   ndb_log_error("Failed to initialize ndbcluster, aborting!");
13172   ndb_log_error("Use --skip-ndbcluster to start without ndbcluster.");
13173   exit(1);
13174 }
13175 
13176 
13177 /*
13178   Initialize the ndbcluster storage engine
13179  */
13180 
13181 static
13182 int ndbcluster_init(void* p)
13183 {
13184   DBUG_ENTER("ndbcluster_init");
13185 
13186   assert(!ndbcluster_inited);
13187 
13188 #ifdef HAVE_NDB_BINLOG
13189   /* Check const alignment */
13190   assert(DependencyTracker::InvalidTransactionId ==
13191          Ndb_binlog_extra_row_info::InvalidTransactionId);
13192 
13193   if (global_system_variables.binlog_format == BINLOG_FORMAT_STMT)
13194   {
13195     /* Set global to mixed - note that this is not the default,
13196      * but the current global value
13197      */
13198     global_system_variables.binlog_format = BINLOG_FORMAT_MIXED;
13199     sql_print_information("NDB: Changed global value of binlog_format from STATEMENT to MIXED");
13200 
13201   }
13202 #endif
13203   if (ndb_util_thread.init() ||
13204       DBUG_EVALUATE_IF("ndbcluster_init_fail1", true, false))
13205   {
13206     ndbcluster_init_abort("Failed to initialize NDB Util");
13207   }
13208 
13209   if (ndb_index_stat_thread.init())
13210   {
13211     ndbcluster_init_abort("Failed to initialize NDB Index Stat");
13212   }
13213 
13214   native_mutex_init(&ndbcluster_mutex,MY_MUTEX_INIT_FAST);
13215   native_cond_init(&COND_ndb_setup_complete);
13216   ndb_dictionary_is_mysqld= 1;
13217   ndb_setup_complete= 0;
13218   ndbcluster_hton= (handlerton *)p;
13219   ndbcluster_global_schema_lock_init(ndbcluster_hton);
13220 
13221   {
13222     handlerton *h= ndbcluster_hton;
13223     h->state=            SHOW_OPTION_YES;
13224     h->db_type=          DB_TYPE_NDBCLUSTER;
13225     h->close_connection= ndbcluster_close_connection;
13226     h->commit=           ndbcluster_commit;
13227     h->rollback=         ndbcluster_rollback;
13228     h->create=           ndbcluster_create_handler; /* Create a new handler */
13229     h->drop_database=    ndbcluster_drop_database;  /* Drop a database */
13230     h->panic=            ndbcluster_end;            /* Panic call */
13231     h->show_status=      ndbcluster_show_status;    /* Show status */
13232     h->get_tablespace=   ndbcluster_get_tablespace; /* Get ts for old ver */
13233     h->alter_tablespace= ndbcluster_alter_tablespace;    /* Alter tablespace */
13234     h->partition_flags=  ndbcluster_partition_flags; /* Partition flags */
13235 #if MYSQL_VERSION_ID >= 50501
13236     h->fill_is_table=    ndbcluster_fill_is_table;
13237 #else
13238     h->fill_files_table= ndbcluster_fill_files_table;
13239 #endif
13240     ndbcluster_binlog_init(h);
13241     h->flags=            HTON_CAN_RECREATE | HTON_TEMPORARY_NOT_SUPPORTED |
13242       HTON_NO_BINLOG_ROW_OPT;
13243     h->discover=         ndbcluster_discover;
13244     h->find_files=       ndbcluster_find_files;
13245     h->table_exists_in_engine= ndbcluster_table_exists_in_engine;
13246     h->make_pushed_join= ndbcluster_make_pushed_join;
13247     h->is_supported_system_table = is_supported_system_table;
13248   }
13249 
13250   // Initialize NdbApi
13251   ndb_init_internal();
13252 
13253   /* allocate connection resources and connect to cluster */
13254   const uint global_opti_node_select= THDVAR(NULL, optimized_node_selection);
13255   if (ndbcluster_connect(connect_callback, opt_ndb_wait_connected,
13256                          opt_ndb_cluster_connection_pool,
13257                          (global_opti_node_select & 1),
13258                          opt_ndb_connectstring,
13259                          opt_ndb_nodeid,
13260                          opt_ndb_recv_thread_activation_threshold))
13261   {
13262     ndbcluster_init_abort("Failed to initialize connection(s)");
13263   }
13264 
13265   /* Translate recv thread cpu mask if set */
13266   if (ndb_recv_thread_cpu_mask_check_str(opt_ndb_recv_thread_cpu_mask) == 0)
13267   {
13268     if (recv_thread_num_cpus)
13269     {
13270       ndb_recv_thread_cpu_mask_update();
13271     }
13272   }
13273 
13274   (void) my_hash_init(&ndbcluster_open_tables,table_alias_charset,32,0,0,
13275                       (my_hash_get_key) ndbcluster_get_key,0,0,
13276                       PSI_INSTRUMENT_ME);
13277   (void) my_hash_init(&ndbcluster_dropped_tables,table_alias_charset,32,0,0,
13278                       (my_hash_get_key) ndbcluster_get_key,0,0,
13279                       PSI_INSTRUMENT_ME);
13280   /* start the ndb injector thread */
13281   if (ndbcluster_binlog_start())
13282   {
13283     ndbcluster_init_abort("Failed to start NDB Binlog");
13284   }
13285 
13286   // Create utility thread
13287   if (ndb_util_thread.start())
13288   {
13289     ndbcluster_init_abort("Failed to start NDB Util");
13290   }
13291 
13292   // Create index statistics thread
13293   if (ndb_index_stat_thread.start() ||
13294       DBUG_EVALUATE_IF("ndbcluster_init_fail2", true, false))
13295   {
13296     ndbcluster_init_abort("Failed to start NDB Index Stat");
13297   }
13298 
13299 #ifndef NDB_NO_WAIT_SETUP
13300   ndb_wait_setup_func= ndb_wait_setup_func_impl;
13301 #endif
13302 
13303   memset(&g_slave_api_client_stats, 0, sizeof(g_slave_api_client_stats));
13304 
13305   ndbcluster_inited= 1;
13306 
13307   DBUG_RETURN(0); // OK
13308 }
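
/*
  Initialization order used above, in short: init the util and index stat
  components, set up the handlerton hooks, init NdbApi and connect to the
  cluster, then start the binlog injector, util and index stat threads.
  Any failure along the way calls ndbcluster_init_abort(), which terminates
  the server rather than silently disabling the plugin.
*/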
13309 
13310 #ifndef NDEBUG
13311 static
13312 const char*
13313 get_share_state_string(NDB_SHARE_STATE s)
13314 {
13315   switch(s) {
13316   case NSS_INITIAL:
13317     return "NSS_INITIAL";
13318   case NSS_ALTERED:
13319     return "NSS_ALTERED";
13320   case NSS_DROPPED:
13321     return "NSS_DROPPED";
13322   }
13323   assert(false);
13324   return "<unknown>";
13325 }
13326 #endif
13327 
13328 int ndbcluster_binlog_end(THD *thd);
13329 
13330 static int ndbcluster_end(handlerton *hton, ha_panic_function type)
13331 {
13332   DBUG_ENTER("ndbcluster_end");
13333 
13334   if (!ndbcluster_inited)
13335     DBUG_RETURN(0);
13336   ndbcluster_inited= 0;
13337 
13338   /* Stop index stat thread */
13339   ndb_index_stat_thread.stop();
13340 
13341   /* wait for util and binlog thread to finish */
13342   ndbcluster_binlog_end(NULL);
13343 
13344   {
13345     native_mutex_lock(&ndbcluster_mutex);
13346     uint save = ndbcluster_open_tables.records; (void)save;
13347     while (ndbcluster_open_tables.records)
13348     {
13349       NDB_SHARE *share=
13350         (NDB_SHARE*) my_hash_element(&ndbcluster_open_tables, 0);
13351 #ifndef NDEBUG
13352       fprintf(stderr,
13353               "NDB: table share %s with use_count %d state: %s(%u) not freed\n",
13354               share->key_string(), share->use_count,
13355               get_share_state_string(share->state),
13356               (uint)share->state);
13357 #endif
13358       ndbcluster_real_free_share(&share);
13359     }
13360     native_mutex_unlock(&ndbcluster_mutex);
13361     assert(save == 0);
13362   }
13363   my_hash_free(&ndbcluster_open_tables);
13364 
13365   {
13366     native_mutex_lock(&ndbcluster_mutex);
13367     uint save = ndbcluster_dropped_tables.records; (void)save;
13368     while (ndbcluster_dropped_tables.records)
13369     {
13370       NDB_SHARE *share=
13371         (NDB_SHARE*) my_hash_element(&ndbcluster_dropped_tables, 0);
13372 #ifndef NDEBUG
13373       fprintf(stderr,
13374               "NDB: table share %s with use_count %d state: %s(%u) not freed\n",
13375               share->key_string(), share->use_count,
13376               get_share_state_string(share->state),
13377               (uint)share->state);
13378       /**
13379        * For unknown reasons...the dist-priv tables linger here
13380        * TODO investigate why
13381        */
13382       if (Ndb_dist_priv_util::is_distributed_priv_table(share->db,
13383                                                         share->table_name))
13384       {
13385         save--;
13386       }
13387 #endif
13388       ndbcluster_real_free_share(&share);
13389     }
13390     native_mutex_unlock(&ndbcluster_mutex);
13391     assert(save == 0);
13392   }
13393   my_hash_free(&ndbcluster_dropped_tables);
13394 
13395   ndb_index_stat_end();
13396   ndbcluster_disconnect();
13397 
13398   ndbcluster_global_schema_lock_deinit();
13399   ndb_util_thread.deinit();
13400   ndb_index_stat_thread.deinit();
13401 
13402   native_mutex_destroy(&ndbcluster_mutex);
13403   native_cond_destroy(&COND_ndb_setup_complete);
13404 
13405   // Cleanup NdbApi
13406   ndb_end_internal();
13407 
13408   DBUG_RETURN(0);
13409 }
13410 
13411 void ha_ndbcluster::print_error(int error, myf errflag)
13412 {
13413   DBUG_ENTER("ha_ndbcluster::print_error");
13414   DBUG_PRINT("enter", ("error: %d", error));
13415 
13416   if (error == HA_ERR_NO_PARTITION_FOUND)
13417     m_part_info->print_no_partition_found(table);
13418   else
13419   {
13420     if (error == HA_ERR_FOUND_DUPP_KEY &&
13421         (table == NULL || table->file == NULL))
13422     {
13423       /*
13424         This is a side effect of 'ndbcluster_print_error' (called from
13425         'ndbcluster_commit' and 'ndbcluster_rollback') which realises
13426         that it "knows nothing" and creates a brand new ha_ndbcluster
13427         in order to be able to call the print_error() function.
13428         Unfortunately the new ha_ndbcluster hasn't been open()ed
13429         and thus table pointer etc. is not set. Since handler::print_error()
13430         will use that pointer without checking for NULL (it naturally
13431         assumes an error can only be returned when the handler is open)
13432         this would crash the mysqld unless it's handled here.
13433       */
13434       my_error(ER_DUP_KEY, errflag, table_share->table_name.str, error);
13435       DBUG_VOID_RETURN;
13436     }
13437     if (error == ER_CANT_DROP_FIELD_OR_KEY)
13438     {
13439       /*
13440         Called on drop unknown FK by server when algorithm=copy or
13441         by handler when algorithm=inplace.  In both cases the error
13442         was already printed in ha_ndb_ddl_fk.cc.
13443       */
13444       THD* thd= NULL;
13445       if (table != NULL &&
13446           (thd= table->in_use) != NULL &&
13447           thd->lex != NULL &&
13448           thd->lex->sql_command == SQLCOM_ALTER_TABLE)
13449       {
13450         DBUG_VOID_RETURN;
13451       }
13452       assert(false);
13453     }
13454 
13455     handler::print_error(error, errflag);
13456   }
13457   DBUG_VOID_RETURN;
13458 }
13459 
13460 
13461 /**
13462   Set a given location from full pathname to database name.
13463 */
13464 
13465 void ha_ndbcluster::set_dbname(const char *path_name, char *dbname)
13466 {
13467   ndb_set_dbname(path_name, dbname);
13468 }
13469 
13470 /**
13471   Set m_dbname from full pathname to table file.
13472 */
13473 
13474 void ha_ndbcluster::set_dbname(const char *path_name)
13475 {
13476   ndb_set_dbname(path_name, m_dbname);
13477 }
13478 
13479 /**
13480   Set a given location from full pathname to table file.
13481 */
13482 
13483 void
13484 ha_ndbcluster::set_tabname(const char *path_name, char * tabname)
13485 {
13486   ndb_set_tabname(path_name, tabname);
13487 }
13488 
13489 /**
13490   Set m_tabname from full pathname to table file.
13491 */
13492 
13493 void ha_ndbcluster::set_tabname(const char *path_name)
13494 {
13495   ndb_set_tabname(path_name, m_tabname);
13496 }
13497 
13498 
13499 /*
13500   If there are no stored stats, should we do a tree-dive on all db
13501   nodes? The result is fairly good but does mean a round-trip.
13502  */
13503 static const bool g_ndb_records_in_range_tree_dive= false;
13504 
13505 /* Determine roughly how many records are in the range specified */
13506 ha_rows
13507 ha_ndbcluster::records_in_range(uint inx, key_range *min_key,
13508                                 key_range *max_key)
13509 {
13510   KEY *key_info= table->key_info + inx;
13511   uint key_length= key_info->key_length;
13512   NDB_INDEX_TYPE idx_type= get_index_type(inx);
13513 
13514   DBUG_ENTER("records_in_range");
13515   // Prevent partial read of hash indexes by returning HA_POS_ERROR
13516   if ((idx_type == UNIQUE_INDEX || idx_type == PRIMARY_KEY_INDEX) &&
13517       ((min_key && min_key->length < key_length) ||
13518        (max_key && max_key->length < key_length)))
13519     DBUG_RETURN(HA_POS_ERROR);
13520 
13521   // Read from hash index with full key
13522   // This is a "const" table which returns only one record!
13523   if ((idx_type != ORDERED_INDEX) &&
13524       ((min_key && min_key->length == key_length) &&
13525        (max_key && max_key->length == key_length) &&
13526        (min_key->key==max_key->key ||
13527         memcmp(min_key->key, max_key->key, key_length)==0)))
13528     DBUG_RETURN(1);
13529 
13530   // XXX why this if
13531   if ((idx_type == PRIMARY_KEY_ORDERED_INDEX ||
13532        idx_type == UNIQUE_ORDERED_INDEX ||
13533        idx_type == ORDERED_INDEX))
13534   {
13535     THD *thd= current_thd;
13536     const bool index_stat_enable= THDVAR(NULL, index_stat_enable) &&
13537                                   THDVAR(thd, index_stat_enable);
13538 
13539     if (index_stat_enable)
13540     {
13541       ha_rows rows= HA_POS_ERROR;
13542       int err= ndb_index_stat_get_rir(inx, min_key, max_key, &rows);
13543       if (err == 0)
13544       {
13545         /**
13546          * The optimizer thinks that all values < 2 are exact, but
13547          * we don't provide exact statistics
13548          */
13549         if (rows < 2)
13550           rows = 2;
13551         DBUG_RETURN(rows);
13552       }
13553       if (err != 0 &&
13554           /* no stats is not unexpected error */
13555           err != NdbIndexStat::NoIndexStats &&
13556           /* warning was printed at first error */
13557           err != NdbIndexStat::MyHasError &&
13558           /* stats thread aborted request */
13559           err != NdbIndexStat::MyAbortReq)
13560       {
13561         push_warning_printf(thd, Sql_condition::SL_WARNING,
13562                             ER_CANT_GET_STAT, /* pun? */
13563                             "index stats (RIR) for key %s:"
13564                             " unexpected error %d",
13565                             key_info->name, err);
13566       }
13567       /*fall through*/
13568     }
13569 
13570     if (g_ndb_records_in_range_tree_dive)
13571     {
13572       NDB_INDEX_DATA& d=m_index[inx];
13573       const NDBINDEX* index= d.index;
13574       Ndb *ndb= get_ndb(thd);
13575       NdbTransaction* active_trans= m_thd_ndb ? m_thd_ndb->trans : 0;
13576       NdbTransaction* trans=NULL;
13577       int res=0;
13578       Uint64 rows;
13579 
13580       do
13581       {
13582         if ((trans=active_trans) == NULL ||
13583             trans->commitStatus() != NdbTransaction::Started)
13584         {
13585           DBUG_PRINT("info", ("no active trans"));
13586           if (! (trans=ndb->startTransaction()))
13587             ERR_BREAK(ndb->getNdbError(), res);
13588         }
13589 
13590         /* Create an IndexBound struct for the keys */
13591         NdbIndexScanOperation::IndexBound ib;
13592         compute_index_bounds(ib,
13593                              key_info,
13594                              min_key,
13595                              max_key,
13596                              0);
13597 
13598         ib.range_no= 0;
13599 
13600         NdbIndexStat is;
13601         if (is.records_in_range(index,
13602                                 trans,
13603                                 d.ndb_record_key,
13604                                 m_ndb_record,
13605                                 &ib,
13606                                 0,
13607                                 &rows,
13608                                 0) == -1)
13609           ERR_BREAK(is.getNdbError(), res);
13610       } while (0);
13611 
13612       if (trans != active_trans && rows == 0)
13613         rows = 1;
13614       if (trans != active_trans && trans != NULL)
13615         ndb->closeTransaction(trans);
13616       if (res == 0)
13617         DBUG_RETURN(rows);
13618       /*fall through*/
13619     }
13620   }
13621 
13622   /* Use simple heuristics to estimate fraction
13623      of 'stats.records' returned from the range.
13624   */
13625   do
13626   {
13627     if (stats.records == ~(ha_rows)0 || stats.records == 0)
13628     {
13629       /* Refresh statistics, only read from datanodes if 'use_exact_count' */
13630       THD *thd= current_thd;
13631       if (update_stats(thd, THDVAR(thd, use_exact_count)))
13632         break;
13633     }
13634 
13635     Uint64 rows;
13636     Uint64 table_rows= stats.records;
13637     size_t eq_bound_len= 0;
13638     size_t min_key_length= (min_key) ? min_key->length : 0;
13639     size_t max_key_length= (max_key) ? max_key->length : 0;
13640 
13641     // Might have a closed/open range bound:
13642     // Low range open
13643     if (!min_key_length)
13644     {
13645       rows= (!max_key_length)
13646            ? table_rows             // No range was specified
13647            : table_rows/10;         // -oo .. <high range> -> 10% selectivity
13648     }
13649     // High range open
13650     else if (!max_key_length)
13651     {
13652       rows= table_rows/10;          // <low range>..oo -> 10% selectivity
13653     }
13654     else
13655     {
13656       size_t bounds_len= MIN(min_key_length,max_key_length);
13657       eq_bound_len= 0;       // use the outer variable, don't shadow it
13658       uint eq_bound_offs= 0;
13659 
13660       KEY_PART_INFO* key_part= key_info->key_part;
13661       KEY_PART_INFO* end= key_part+key_info->user_defined_key_parts;
13662       for (; key_part != end; key_part++)
13663       {
13664         uint part_length= key_part->store_length;
13665         if (eq_bound_offs+part_length > bounds_len ||
13666             memcmp(&min_key->key[eq_bound_offs],
13667                    &max_key->key[eq_bound_offs],
13668                    part_length))
13669         {
13670           break;
13671         }
13672         eq_bound_len+= key_part->length;
13673         eq_bound_offs+= part_length;
13674       }
13675 
13676       if (!eq_bound_len)
13677       {
13678         rows= table_rows/20;        // <low range>..<high range> -> 5%
13679       }
13680       else
13681       {
13682         // Has an equality range on a leading part of 'key_length':
13683         // - Assume reduced selectivity for non-unique indexes
13684         //   by decreasing 'eq_fraction' by 20%
13685         // - Assume equal selectivity for all eq_parts in key.
13686 
13687         double eq_fraction = (double)(eq_bound_len) / key_length;
13688         if (idx_type == ORDERED_INDEX) // Non-unique index -> less selectivity
13689           eq_fraction/= 1.20;
13690         if (eq_fraction >= 1.0)        // Exact match -> 1 row
13691           DBUG_RETURN(1);
13692 
13693         rows = (Uint64)((double)table_rows / pow((double)table_rows, eq_fraction));
13694         if (rows > (table_rows/50))    // EQ-range: Max 2% of rows
13695           rows= (table_rows/50);
13696 
13697         if (min_key_length > eq_bound_offs)
13698           rows/= 2;
13699         if (max_key_length > eq_bound_offs)
13700           rows/= 2;
13701       }
13702     }
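
    /*
      Worked example of the heuristic above (numbers for illustration only):
      with table_rows = 1,000,000 and an equality bound covering half of the
      key (eq_fraction = 0.5, ~0.42 on an ordered index after the 20%
      reduction), rows = 1,000,000 / 1,000,000^0.42, roughly 3,000, which is
      below the 2% cap of 20,000, and is then halved once for each side of
      the range that extends beyond the equality prefix.
    */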
13703 
13704     // Make sure that EQ is preferred even if row-count is low
13705     if (eq_bound_len && rows < 2)      // At least 2 rows as not exact
13706       rows= 2;
13707     else if (rows < 3)
13708       rows= 3;
13709     DBUG_RETURN(MIN(rows,table_rows));
13710   } while (0);
13711 
13712   DBUG_RETURN(10); /* Poor guess when you don't know anything */
13713 }
13714 
13715 ulonglong ha_ndbcluster::table_flags(void) const
13716 {
13717   THD *thd= current_thd;
13718   ulonglong f=
13719     HA_REC_NOT_IN_SEQ |
13720     HA_NULL_IN_KEY |
13721     HA_AUTO_PART_KEY |
13722     HA_NO_PREFIX_CHAR_KEYS |
13723     HA_CAN_GEOMETRY |
13724     HA_CAN_BIT_FIELD |
13725     HA_PRIMARY_KEY_REQUIRED_FOR_POSITION |
13726     HA_PARTIAL_COLUMN_READ |
13727     HA_HAS_OWN_BINLOGGING |
13728     HA_BINLOG_ROW_CAPABLE |
13729     HA_HAS_RECORDS |
13730     HA_READ_BEFORE_WRITE_REMOVAL |
13731     0;
13732 
13733   /*
13734     To allow for logging of ndb tables during stmt based logging,
13735     flag the capability, but also turn off the flag for OWN_BINLOGGING
13736   */
13737   if (thd->variables.binlog_format == BINLOG_FORMAT_STMT)
13738     f= (f | HA_BINLOG_STMT_CAPABLE) & ~HA_HAS_OWN_BINLOGGING;
13739 
13740    /*
13741      Allow MySQL Server to decide that STATEMENT logging should be used
13742      for the distributed privilege tables. NOTE! This is a workaround
13743      for a generic problem with forcing STATEMENT logging; see BUG16482501.
13744    */
13745   if (Ndb_dist_priv_util::is_distributed_priv_table(m_dbname,m_tabname))
13746     f= (f | HA_BINLOG_STMT_CAPABLE) & ~HA_HAS_OWN_BINLOGGING;
13747 
13748   /**
13749    * To maximize join pushability we want const-table
13750    * optimization blocked if 'ndb_join_pushdown= on'
13751    */
13752   if (THDVAR(thd, join_pushdown))
13753     f= f | HA_BLOCK_CONST_TABLE;
13754 
13755   return f;
13756 }
13757 
13758 const char * ha_ndbcluster::table_type() const
13759 {
13760   return("NDBCLUSTER");
13761 }
13762 uint ha_ndbcluster::max_supported_record_length() const
13763 {
13764   return NDB_MAX_TUPLE_SIZE;
13765 }
13766 uint ha_ndbcluster::max_supported_keys() const
13767 {
13768   return MAX_KEY;
13769 }
13770 uint ha_ndbcluster::max_supported_key_parts() const
13771 {
13772   return NDB_MAX_NO_OF_ATTRIBUTES_IN_KEY;
13773 }
13774 uint ha_ndbcluster::max_supported_key_length() const
13775 {
13776   return NDB_MAX_KEY_SIZE;
13777 }
13778 uint ha_ndbcluster::max_supported_key_part_length(HA_CREATE_INFO
13779                                  *create_info MY_ATTRIBUTE((unused))) const
13780 {
13781   return NDB_MAX_KEY_SIZE;
13782 }
13783 bool ha_ndbcluster::low_byte_first() const
13784 {
13785 #ifdef WORDS_BIGENDIAN
13786   return FALSE;
13787 #else
13788   return TRUE;
13789 #endif
13790 }
13791 const char* ha_ndbcluster::index_type(uint key_number)
13792 {
13793   switch (get_index_type(key_number)) {
13794   case ORDERED_INDEX:
13795   case UNIQUE_ORDERED_INDEX:
13796   case PRIMARY_KEY_ORDERED_INDEX:
13797     return "BTREE";
13798   case UNIQUE_INDEX:
13799   case PRIMARY_KEY_INDEX:
13800   default:
13801     return "HASH";
13802   }
13803 }
13804 
13805 uint8 ha_ndbcluster::table_cache_type()
13806 {
13807   DBUG_ENTER("ha_ndbcluster::table_cache_type=HA_CACHE_TBL_ASKTRANSACT");
13808   DBUG_RETURN(HA_CACHE_TBL_ASKTRANSACT);
13809 }
13810 
13811 /**
13812    Retrieve the commit count for the table object.
13813 
13814    @param thd              Thread context.
13815    @param norm_name        Normalized path to the table.
13816    @param[out] commit_count Commit count for the table.
13817 
13818    @return 0 on success.
13819    @return 1 if an error occurred.
13820 */
13821 
13822 uint ndb_get_commitcount(THD *thd, char *norm_name,
13823                          Uint64 *commit_count)
13824 {
13825   char dbname[NAME_LEN + 1];
13826   NDB_SHARE *share;
13827   DBUG_ENTER("ndb_get_commitcount");
13828 
13829   DBUG_PRINT("enter", ("name: %s", norm_name));
13830   native_mutex_lock(&ndbcluster_mutex);
13831   if (!(share=(NDB_SHARE*) my_hash_search(&ndbcluster_open_tables,
13832                                           (const uchar*) norm_name,
13833                                           strlen(norm_name))))
13834   {
13835     native_mutex_unlock(&ndbcluster_mutex);
13836     DBUG_PRINT("info", ("Table %s not found in ndbcluster_open_tables",
13837                          norm_name));
13838     DBUG_RETURN(1);
13839   }
13840   /* ndb_share reference temporary, free below */
13841   share->use_count++;
13842   DBUG_PRINT("NDB_SHARE", ("%s temporary  use_count: %u",
13843                            share->key_string(), share->use_count));
13844   native_mutex_unlock(&ndbcluster_mutex);
13845 
13846   native_mutex_lock(&share->mutex);
13847   if (opt_ndb_cache_check_time > 0)
13848   {
13849     if (share->commit_count != 0)
13850     {
13851       DBUG_PRINT("info", ("Getting commit_count: %llu from share",
13852                           share->commit_count));
13853       *commit_count= share->commit_count;
13854       native_mutex_unlock(&share->mutex);
13855       /* ndb_share reference temporary free */
13856       DBUG_PRINT("NDB_SHARE", ("%s temporary free  use_count: %u",
13857                                share->key_string(), share->use_count));
13858       free_share(&share);
13859       DBUG_RETURN(0);
13860     }
13861   }
13862   DBUG_PRINT("info", ("Get commit_count from NDB"));
13863   Ndb *ndb;
13864   if (!(ndb= check_ndb_in_thd(thd)))
13865     DBUG_RETURN(1);
13866 
13867   ha_ndbcluster::set_dbname(norm_name, dbname);
13868   if (ndb->setDatabaseName(dbname))
13869   {
13870     ERR_RETURN(ndb->getNdbError());
13871   }
13872   uint lock= share->commit_count_lock;
13873   native_mutex_unlock(&share->mutex);
13874 
13875   struct Ndb_statistics stat;
13876   {
13877     char tblname[NAME_LEN + 1];
13878     ha_ndbcluster::set_tabname(norm_name, tblname);
13879     Ndb_table_guard ndbtab_g(ndb->getDictionary(), tblname);
13880     if (ndbtab_g.get_table() == 0
13881         || ndb_get_table_statistics(thd, NULL,
13882                                     FALSE,
13883                                     ndb,
13884                                     ndbtab_g.get_table()->getDefaultRecord(),
13885                                     &stat))
13886     {
13887       /* ndb_share reference temporary free */
13888       DBUG_PRINT("NDB_SHARE", ("%s temporary free  use_count: %u",
13889                                share->key_string(), share->use_count));
13890       free_share(&share);
13891       DBUG_RETURN(1);
13892     }
13893   }
13894 
13895   native_mutex_lock(&share->mutex);
13896   if (share->commit_count_lock == lock)
13897   {
13898     DBUG_PRINT("info", ("Setting commit_count: %llu", stat.commit_count));
13899     share->commit_count= stat.commit_count;
13900     *commit_count= stat.commit_count;
13901   }
13902   else
13903   {
13904     DBUG_PRINT("info", ("Discarding commit_count, commit_count_lock changed"));
13905     *commit_count= 0;
13906   }
13907   native_mutex_unlock(&share->mutex);
13908   /* ndb_share reference temporary free */
13909   DBUG_PRINT("NDB_SHARE", ("%s temporary free  use_count: %u",
13910                            share->key_string(), share->use_count));
13911   free_share(&share);
13912   DBUG_RETURN(0);
13913 }
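
/*
  Note on the commit_count_lock handling above: the lock counter is sampled
  while holding share->mutex, the mutex is released during the (potentially
  slow) call to NDB, and the counter is compared again afterwards. If it
  changed, another thread invalidated the cached value in the meantime and
  the freshly read commit_count is discarded (*commit_count is set to 0).
*/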
13914 
13915 
13916 /**
13917   Check if a cached query can be used.
13918 
13919   This is done by comparing the supplied engine_data to commit_count of
13920   the table.
13921 
13922   The commit_count is normally retrieved from the share for the table, where
13923   it has been cached by the util thread. If the util thread is not started,
13924   NDB has to be contacted to retrieve the commit_count; this introduces
13925   a small delay while waiting for NDB to answer.
13926 
13927 
13928   @param thd            thread handle
13929   @param full_name      normalized path to the table in the canonical
13930                         format.
13931   @param full_name_len  length of the normalized path to the table.
13932   @param engine_data    parameter retrieved when query was first inserted into
13933                         the cache. If the value of engine_data is changed,
13934                         all queries for this table should be invalidated.
13935 
13936   @retval
13937     TRUE  Yes, use the query from cache
13938   @retval
13939     FALSE No, don't use the cached query, and if engine_data
13940           has changed, all queries for this table should be invalidated
13941 
13942 */
13943 
13944 static my_bool
13945 ndbcluster_cache_retrieval_allowed(THD *thd,
13946                                    char *full_name, uint full_name_len,
13947                                    ulonglong *engine_data)
13948 {
13949   Uint64 commit_count;
13950   char dbname[NAME_LEN + 1];
13951   char tabname[NAME_LEN + 1];
13952 
13953   ha_ndbcluster::set_dbname(full_name, dbname);
13954   ha_ndbcluster::set_tabname(full_name, tabname);
13955 
13956   DBUG_ENTER("ndbcluster_cache_retrieval_allowed");
13957   DBUG_PRINT("enter", ("dbname: %s, tabname: %s",
13958                        dbname, tabname));
13959 
13960   if (thd_options(thd) & (OPTION_NOT_AUTOCOMMIT | OPTION_BEGIN))
13961   {
13962     /* Don't allow qc to be used if table has been previously
13963        modified in transaction */
13964     if (!check_ndb_in_thd(thd))
13965       DBUG_RETURN(FALSE);
13966     Thd_ndb *thd_ndb= get_thd_ndb(thd);
13967     if (!thd_ndb->changed_tables.is_empty())
13968     {
13969       NDB_SHARE* share;
13970       List_iterator_fast<NDB_SHARE> it(thd_ndb->changed_tables);
13971       while ((share= it++))
13972       {
13973         if (strcmp(share->table_name, tabname) == 0 &&
13974             strcmp(share->db, dbname) == 0)
13975         {
13976           DBUG_PRINT("exit", ("No, transaction has changed table"));
13977           DBUG_RETURN(FALSE);
13978         }
13979       }
13980     }
13981   }
13982 
13983   if (ndb_get_commitcount(thd, full_name, &commit_count))
13984   {
13985     *engine_data= 0; /* invalidate */
13986     DBUG_PRINT("exit", ("No, could not retrieve commit_count"));
13987     DBUG_RETURN(FALSE);
13988   }
13989   DBUG_PRINT("info", ("engine_data: %llu, commit_count: %llu",
13990                       *engine_data, commit_count));
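  /*
    A commit_count of 0 in the share indicates that a local commit has
    invalidated the cached value, so the cached query cannot be used.
  */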
13991   if (commit_count == 0)
13992   {
13993     *engine_data= 0; /* invalidate */
13994     DBUG_PRINT("exit", ("No, local commit has been performed"));
13995     DBUG_RETURN(FALSE);
13996   }
13997   else if (*engine_data != commit_count)
13998   {
13999     *engine_data= commit_count; /* invalidate */
14000     DBUG_PRINT("exit", ("No, commit_count has changed"));
14001     DBUG_RETURN(FALSE);
14002   }
14003 
14004   DBUG_PRINT("exit", ("OK to use cache, engine_data: %llu",
14005                       *engine_data));
14006   DBUG_RETURN(TRUE);
14007 }
14008 
14009 
14010 /**
14011   Register a table for use in the query cache.
14012 
14013   Fetch the commit_count for the table and return it in engine_data,
14014   this will later be used to check if the table has changed, before
14015   the cached query is reused.
14016 
14017   @param thd            thread handle
14018   @param full_name      normalized path to the table in the
14019                         canonical format.
14020   @param full_name_len  length of the normalized path to the table.
14021   @param engine_callback  function to be called before using cache on
14022                           this table
14023   @param[out] engine_data    commit_count for this table
14024 
14025   @retval
14026     TRUE  Yes, it's ok to cache this query
14027   @retval
14028     FALSE No, don't cache the query
14029 */
14030 
14031 my_bool
14032 ha_ndbcluster::register_query_cache_table(THD *thd,
14033                                           char *full_name,
14034                                           size_t full_name_len,
14035                                           qc_engine_callback *engine_callback,
14036                                           ulonglong *engine_data)
14037 {
14038   Uint64 commit_count;
14039 
14040   DBUG_ENTER("ha_ndbcluster::register_query_cache_table");
14041   DBUG_PRINT("enter",("dbname: %s, tabname: %s",
14042 		      m_dbname, m_tabname));
14043 
14044   if (thd_options(thd) & (OPTION_NOT_AUTOCOMMIT | OPTION_BEGIN))
14045   {
14046     /* Don't allow qc to be used if table has been previously
14047        modified in transaction */
14048     Thd_ndb *thd_ndb= get_thd_ndb(thd);
14049     if (!thd_ndb->changed_tables.is_empty())
14050     {
14051       assert(m_share);
14052       NDB_SHARE* share;
14053       List_iterator_fast<NDB_SHARE> it(thd_ndb->changed_tables);
14054       while ((share= it++))
14055       {
14056         if (m_share == share)
14057         {
14058           DBUG_PRINT("exit", ("No, transaction has changed table"));
14059           DBUG_RETURN(FALSE);
14060         }
14061       }
14062     }
14063   }
14064 
14065   if (ndb_get_commitcount(thd, full_name, &commit_count))
14066   {
14067     *engine_data= 0;
14068     DBUG_PRINT("exit", ("Error, could not get commitcount"));
14069     DBUG_RETURN(FALSE);
14070   }
14071   *engine_data= commit_count;
14072   *engine_callback= ndbcluster_cache_retrieval_allowed;
14073   DBUG_PRINT("exit", ("commit_count: %llu", commit_count));
14074   DBUG_RETURN(commit_count > 0);
14075 }
14076 
14077 
14078 static uchar *ndbcluster_get_key(NDB_SHARE *share, size_t *length,
14079                                 my_bool)
14080 {
14081   *length= share->key_length();
14082   return (uchar*) share->key_string();
14083 }
14084 
14085 
14086 #ifndef NDEBUG
14087 
14088 static void print_ndbcluster_open_tables()
14089 {
14090   DBUG_LOCK_FILE;
14091   fprintf(DBUG_FILE, ">ndbcluster_open_tables\n");
14092   for (uint i= 0; i < ndbcluster_open_tables.records; i++)
14093   {
14094     NDB_SHARE* share= (NDB_SHARE*)my_hash_element(&ndbcluster_open_tables, i);
14095     share->print("", DBUG_FILE);
14096   }
14097   fprintf(DBUG_FILE, "<ndbcluster_open_tables\n");
14098   DBUG_UNLOCK_FILE;
14099 }
14100 
14101 #endif
14102 
14103 
14104 #define dbug_print_open_tables()                \
14105   DBUG_EXECUTE("info",                          \
14106                print_ndbcluster_open_tables(););
14107 
14108 
14109 /*
14110   For some reason a share is still around; try to salvage the situation
14111   by closing all cached tables. If the share still exists after that,
14112   there is an error somewhere, but only report it to the error log. Keep
14113   this "trailing share" but rename it, since there are still references
14114   to it, to avoid segmentation faults. There is a risk that the memory
14115   for this trailing share leaks.
14116 
14117   Must be called with ndbcluster_mutex already locked.
14118 */
14119 int handle_trailing_share(THD *thd, NDB_SHARE *share)
14120 {
14121   static ulong trailing_share_id= 0;
14122   DBUG_ENTER("handle_trailing_share");
14123 
14124   /* ndb_share reference temporary, free below */
14125   ++share->use_count;
14126   if (opt_ndb_extra_logging > 9)
14127     sql_print_information ("handle_trailing_share: %s use_count: %u",
14128                            share->key_string(), share->use_count);
14129   DBUG_PRINT("NDB_SHARE", ("%s temporary  use_count: %u",
14130                            share->key_string(), share->use_count));
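  /*
    Temporarily release ndbcluster_mutex while closing cached tables,
    since releasing table references may in turn need to lock it again.
  */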
14131   native_mutex_unlock(&ndbcluster_mutex);
14132 
14133   ndb_tdc_close_cached_table(thd, share->db, share->table_name);
14134 
14135   native_mutex_lock(&ndbcluster_mutex);
14136   /* ndb_share reference temporary free */
14137   DBUG_PRINT("NDB_SHARE", ("%s temporary free  use_count: %u",
14138                            share->key_string(), share->use_count));
14139   if (!--share->use_count)
14140   {
14141     if (opt_ndb_extra_logging > 9)
14142       sql_print_information ("handle_trailing_share: %s use_count: %u",
14143                              share->key_string(), share->use_count);
14144     if (opt_ndb_extra_logging)
14145       sql_print_information("NDB_SHARE: trailing share %s, "
14146                             "released by close_cached_tables",
14147                             share->key_string());
14148     ndbcluster_real_free_share(&share);
14149     DBUG_RETURN(0);
14150   }
14151   if (opt_ndb_extra_logging > 9)
14152     sql_print_information ("handle_trailing_share: %s use_count: %u",
14153                            share->key_string(), share->use_count);
14154 
14155   /*
14156     The share still exists; if it has not been dropped by the server,
14157     mark it as dropped and release that reference
14158   */
14159   if (share->state != NSS_DROPPED)
14160   {
14161     ndbcluster_mark_share_dropped(share);
14162     /* ndb_share reference create free */
14163     DBUG_PRINT("NDB_SHARE", ("%s create free  use_count: %u",
14164                              share->key_string(), share->use_count));
14165     --share->use_count;
14166     if (opt_ndb_extra_logging > 9)
14167       sql_print_information ("handle_trailing_share: %s use_count: %u",
14168                              share->key_string(), share->use_count);
14169 
14170     if (share->use_count == 0)
14171     {
14172       if (opt_ndb_extra_logging)
14173         sql_print_information("NDB_SHARE: trailing share %s, "
14174                               "released after NSS_DROPPED check",
14175                               share->key_string());
14176       ndbcluster_real_free_share(&share);
14177       DBUG_RETURN(0);
14178     }
14179   }
14180 
14181   DBUG_PRINT("info", ("NDB_SHARE: %s already exists use_count=%d, op=0x%lx.",
14182                       share->key_string(), share->use_count, (long) share->op));
14183   /*
14184      Ignore table shares only opened by util thread
14185    */
14186   if (!((share->use_count == 1) && share->util_thread))
14187   {
14188     sql_print_warning("NDB_SHARE: %s already exists use_count=%d."
14189                       " Moving away for safety, but possible memleak.",
14190                       share->key_string(), share->use_count);
14191   }
14192   dbug_print_open_tables();
14193 
14194   /*
14195     Ndb share has not been released as it should
14196   */
14197 #ifdef NOT_YET
14198   assert(FALSE);
14199 #endif
14200 
14201   /*
14202     This is probably an error.  We can however save the situation
14203     at the cost of a possible mem leak, by "renaming" the share
14204     - First remove from hash
14205   */
14206   my_hash_delete(&ndbcluster_open_tables, (uchar*) share);
14207 
14208   {
14209     /*
14210       Give the leaked share a new name using a running number
14211     */
14212     char leak_name_buf[16]; // strlen("#leak4294967295")
14213     my_snprintf(leak_name_buf, sizeof(leak_name_buf),
14214                 "#leak%lu", trailing_share_id++);
14215     share->key = NDB_SHARE::create_key(leak_name_buf);
14216     // Note that share->db, share->table_name as well
14217     // as share->shadow_table->s->db etc. point into the memory
14218     // which share->key pointed to before the memory for the leak
14219     // key was allocated, so it's not a good time to free the old
14220     // key here.
14221   }
14222   /* Keep it for a possible future trailing free */
14223   my_hash_insert(&ndbcluster_open_tables, (uchar*) share);
14224 
14225   DBUG_RETURN(0);
14226 }
14227 
14228 
14229 int
14230 ndbcluster_rename_share(THD *thd, NDB_SHARE *share, NDB_SHARE_KEY* new_key)
14231 {
14232   DBUG_ENTER("ndbcluster_rename_share");
14233   native_mutex_lock(&ndbcluster_mutex);
14234   DBUG_PRINT("enter", ("share->key: '%s'", share->key_string()));
14235   DBUG_PRINT("enter", ("new_key: '%s'", NDB_SHARE::key_get_key(new_key)));
14236 
14237   // Handle the case where NDB_SHARE with new_key already exists
14238   {
14239     NDB_SHARE *tmp =
14240         (NDB_SHARE*)my_hash_search(&ndbcluster_open_tables,
14241                                    NDB_SHARE::key_get_key(new_key),
14242                                    NDB_SHARE::key_get_length(new_key));
14243     if (tmp)
14244     {
14245       handle_trailing_share(thd, tmp);
14246     }
14247   }
14248 
14249   /* remove the share from hash */
14250   my_hash_delete(&ndbcluster_open_tables, (uchar*) share);
14251 
14252   /* save old key if insert should fail */
14253   NDB_SHARE_KEY *old_key= share->key;
14254 
14255   share->key= new_key;
14256 
14257   if (my_hash_insert(&ndbcluster_open_tables, (uchar*) share))
14258   {
14259     DBUG_PRINT("error", ("Failed to insert %s", share->key_string()));
14260     // Catch this unlikely error in debug
14261     assert(false);
14262     share->key= old_key;
14263     if (my_hash_insert(&ndbcluster_open_tables, (uchar*) share))
14264     {
14265       sql_print_error("ndbcluster_rename_share: failed to recover %s",
14266                       share->key_string());
14267       DBUG_PRINT("error", ("Failed to reinsert share with old name %s",
14268                            share->key_string()));
14269     }
14270     native_mutex_unlock(&ndbcluster_mutex);
14271     DBUG_RETURN(-1);
14272   }
14273 
14274   DBUG_PRINT("info", ("setting db and table_name to point at new key"));
14275   share->db= NDB_SHARE::key_get_db_name(share->key);
14276   share->table_name= NDB_SHARE::key_get_table_name(share->key);
14277 
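  // If the share has binlog event data, rename the shadow table used by
  // the injector as well (unless the new name is a temporary name).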
14278   Ndb_event_data *event_data= share->get_event_data_ptr();
14279   if (event_data && event_data->shadow_table)
14280   {
14281     if (!IS_TMP_PREFIX(share->table_name))
14282     {
14283       DBUG_PRINT("info", ("Renaming shadow table"));
14284       // Allocate new strings for db and table_name for shadow_table
14285       // in event_data's MEM_ROOT(where the shadow_table itself is allocated)
14286       // NOTE! This causes a slight memory leak since the already existing
14287       // strings are not released until the mem_root is eventually
14288       // released.
14289       lex_string_copy(&event_data->mem_root,
14290                       &event_data->shadow_table->s->db,
14291                       share->db);
14292       lex_string_copy(&event_data->mem_root,
14293                       &event_data->shadow_table->s->table_name,
14294                       share->table_name);
14295     }
14296     else
14297     {
14298       DBUG_PRINT("info", ("Name is temporary, skip rename of shadow table"));
14299       /**
14300        * We don't rename the table->s here,
14301        *   which is used by the injector,
14302        *   as we don't know if all events have been processed.
14303        * It will be dropped anyway.
14304        */
14305     }
14306   }
14307   /* else rename will be handled when the ALTER event comes */
14308 
14309   // Print share after rename
14310   dbug_print_share("renamed share:", share);
14311 
14312   native_mutex_unlock(&ndbcluster_mutex);
14313   DBUG_RETURN(0);
14314 }
14315 
14316 /*
14317   Increase refcount on existing share.
14318   Always returns share and cannot fail.
14319 */
14320 NDB_SHARE *ndbcluster_get_share(NDB_SHARE *share)
14321 {
14322   native_mutex_lock(&ndbcluster_mutex);
14323   share->use_count++;
14324 
14325   dbug_print_open_tables();
14326   dbug_print_share("ndbcluster_get_share:", share);
14327   if (opt_ndb_extra_logging > 9)
14328     sql_print_information ("ndbcluster_get_share: %s use_count: %u",
14329                            share->key_string(), share->use_count);
14330   native_mutex_unlock(&ndbcluster_mutex);
14331   return share;
14332 }
14333 
14334 
14335 
14336 NDB_SHARE*
14337 NDB_SHARE::create(const char* key, TABLE* table)
14338 {
14339   NDB_SHARE* share;
14340   if (!(share= (NDB_SHARE*) my_malloc(PSI_INSTRUMENT_ME,
14341                                       sizeof(*share),
14342                                       MYF(MY_WME | MY_ZEROFILL))))
14343     return NULL;
14344 
14345   share->flags= 0;
14346   share->state= NSS_INITIAL;
14347 
14348   /* Allocates enough space for key, db, and table_name */
14349   share->key= NDB_SHARE::create_key(key);
14350 
14351   share->db= NDB_SHARE::key_get_db_name(share->key);
14352   share->table_name= NDB_SHARE::key_get_table_name(share->key);
14353 
14354   thr_lock_init(&share->lock);
14355   native_mutex_init(&share->mutex, MY_MUTEX_INIT_FAST);
14356   share->commit_count= 0;
14357   share->commit_count_lock= 0;
14358 
14359 #ifdef HAVE_NDB_BINLOG
14360   share->m_cfn_share= NULL;
14361 #endif
14362 
14363   share->op= 0;
14364   share->new_op= 0;
14365   share->event_data= 0;
14366 
14367   if (ndbcluster_binlog_init_share(current_thd, share, table))
14368   {
14369     DBUG_PRINT("error", ("get_share: %s could not init share", key));
14370     assert(share->event_data == NULL);
14371     NDB_SHARE::destroy(share);
14372     return NULL;
14373   }
14374 
14375   return share;
14376 }
14377 
14378 
14379 static inline
14380 NDB_SHARE *ndbcluster_get_share(const char *key, TABLE *table,
14381                                 bool create_if_not_exists)
14382 {
14383   NDB_SHARE *share;
14384   DBUG_ENTER("ndbcluster_get_share");
14385   DBUG_PRINT("enter", ("key: '%s'", key));
14386 
14387   if (!(share= (NDB_SHARE*) my_hash_search(&ndbcluster_open_tables,
14388                                            (const uchar*) key,
14389                                            strlen(key))))
14390   {
14391     if (!create_if_not_exists)
14392     {
14393       DBUG_PRINT("error", ("get_share: %s does not exist", key));
14394       DBUG_RETURN(0);
14395     }
14396 
14397     if (!(share= NDB_SHARE::create(key, table)))
14398     {
14399       DBUG_PRINT("error", ("get_share: failed to alloc share"));
14400       my_error(ER_OUTOFMEMORY, MYF(0), static_cast<int>(sizeof(*share)));
14401       DBUG_RETURN(0);
14402     }
14403 
14404     // Insert the new share in list of open shares
14405     if (my_hash_insert(&ndbcluster_open_tables, (uchar*) share))
14406     {
14407       NDB_SHARE::destroy(share);
14408       DBUG_RETURN(0);
14409     }
14410   }
14411   share->use_count++;
14412   if (opt_ndb_extra_logging > 9)
14413     sql_print_information ("ndbcluster_get_share: %s use_count: %u",
14414                            share->key_string(), share->use_count);
14415 
14416   dbug_print_open_tables();
14417   dbug_print_share("ndbcluster_get_share:", share);
14418   DBUG_RETURN(share);
14419 }
14420 
14421 
14422 /**
14423   Get NDB_SHARE for key
14424 
14425   Returns share for key, and increases the refcount on the share.
14426 
14427   @param create_if_not_exists  create the share if it does not already exist
14428   @param have_lock             ndbcluster_mutex is already locked
14429 */
14430 
14431 NDB_SHARE *ndbcluster_get_share(const char *key, TABLE *table,
14432                                 bool create_if_not_exists,
14433                                 bool have_lock)
14434 {
14435   NDB_SHARE *share;
14436   DBUG_ENTER("ndbcluster_get_share");
14437   DBUG_PRINT("enter", ("key: '%s', create_if_not_exists: %d, have_lock: %d",
14438                        key, create_if_not_exists, have_lock));
14439 
14440   if (!have_lock)
14441     native_mutex_lock(&ndbcluster_mutex);
14442 
14443   share= ndbcluster_get_share(key, table, create_if_not_exists);
14444 
14445   if (!have_lock)
14446     native_mutex_unlock(&ndbcluster_mutex);
14447 
14448   DBUG_RETURN(share);
14449 }
14450 
14451 void ndbcluster_real_free_share(NDB_SHARE **share)
14452 {
14453   DBUG_ENTER("ndbcluster_real_free_share");
14454   dbug_print_share("ndbcluster_real_free_share:", *share);
14455 
14456   if (opt_ndb_extra_logging > 9)
14457     sql_print_information ("ndbcluster_real_free_share: %s use_count: %u",
14458                            (*share)->key_string(), (*share)->use_count);
14459 
14460   ndb_index_stat_free(*share);
14461 
14462   bool found= false;
14463   if ((* share)->state == NSS_DROPPED)
14464   {
14465     found= my_hash_delete(&ndbcluster_dropped_tables, (uchar*) *share) == 0;
14466 
14467     // If this is a 'trailing share', it might still be 'open'
14468     my_hash_delete(&ndbcluster_open_tables, (uchar*) *share);
14469   }
14470   else
14471   {
14472     found= my_hash_delete(&ndbcluster_open_tables, (uchar*) *share) == 0;
14473   }
14474   assert(found);
14475 
14476   NDB_SHARE::destroy(*share);
14477   *share= 0;
14478 
14479   dbug_print_open_tables();
14480   DBUG_VOID_RETURN;
14481 }
14482 
14483 
14484 void ndbcluster_free_share(NDB_SHARE **share, bool have_lock)
14485 {
14486   if (!have_lock)
14487     native_mutex_lock(&ndbcluster_mutex);
14488   if (!--(*share)->use_count)
14489   {
14490     if (opt_ndb_extra_logging > 9)
14491       sql_print_information ("ndbcluster_free_share: %s use_count: %u",
14492                              (*share)->key_string(), (*share)->use_count);
14493     ndbcluster_real_free_share(share);
14494   }
14495   else
14496   {
14497     if (opt_ndb_extra_logging > 9)
14498       sql_print_information ("ndbcluster_free_share: %s use_count: %u",
14499                              (*share)->key_string(), (*share)->use_count);
14500     dbug_print_open_tables();
14501     dbug_print_share("ndbcluster_free_share:", *share);
14502   }
14503   if (!have_lock)
14504     native_mutex_unlock(&ndbcluster_mutex);
14505 }
14506 
14507 void
14508 ndbcluster_mark_share_dropped(NDB_SHARE* share)
14509 {
14510   share->state= NSS_DROPPED;
14511   if (my_hash_delete(&ndbcluster_open_tables, (uchar*) share) == 0)
14512   {
14513     my_hash_insert(&ndbcluster_dropped_tables, (uchar*) share);
14514   }
14515   else
14516   {
14517     assert(false);
14518   }
14519   if (opt_ndb_extra_logging > 9)
14520   {
14521     sql_print_information ("ndbcluster_mark_share_dropped: %s use_count: %u",
14522                            share->key_string(), share->use_count);
14523   }
14524 }
14525 
14526 struct ndb_table_statistics_row {
14527   Uint64 rows;
14528   Uint64 commits;
14529   Uint32 size;
14530   Uint64 fixed_mem;
14531   Uint64 var_mem;
14532 };
14533 
14534 int ha_ndbcluster::update_stats(THD *thd,
14535                                 bool do_read_stat,
14536                                 uint part_id)
14537 {
14538   struct Ndb_statistics stat;
14539   Thd_ndb *thd_ndb= get_thd_ndb(thd);
14540   DBUG_ENTER("ha_ndbcluster::update_stats");
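  /*
    Either use the statistics cached in the share, or fetch fresh
    statistics from the data nodes and refresh the shared cache.
  */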
14541   do
14542   {
14543     if (m_share && !do_read_stat)
14544     {
14545       native_mutex_lock(&m_share->mutex);
14546       stat= m_share->stat;
14547       native_mutex_unlock(&m_share->mutex);
14548 
14549       assert(stat.row_count != ~(ha_rows)0); // should never be invalid
14550 
14551       /* Accept shared cached statistics if row_count is valid. */
14552       if (stat.row_count != ~(ha_rows)0)
14553         break;
14554     }
14555 
14556     /* Request statistics from datanodes */
14557     Ndb *ndb= thd_ndb->ndb;
14558     if (ndb->setDatabaseName(m_dbname))
14559     {
14560       set_my_errno(HA_ERR_OUT_OF_MEM);
14561       DBUG_RETURN(HA_ERR_OUT_OF_MEM);
14562     }
14563     if (int err= ndb_get_table_statistics(thd, this, TRUE, ndb,
14564                                           m_ndb_record, &stat,
14565                                           part_id))
14566     {
14567       DBUG_RETURN(err);
14568     }
14569 
14570     /* Update shared statistics with fresh data */
14571     if (m_share)
14572     {
14573       native_mutex_lock(&m_share->mutex);
14574       m_share->stat= stat;
14575       native_mutex_unlock(&m_share->mutex);
14576     }
14577     break;
14578   }
14579   while(0);
14580 
14581   int no_uncommitted_rows_count= 0;
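  /* Add rows inserted/deleted by this transaction but not yet committed,
     so that stats.records reflects what this transaction will see. */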
14582   if (m_table_info && !thd_ndb->m_error)
14583   {
14584     m_table_info->records= stat.row_count;
14585     m_table_info->last_count= thd_ndb->count;
14586     no_uncommitted_rows_count= m_table_info->no_uncommitted_rows_count;
14587   }
14588   stats.mean_rec_length= stat.row_size;
14589   stats.data_file_length= stat.fragment_memory;
14590   stats.records= stat.row_count + no_uncommitted_rows_count;
14591   stats.max_data_file_length= stat.fragment_extent_space;
14592   stats.delete_length= stat.fragment_extent_free_space;
14593 
14594   DBUG_PRINT("exit", ("stats.records: %d  "
14595                       "stat->row_count: %d  "
14596                       "no_uncommitted_rows_count: %d"
14597                       "stat->fragment_extent_space: %u  "
14598                       "stat->fragment_extent_free_space: %u",
14599                       (int)stats.records,
14600                       (int)stat.row_count,
14601                       (int)no_uncommitted_rows_count,
14602                       (uint)stat.fragment_extent_space,
14603                       (uint)stat.fragment_extent_free_space));
14604   DBUG_RETURN(0);
14605 }
14606 
14607 /**
14608   Update 'row_count' in the shared table statistics if any rows were
14609   inserted/deleted by the local transaction related to the specified
14610   'local_stat'.
14611   Should be called when the transaction has successfully committed its changes.
14612 */
14613 static
14614 void modify_shared_stats(NDB_SHARE *share,
14615                          Ndb_local_table_statistics *local_stat)
14616 {
14617   if (local_stat->no_uncommitted_rows_count)
14618   {
14619     native_mutex_lock(&share->mutex);
14620     assert(share->stat.row_count != ~(ha_rows)0);// should never be invalid
14621     if (share->stat.row_count != ~(ha_rows)0)
14622     {
14623       DBUG_PRINT("info", ("Update row_count for %s, row_count: %lu, with:%d",
14624                           share->table_name, (ulong) share->stat.row_count,
14625                           local_stat->no_uncommitted_rows_count));
14626       share->stat.row_count=
14627         ((Int64)share->stat.row_count+local_stat->no_uncommitted_rows_count > 0)
14628          ? share->stat.row_count+local_stat->no_uncommitted_rows_count
14629          : 0;
14630     }
14631     native_mutex_unlock(&share->mutex);
14632     local_stat->no_uncommitted_rows_count= 0;
14633   }
14634 }
14635 
14636 /* If part_id contains a legal partition id, ndbstat returns the
14637    partition-statistics pertaining to that partition only.
14638    Otherwise, it returns the table-statistics,
14639    which is an aggregate over all partitions of that table.
14640  */
14641 static
14642 int
14643 ndb_get_table_statistics(THD *thd, ha_ndbcluster* file, bool report_error, Ndb* ndb,
14644                          const NdbRecord *record,
14645                          struct Ndb_statistics * ndbstat,
14646                          uint part_id)
14647 {
14648   Thd_ndb *thd_ndb= get_thd_ndb(current_thd);
14649   NdbTransaction* pTrans;
14650   NdbError error;
14651   int retries= 100;
14652   int reterr= 0;
14653   int retry_sleep= 30; /* 30 milliseconds */
14654   const char *dummyRowPtr;
14655   NdbOperation::GetValueSpec extraGets[8];
14656   Uint64 rows, commits, fixed_mem, var_mem, ext_space, free_ext_space;
14657   Uint32 size, fragid;
14658 
14659   DBUG_ENTER("ndb_get_table_statistics");
14660 
14661   assert(record != 0);
14662 
14663   /* We use the passed-in NdbRecord just to get access to the
14664      table; we mask out any/all columns it may have and add
14665      our reads as extraGets. This is necessary as they are
14666      all pseudo-columns.
14667   */
14668   extraGets[0].column= NdbDictionary::Column::ROW_COUNT;
14669   extraGets[0].appStorage= &rows;
14670   extraGets[1].column= NdbDictionary::Column::COMMIT_COUNT;
14671   extraGets[1].appStorage= &commits;
14672   extraGets[2].column= NdbDictionary::Column::ROW_SIZE;
14673   extraGets[2].appStorage= &size;
14674   extraGets[3].column= NdbDictionary::Column::FRAGMENT_FIXED_MEMORY;
14675   extraGets[3].appStorage= &fixed_mem;
14676   extraGets[4].column= NdbDictionary::Column::FRAGMENT_VARSIZED_MEMORY;
14677   extraGets[4].appStorage= &var_mem;
14678   extraGets[5].column= NdbDictionary::Column::FRAGMENT_EXTENT_SPACE;
14679   extraGets[5].appStorage= &ext_space;
14680   extraGets[6].column= NdbDictionary::Column::FRAGMENT_FREE_EXTENT_SPACE;
14681   extraGets[6].appStorage= &free_ext_space;
14682   extraGets[7].column= NdbDictionary::Column::FRAGMENT;
14683   extraGets[7].appStorage= &fragid;
14684 
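  /*
    Build a one-word interpreted program that returns at most one row per
    fragment; together with the extraGets above this yields one
    pseudo-column statistics row for each fragment scanned.
  */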
14685   const Uint32 codeWords= 1;
14686   Uint32 codeSpace[ codeWords ];
14687   NdbInterpretedCode code(NULL, // Table is irrelevant
14688                           &codeSpace[0],
14689                           codeWords);
14690   if ((code.interpret_exit_last_row() != 0) ||
14691       (code.finalise() != 0))
14692   {
14693     reterr= code.getNdbError().code;
14694     DBUG_PRINT("exit", ("failed, reterr: %u, NdbError %u(%s)", reterr,
14695                         error.code, error.message));
14696     DBUG_RETURN(reterr);
14697   }
14698 
14699   do
14700   {
14701     Uint32 count= 0;
14702     Uint64 sum_rows= 0;
14703     Uint64 sum_commits= 0;
14704     Uint64 sum_row_size= 0;
14705     Uint64 sum_mem= 0;
14706     Uint64 sum_ext_space= 0;
14707     Uint64 sum_free_ext_space= 0;
14708     NdbScanOperation*pOp;
14709     int check;
14710 
14711     if ((pTrans= ndb->startTransaction()) == NULL)
14712     {
14713       error= ndb->getNdbError();
14714       goto retry;
14715     }
14716 
14717     NdbScanOperation::ScanOptions options;
14718     options.optionsPresent= NdbScanOperation::ScanOptions::SO_BATCH |
14719                             NdbScanOperation::ScanOptions::SO_GETVALUE |
14720                             NdbScanOperation::ScanOptions::SO_INTERPRETED;
14721     /* Set batch_size=1, as we need only one row per fragment. */
14722     options.batch= 1;
14723     options.extraGetValues= &extraGets[0];
14724     options.numExtraGetValues= sizeof(extraGets)/sizeof(extraGets[0]);
14725     options.interpretedCode= &code;
14726 
14727     if ((pOp= pTrans->scanTable(record, NdbOperation::LM_CommittedRead,
14728                                 empty_mask,
14729                                 &options,
14730                                 sizeof(NdbScanOperation::ScanOptions))) == NULL)
14731     {
14732       error= pTrans->getNdbError();
14733       goto retry;
14734     }
14735     thd_ndb->m_scan_count++;
14736     thd_ndb->m_pruned_scan_count += (pOp->getPruned()? 1 : 0);
14737 
14738     thd_ndb->m_execute_count++;
14739     DBUG_PRINT("info", ("execute_count: %u", thd_ndb->m_execute_count));
14740     if (pTrans->execute(NdbTransaction::NoCommit,
14741                         NdbOperation::AbortOnError,
14742                         TRUE) == -1)
14743     {
14744       error= pTrans->getNdbError();
14745       goto retry;
14746     }
14747 
14748     while ((check= pOp->nextResult(&dummyRowPtr, TRUE, TRUE)) == 0)
14749     {
14750       DBUG_PRINT("info", ("nextResult rows: %llu, commits: %llu"
14751                           "fixed_mem_size %llu var_mem_size %llu "
14752                           "fragmentid %u extent_space %llu free_extent_space %llu",
14753                           rows, commits, fixed_mem, var_mem, fragid,
14754                           ext_space, free_ext_space));
14755 
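      /* When a specific part_id is requested, only the matching fragment
         contributes to the statistics. */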
14756       if ((part_id != ~(uint)0) && fragid != part_id)
14757       {
14758         continue;
14759       }
14760 
14761       sum_rows+= rows;
14762       sum_commits+= commits;
14763       if (sum_row_size < size)
14764         sum_row_size= size;
14765       sum_mem+= fixed_mem + var_mem;
14766       count++;
14767       sum_ext_space += ext_space;
14768       sum_free_ext_space += free_ext_space;
14769 
14770       if ((part_id != ~(uint)0) && fragid == part_id)
14771       {
14772         break;
14773       }
14774     }
14775 
14776     if (check == -1)
14777     {
14778       error= pOp->getNdbError();
14779       goto retry;
14780     }
14781 
14782     pOp->close(TRUE);
14783 
14784     ndb->closeTransaction(pTrans);
14785 
14786     ndbstat->row_count= sum_rows;
14787     ndbstat->commit_count= sum_commits;
14788     ndbstat->row_size= (ulong)sum_row_size;
14789     ndbstat->fragment_memory= sum_mem;
14790     ndbstat->fragment_extent_space= sum_ext_space;
14791     ndbstat->fragment_extent_free_space= sum_free_ext_space;
14792 
14793     DBUG_PRINT("exit", ("records: %llu commits: %llu row_size: %llu "
14794                         "mem: %llu allocated: %llu free: %llu count: %u",
14795                         sum_rows, sum_commits, sum_row_size,
14796                         sum_mem, sum_ext_space, sum_free_ext_space, count));
14797 
14798     DBUG_RETURN(0);
14799 retry:
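    /*
      Map the NDB error to a MySQL error code, close the transaction and,
      for temporary errors, retry after a short sleep.
    */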
14800     if(report_error)
14801     {
14802       if (file && pTrans)
14803       {
14804         reterr= file->ndb_err(pTrans);
14805       }
14806       else
14807       {
14808         const NdbError& tmp= error;
14809         ERR_PRINT(tmp);
14810         reterr= ndb_to_mysql_error(&tmp);
14811       }
14812     }
14813     else
14814       reterr= error.code;
14815 
14816     if (pTrans)
14817     {
14818       ndb->closeTransaction(pTrans);
14819       pTrans= NULL;
14820     }
14821     if (error.status == NdbError::TemporaryError &&
14822         retries-- && !thd->killed)
14823     {
14824       do_retry_sleep(retry_sleep);
14825       continue;
14826     }
14827     break;
14828   } while(1);
14829   DBUG_PRINT("exit", ("failed, reterr: %u, NdbError %u(%s)", reterr,
14830                       error.code, error.message));
14831   DBUG_RETURN(reterr);
14832 }
14833 
14834 /**
14835   Create a .ndb file to serve as a placeholder indicating
14836   that the table with this name is an NDB table.
14837 */
14838 
14839 int ha_ndbcluster::write_ndb_file(const char *name) const
14840 {
14841   File file;
14842   bool error=1;
14843   char path[FN_REFLEN];
14844 
14845   DBUG_ENTER("write_ndb_file");
14846   DBUG_PRINT("enter", ("name: %s", name));
14847 
14848 #ifndef EMBEDDED_LIBRARY
14849   (void)strxnmov(path, FN_REFLEN-1,
14850                  mysql_data_home,"/",name,ha_ndb_ext,NullS);
14851 #else
14852   (void)strxnmov(path, FN_REFLEN-1, name,ha_ndb_ext, NullS);
14853 #endif
14854 
14855   if ((file=my_create(path, CREATE_MODE,O_RDWR | O_TRUNC,MYF(MY_WME))) >= 0)
14856   {
14857     // It's an empty file
14858     error=0;
14859     my_close(file,MYF(0));
14860   }
14861   DBUG_RETURN(error);
14862 }
14863 
14864 void ha_ndbcluster::check_read_before_write_removal()
14865 {
14866   DBUG_ENTER("check_read_before_write_removal");
14867 
14868   /* Must have determined that rbwr is possible */
14869   assert(m_read_before_write_removal_possible);
14870   m_read_before_write_removal_used= true;
14871 
14872   /* Can't use on table with hidden primary key */
14873   assert(table_share->primary_key != MAX_KEY);
14874 
14875   /* Index must be unique */
14876   DBUG_PRINT("info", ("using index %d", active_index));
14877   const KEY *key= table->key_info + active_index;
14878   assert((key->flags & HA_NOSAME)); NDB_IGNORE_VALUE(key);
14879 
14880   DBUG_VOID_RETURN;
14881 }
14882 
14883 
14884 /****************************************************************************
14885  * MRR interface implementation
14886  ***************************************************************************/
14887 
14888 /**
14889    We will not attempt to deal with more than this many ranges in a single
14890    MRR execute().
14891 */
14892 #define MRR_MAX_RANGES 128
14893 
14894 /*
14895   Types of ranges during multi_range_read.
14896 
14897   Code assumes that X < enum_ordered_range is a valid check for range converted
14898   to key operation.
14899 */
14900 enum multi_range_types
14901 {
14902   enum_unique_range,            /// Range converted to key operation
14903   enum_empty_unique_range,      /// No data found (in key operation)
14904   enum_ordered_range,           /// Normal ordered index scan range
14905   enum_skip_range               /// Empty range (eg. partition pruning)
14906 };
14907 
14908 /**
14909   Usage of the MRR buffer is as follows:
14910 
14911   First, N char * values, each being the custom value obtained from
14912   RANGE_SEQ_IF::next() that needs to be returned from multi_range_read_next().
14913   N is usually == total number of ranges, but never more than MRR_MAX_RANGES
14914   (the MRR is split across several execute()s if necessary). N may be lower
14915   than actual number of ranges in a single execute() in case of split for
14916   other reasons.
14917 
14918   This is followed by N variable-sized entries, each
14919 
14920    - 1 byte of multi_range_types for this range.
14921 
14922    - (Only) for ranges converted to key operations (enum_unique_range and
14923      enum_empty_unique_range), this is followed by table_share->reclength
14924      bytes of row data.
14925 */
14926 
14927 static inline
14928 ulong multi_range_buffer_size(const HANDLER_BUFFER* buffer)
14929 {
14930   const size_t buf_size = buffer->buffer_end - buffer->buffer;
14931   assert(buf_size < ULONG_MAX);
14932   return (ulong)buf_size;
14933 }
14934 
14935 /* Return the needed size of the fixed array at start of HANDLER_BUFFER. */
14936 static ulong
14937 multi_range_fixed_size(int num_ranges)
14938 {
14939   if (num_ranges > MRR_MAX_RANGES)
14940     num_ranges= MRR_MAX_RANGES;
14941   return num_ranges * sizeof(char *);
14942 }
14943 
14944 /* Return max number of ranges so that fixed part will still fit in buffer. */
14945 static int
14946 multi_range_max_ranges(int num_ranges, ulong bufsize)
14947 {
14948   if (num_ranges > MRR_MAX_RANGES)
14949     num_ranges= MRR_MAX_RANGES;
14950   if (num_ranges * sizeof(char *) > bufsize)
14951     num_ranges= bufsize / sizeof(char *);
14952   return num_ranges;
14953 }
14954 
14955 /* Return the size in HANDLER_BUFFER of a variable-sized entry. */
14956 static ulong
14957 multi_range_entry_size(my_bool use_keyop, ulong reclength)
14958 {
14959   /* Space for type byte. */
14960   ulong len= 1;
14961   if (use_keyop)
14962     len+= reclength;
14963   return len;
14964 }
14965 
14966 /*
14967   Return the maximum size of a variable-sized entry in HANDLER_BUFFER.
14968 
14969   Actual size may depend on key values (whether the actual value can be
14970   converted to a hash key operation or needs to be done as an ordered index
14971   scan).
14972 */
14973 static ulong
14974 multi_range_max_entry(NDB_INDEX_TYPE keytype, ulong reclength)
14975 {
14976   return multi_range_entry_size(keytype != ORDERED_INDEX, reclength);
14977 }
14978 
14979 static uchar &
14980 multi_range_entry_type(uchar *p)
14981 {
14982   return *p;
14983 }
14984 
14985 /* Find the start of the next entry in HANDLER_BUFFER. */
14986 static uchar *
14987 multi_range_next_entry(uchar *p, ulong reclength)
14988 {
14989   my_bool use_keyop= multi_range_entry_type(p) < enum_ordered_range;
14990   return p + multi_range_entry_size(use_keyop, reclength);
14991 }
14992 
14993 /* Get pointer to row data (for range converted to key operation). */
14994 static uchar *
14995 multi_range_row(uchar *p)
14996 {
14997   assert(multi_range_entry_type(p) == enum_unique_range);
14998   return p + 1;
14999 }
15000 
15001 /* Get and put upper layer custom char *, use memcpy() for unaligned access. */
15002 static char *
15003 multi_range_get_custom(HANDLER_BUFFER *buffer, int range_no)
15004 {
15005   assert(range_no < MRR_MAX_RANGES);
15006   char* res;
15007   memcpy(&res, buffer->buffer + range_no*sizeof(char*), sizeof(char*));
15008   return res;
15009 }
15010 
15011 static void
15012 multi_range_put_custom(HANDLER_BUFFER *buffer, int range_no, char *custom)
15013 {
15014   assert(range_no < MRR_MAX_RANGES);
15015   // memcpy() required for unaligned access.
15016   memcpy(buffer->buffer + range_no*sizeof(char*), &custom, sizeof(char*));
15017 }
15018 
15019 /*
15020   This is used to check if an ordered index scan is needed for a range in
15021   a multi range read.
15022   If a scan is not needed, we use a faster primary/unique key operation
15023   instead.
15024 */
15025 static my_bool
15026 read_multi_needs_scan(NDB_INDEX_TYPE cur_index_type, const KEY *key_info,
15027                       const KEY_MULTI_RANGE *r, bool is_pushed)
15028 {
15029   if (cur_index_type == ORDERED_INDEX || is_pushed)
15030     return TRUE;
15031   if (cur_index_type == PRIMARY_KEY_INDEX)
15032     return FALSE;
15033   if (cur_index_type == UNIQUE_INDEX) {  // a 'UNIQUE ... USING HASH' index
15034     // UNIQUE_INDEX is used iff the optimizer set HA_MRR_NO_NULL_ENDPOINTS.
15035     // Assert that there are no NULL values in the key, as promised.
15036     assert(!check_null_in_key(key_info, r->start_key.key, r->start_key.length));
15037     return FALSE;
15038   }
15039   assert(cur_index_type == PRIMARY_KEY_ORDERED_INDEX ||
15040          cur_index_type == UNIQUE_ORDERED_INDEX);
15041   if (r->start_key.length != key_info->key_length ||
15042       r->start_key.flag != HA_READ_KEY_EXACT)
15043     return TRUE;                                // Not exact match, need scan
15044   if (cur_index_type == UNIQUE_ORDERED_INDEX &&
15045       check_null_in_key(key_info, r->start_key.key,r->start_key.length))
15046     return TRUE;                                // Can't use for NULL values
15047   return FALSE;
15048 }
15049 
15050 /*
15051   Get cost and other information about MRR scan over a known list of ranges
15052 
15053   SYNOPSIS
15054     See handler::multi_range_read_info_const.
15055 
15056   DESCRIPTION
15057     The implementation is copied from handler::multi_range_read_info_const.
15058     The only difference is that NDB-MRR cannot handle blob columns or keys
15059     with NULLs for unique indexes. We disable MRR for those cases.
15060 
15061   NOTES
15062     See NOTES for handler::multi_range_read_info_const().
15063 */
15064 
15065 ha_rows
15066 ha_ndbcluster::multi_range_read_info_const(uint keyno, RANGE_SEQ_IF *seq,
15067                                            void *seq_init_param,
15068                                            uint n_ranges, uint *bufsz,
15069                                            uint *flags, Cost_estimate *cost)
15070 {
15071   ha_rows rows;
15072   uint def_flags= *flags;
15073   uint def_bufsz= *bufsz;
15074 
15075   DBUG_ENTER("ha_ndbcluster::multi_range_read_info_const");
15076 
15077   /* Get cost/flags/mem_usage of default MRR implementation */
15078   rows= handler::multi_range_read_info_const(keyno, seq, seq_init_param,
15079                                              n_ranges, &def_bufsz,
15080                                              &def_flags, cost);
15081   if (unlikely(rows == HA_POS_ERROR))
15082   {
15083     DBUG_RETURN(rows);
15084   }
15085 
15086   /*
15087     If HA_MRR_USE_DEFAULT_IMPL has been passed to us, that is
15088     an order to use the default MRR implementation.
15089     Otherwise, make a choice based on requested *flags, handler
15090     capabilities, cost and mrr* flags of @@optimizer_switch.
15091   */
15092   if ((*flags & HA_MRR_USE_DEFAULT_IMPL) ||
15093       choose_mrr_impl(keyno, n_ranges, rows, bufsz, flags, cost))
15094   {
15095     DBUG_PRINT("info", ("Default MRR implementation chosen"));
15096     *flags= def_flags;
15097     *bufsz= def_bufsz;
15098     assert(*flags & HA_MRR_USE_DEFAULT_IMPL);
15099   }
15100   else
15101   {
15102     /* *flags and *bufsz were set by choose_mrr_impl */
15103     DBUG_PRINT("info", ("NDB-MRR implementation chosen"));
15104     assert(!(*flags & HA_MRR_USE_DEFAULT_IMPL));
15105   }
15106   DBUG_RETURN(rows);
15107 }
15108 
15109 
15110 /*
15111   Get cost and other information about MRR scan over some sequence of ranges
15112 
15113   SYNOPSIS
15114     See handler::multi_range_read_info.
15115 */
15116 
15117 ha_rows
15118 ha_ndbcluster::multi_range_read_info(uint keyno, uint n_ranges, uint n_rows,
15119                                      uint *bufsz, uint *flags,
15120                                      Cost_estimate *cost)
15121 {
15122   ha_rows res;
15123   uint def_flags= *flags;
15124   uint def_bufsz= *bufsz;
15125 
15126   DBUG_ENTER("ha_ndbcluster::multi_range_read_info");
15127 
15128   /* Get cost/flags/mem_usage of default MRR implementation */
15129   res= handler::multi_range_read_info(keyno, n_ranges, n_rows,
15130                                       &def_bufsz, &def_flags,
15131                                       cost);
15132   if (unlikely(res == HA_POS_ERROR))
15133   {
15134     /* Default implementation can't perform MRR scan => we can't either */
15135     DBUG_RETURN(res);
15136   }
15137   assert(!res);
15138 
15139   if ((*flags & HA_MRR_USE_DEFAULT_IMPL) ||
15140       choose_mrr_impl(keyno, n_ranges, n_rows, bufsz, flags, cost))
15141   {
15142     /* Default implementation is chosen */
15143     DBUG_PRINT("info", ("Default MRR implementation chosen"));
15144     *flags= def_flags;
15145     *bufsz= def_bufsz;
15146     assert(*flags & HA_MRR_USE_DEFAULT_IMPL);
15147   }
15148   else
15149   {
15150     /* *flags and *bufsz were set by choose_mrr_impl */
15151     DBUG_PRINT("info", ("NDB-MRR implementation chosen"));
15152     assert(!(*flags & HA_MRR_USE_DEFAULT_IMPL));
15153   }
15154   DBUG_RETURN(res);
15155 }
15156 
15157 /**
15158   Internals: Choose between Default MRR implementation and
15159                     native ha_ndbcluster MRR
15160 
15161   Make the choice between using Default MRR implementation and ha_ndbcluster-MRR.
15162   This function contains common functionality factored out of multi_range_read_info()
15163   and multi_range_read_info_const(). The function assumes that the default MRR
15164   implementation's applicability requirements are satisfied.
15165 
15166   @param keyno       Index number
15167   @param n_ranges    Number of ranges/keys (i.e. intervals) in the range sequence.
15168   @param n_rows      E(full rows to be retrieved)
15169   @param bufsz  OUT  If DS-MRR is chosen, buffer use of DS-MRR implementation
15170                      else the value is not modified
15171   @param flags  IN   MRR flags provided by the MRR user
15172                 OUT  If DS-MRR is chosen, flags of DS-MRR implementation
15173                      else the value is not modified
15174   @param cost   IN   Cost of default MRR implementation
15175                 OUT  If DS-MRR is chosen, cost of DS-MRR scan
15176                      else the value is not modified
15177 
15178   @retval TRUE   Default MRR implementation should be used
15179   @retval FALSE  NDB-MRR implementation should be used
15180 */
15181 
15182 bool ha_ndbcluster::choose_mrr_impl(uint keyno, uint n_ranges, ha_rows n_rows,
15183                                     uint *bufsz, uint *flags, Cost_estimate *cost)
15184 {
15185   THD *thd= current_thd;
15186   NDB_INDEX_TYPE key_type= get_index_type(keyno);
15187 
15188   get_read_set(true, keyno);
15189 
15190   /* Disable MRR on blob read and on NULL lookup in unique index. */
15191   if (!thd->optimizer_switch_flag(OPTIMIZER_SWITCH_MRR) ||
15192       uses_blob_value(table->read_set) ||
15193       ( key_type == UNIQUE_INDEX &&
15194         has_null_in_unique_index(keyno) &&
15195         !(*flags & HA_MRR_NO_NULL_ENDPOINTS)))
15196   {
15197     /* Use the default implementation, don't modify args: See comments  */
15198     return true;
15199   }
15200 
15201   /**
15202    * Calculate *bufsz, and fall back to default MRR if we can't allocate
15203    * sufficient buffer space for NDB-MRR
15204    */
15205   {
15206     uint save_bufsize= *bufsz;
15207     ulong reclength= table_share->reclength;
15208     uint entry_size= multi_range_max_entry(key_type, reclength);
15209     uint min_total_size= entry_size + multi_range_fixed_size(1);
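    /* Smallest usable buffer: the fixed array for one range plus one
       variable-sized entry of the largest kind for this key type. */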
15210     DBUG_PRINT("info", ("MRR bufsize suggested=%u want=%u limit=%d",
15211                         save_bufsize, (uint)(n_rows + 1) * entry_size,
15212                         (*flags & HA_MRR_LIMITS) != 0));
15213     if (save_bufsize < min_total_size)
15214     {
15215       if (*flags & HA_MRR_LIMITS)
15216       {
15217         /* Too small buffer limit for native NDB-MRR. */
15218         return true;
15219       }
15220       *bufsz= min_total_size;
15221     }
15222     else
15223     {
15224       uint max_ranges= (n_ranges > 0) ? n_ranges : MRR_MAX_RANGES;
15225       *bufsz= min(save_bufsize,
15226                   (uint)(n_rows * entry_size + multi_range_fixed_size(max_ranges)));
15227     }
15228     DBUG_PRINT("info", ("MRR bufsize set to %u", *bufsz));
15229   }
15230 
15231   /**
15232    * Cost based MRR optimization is known to be incorrect.
15233    * Disabled -> always use NDB-MRR whenever possible
15234    */
15235   *flags&= ~HA_MRR_USE_DEFAULT_IMPL;
15236   *flags|= HA_MRR_SUPPORT_SORTED;
15237 
15238   return false;
15239 }
15240 
15241 
15242 int ha_ndbcluster::multi_range_read_init(RANGE_SEQ_IF *seq_funcs,
15243                                          void *seq_init_param,
15244                                          uint n_ranges, uint mode,
15245                                          HANDLER_BUFFER *buffer)
15246 {
15247   int error;
15248   DBUG_ENTER("ha_ndbcluster::multi_range_read_init");
15249 
15250   /*
15251     If supplied buffer is smaller than needed for just one range, we cannot do
15252     multi_range_read.
15253   */
15254   const ulong bufsize= multi_range_buffer_size(buffer);
15255 
15256   if (mode & HA_MRR_USE_DEFAULT_IMPL
15257       || bufsize < multi_range_fixed_size(1) +
15258                    multi_range_max_entry(get_index_type(active_index),
15259                                          table_share->reclength)
15260       || (m_pushed_join_operation==PUSHED_ROOT &&
15261          !m_disable_pushed_join &&
15262          !m_pushed_join_member->get_query_def().isScanQuery())
15263       || m_delete_cannot_batch || m_update_cannot_batch)
15264   {
15265     m_disable_multi_read= TRUE;
15266     DBUG_RETURN(handler::multi_range_read_init(seq_funcs, seq_init_param,
15267                                                n_ranges, mode, buffer));
15268   }
15269 
15270   /**
15271    * There may still be an open m_multi_cursor from the previous mrr access on this handler.
15272    * Close it now to free up resources for this NdbScanOperation.
15273    */
15274   if (unlikely((error= close_scan())))
15275     DBUG_RETURN(error);
15276 
15277   m_disable_multi_read= FALSE;
15278 
15279   mrr_is_output_sorted= MY_TEST(mode & HA_MRR_SORTED);
15280   /*
15281     Copy arguments into member variables
15282   */
15283   multi_range_buffer= buffer;
15284   mrr_funcs= *seq_funcs;
15285   mrr_iter= mrr_funcs.init(seq_init_param, n_ranges, mode);
15286   ranges_in_seq= n_ranges;
15287   m_range_res= mrr_funcs.next(mrr_iter, &mrr_cur_range);
15288   mrr_need_range_assoc = !MY_TEST(mode & HA_MRR_NO_ASSOCIATION);
15289   if (mrr_need_range_assoc)
15290   {
15291     ha_statistic_increment(&SSV::ha_multi_range_read_init_count);
15292   }
15293 
15294   /*
15295     We do not start fetching here with execute(), rather we defer this to the
15296     first call to multi_range_read_next() by setting first_running_range and
15297     first_unstarted_range like this.
15298 
15299     The reason is that the MRR interface is designed so that in some cases
15300     multi_range_read_next() may never get called (eg. in case of WHERE
15301     condition on previous table that is never satisfied). So we may not need
15302     to fetch anything.
15303 
15304     Also, at the time of writing, returning an error from
15305     multi_range_read_init() does not correctly set the error status, so we get
15306     an assert on missing result status in net_end_statement().
15307   */
15308   first_running_range= 0;
15309   first_unstarted_range= 0;
15310 
15311   DBUG_RETURN(0);
15312 }
15313 
15314 
15315 int ha_ndbcluster::multi_range_start_retrievals(uint starting_range)
15316 {
15317   KEY* key_info= table->key_info + active_index;
15318   ulong reclength= table_share->reclength;
15319   const NdbOperation* op;
15320   NDB_INDEX_TYPE cur_index_type= get_index_type(active_index);
15321   const NdbOperation *oplist[MRR_MAX_RANGES];
15322   uint num_keyops= 0;
15323   NdbTransaction *trans= m_thd_ndb->trans;
15324   int error;
15325   const bool is_pushed=
15326     check_if_pushable(NdbQueryOperationDef::OrderedIndexScan,
15327                       active_index);
15328 
15329   DBUG_ENTER("multi_range_start_retrievals");
15330 
15331   /*
15332    * read multi range will read ranges as follows (if not ordered)
15333    *
15334    * input    read order
15335    * ======   ==========
15336    * pk-op 1  pk-op 1
15337    * pk-op 2  pk-op 2
15338    * range 3  range (3,5) NOTE result rows will be intermixed
15339    * pk-op 4  pk-op 4
15340    * range 5
15341    * pk-op 6  pk-op 6
15342    */
15343 
15344   /*
15345     We loop over all ranges, converting into primary/unique key operations if
15346     possible, and adding ranges to an ordered index scan for the rest.
15347 
15348     If the supplied HANDLER_BUFFER is too small, we may also need to do only
15349     part of the multi read at once.
15350   */
15351 
15352   assert(cur_index_type != UNDEFINED_INDEX);
15353   assert(m_multi_cursor==NULL);
15354   assert(m_active_query==NULL);
15355 
15356   const NdbOperation::LockMode lm = get_ndb_lock_mode(m_lock.type);
15357   const uchar *end_of_buffer= multi_range_buffer->buffer_end;
15358 
15359   /*
15360     Normally we should have sufficient buffer for the whole fixed_sized part.
15361     But we need to make sure we do not crash if upper layer gave us a _really_
15362     small buffer.
15363 
15364     We already checked (in multi_range_read_init()) that we got enough buffer
15365     for at least one range.
15366   */
15367   uint min_entry_size=
15368     multi_range_entry_size(!read_multi_needs_scan(cur_index_type, key_info,
15369                                                   &mrr_cur_range, is_pushed),
15370                                                   reclength);
15371   const ulong bufsize= multi_range_buffer_size(multi_range_buffer);
15372   int max_range= multi_range_max_ranges(ranges_in_seq,
15373                                         bufsize - min_entry_size);
15374   assert(max_range > 0);
15375   uchar *row_buf= multi_range_buffer->buffer + multi_range_fixed_size(max_range);
15376   m_multi_range_result_ptr= row_buf;
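  /* row_buf now points just past the fixed array of custom pointers;
     variable-sized per-range entries are written from here onwards. */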
15377 
15378   int range_no= 0;
15379   int mrr_range_no= starting_range;
15380   bool any_real_read= FALSE;
15381 
15382   if (m_read_before_write_removal_possible)
15383     check_read_before_write_removal();
15384 
15385   for (;
15386        !m_range_res;
15387        range_no++, m_range_res= mrr_funcs.next(mrr_iter, &mrr_cur_range))
15388   {
15389     if (range_no >= max_range)
15390       break;
15391     my_bool need_scan=
15392       read_multi_needs_scan(cur_index_type, key_info, &mrr_cur_range, is_pushed);
15393     if (row_buf + multi_range_entry_size(!need_scan, reclength) > end_of_buffer)
15394       break;
15395     if (need_scan)
15396     {
15397       if (range_no > NdbIndexScanOperation::MaxRangeNo)
15398         break;
15399       /*
15400         Check how much KEYINFO data we already used for index bounds, and
15401         split the MRR here if it exceeds a certain limit. This way we avoid
15402         overloading the TC block in the ndb kernel.
15403 
15404         The limit used is based on the value MAX_KEY_SIZE_IN_WORDS.
15405       */
15406       if (m_multi_cursor && m_multi_cursor->getCurrentKeySize() >= 1000)
15407         break;
15408     }
15409 
15410     mrr_range_no++;
15411     multi_range_put_custom(multi_range_buffer, range_no, mrr_cur_range.ptr);
15412 
15413     part_id_range part_spec;
15414     if (m_use_partition_pruning)
15415     {
15416       get_partition_set(table, table->record[0], active_index,
15417                         &mrr_cur_range.start_key,
15418                         &part_spec);
15419       DBUG_PRINT("info", ("part_spec.start_part: %u  part_spec.end_part: %u",
15420                           part_spec.start_part, part_spec.end_part));
15421       /*
15422         If partition pruning has found no partition in set
15423         we can skip this scan
15424       */
15425       if (part_spec.start_part > part_spec.end_part)
15426       {
15427         /*
15428           We can skip this range since the key won't fit into any
15429           partition
15430         */
15431         multi_range_entry_type(row_buf)= enum_skip_range;
15432         row_buf= multi_range_next_entry(row_buf, reclength);
15433         continue;
15434       }
15435       if (!trans &&
15436           (part_spec.start_part == part_spec.end_part))
15437         if (unlikely(!(trans= start_transaction_part_id(part_spec.start_part,
15438                                                         error))))
15439           DBUG_RETURN(error);
15440     }
15441 
15442     if (need_scan)
15443     {
15444       if (!trans)
15445       {
15446         // ToDo see if we can use start_transaction_key here instead
15447         if (!m_use_partition_pruning)
15448         {
15449           get_partition_set(table, table->record[0], active_index,
15450                             &mrr_cur_range.start_key,
15451                             &part_spec);
15452           if (part_spec.start_part == part_spec.end_part)
15453           {
15454             if (unlikely(!(trans= start_transaction_part_id(part_spec.start_part,
15455                                                             error))))
15456               DBUG_RETURN(error);
15457           }
15458           else if (unlikely(!(trans= start_transaction(error))))
15459             DBUG_RETURN(error);
15460         }
15461         else if (unlikely(!(trans= start_transaction(error))))
15462           DBUG_RETURN(error);
15463       }
15464 
15465       any_real_read= TRUE;
15466       DBUG_PRINT("info", ("any_real_read= TRUE"));
15467 
15468       /* Create the scan operation for the first scan range. */
15469       if (check_if_pushable(NdbQueryOperationDef::OrderedIndexScan,
15470                             active_index))
15471       {
15472         assert(!m_read_before_write_removal_used);
15473         if (!m_active_query)
15474         {
15475           const int error= create_pushed_join();
15476           if (unlikely(error))
15477             DBUG_RETURN(error);
15478 
15479           NdbQuery* const query= m_active_query;
15480           if (mrr_is_output_sorted &&
15481               query->getQueryOperation((uint)PUSHED_ROOT)->setOrdering(NdbQueryOptions::ScanOrdering_ascending))
15482             ERR_RETURN(query->getNdbError());
15483         }
15484       } // check_if_pushable()
15485       else
15486       if (!m_multi_cursor)
15487       {
15488         /* Do a multi-range index scan for ranges not done by primary/unique key. */
15489         NdbScanOperation::ScanOptions options;
15490         NdbInterpretedCode code(m_table);
15491 
15492         options.optionsPresent=
15493           NdbScanOperation::ScanOptions::SO_SCANFLAGS |
15494           NdbScanOperation::ScanOptions::SO_PARALLEL;
15495 
15496         options.scan_flags=
15497           NdbScanOperation::SF_ReadRangeNo |
15498           NdbScanOperation::SF_MultiRange;
15499 
15500         if (lm == NdbOperation::LM_Read)
15501           options.scan_flags|= NdbScanOperation::SF_KeyInfo;
15502         if (mrr_is_output_sorted)
15503           options.scan_flags|= NdbScanOperation::SF_OrderByFull;
15504 
15505         options.parallel= DEFAULT_PARALLELISM;
15506 
15507         NdbOperation::GetValueSpec gets[2];
15508         if (table_share->primary_key == MAX_KEY)
15509           get_hidden_fields_scan(&options, gets);
15510 
15511         if (m_cond && m_cond->generate_scan_filter(&code, &options))
15512           ERR_RETURN(code.getNdbError());
15513 
15514         /* Define scan */
15515         NdbIndexScanOperation *scanOp= trans->scanIndex
15516           (m_index[active_index].ndb_record_key,
15517            m_ndb_record,
15518            lm,
15519            (uchar *)(table->read_set->bitmap),
15520            NULL, /* All bounds specified below */
15521            &options,
15522            sizeof(NdbScanOperation::ScanOptions));
15523 
15524         if (!scanOp)
15525           ERR_RETURN(trans->getNdbError());
15526 
15527         m_multi_cursor= scanOp;
15528 
15529         /* Can't have blobs in multi range read */
15530         assert(!uses_blob_value(table->read_set));
15531 
15532         /* We set m_next_row=0 to mark that no row was fetched from the scan yet. */
15533         m_next_row= 0;
15534       }
15535 
15536       Ndb::PartitionSpec ndbPartitionSpec;
15537       const Ndb::PartitionSpec* ndbPartSpecPtr= NULL;
15538 
15539       /* If this table uses user-defined partitioning, use MySQLD provided
15540        * partition info as pruning info.
15541        * Otherwise, scan range pruning is performed automatically by
15542        * NDBAPI based on distribution key values.
15543        */
15544       if (m_use_partition_pruning &&
15545           m_user_defined_partitioning &&
15546           (part_spec.start_part == part_spec.end_part))
15547       {
15548         DBUG_PRINT("info", ("Range on user-def-partitioned table can be pruned to part %u",
15549                             part_spec.start_part));
15550         ndbPartitionSpec.type= Ndb::PartitionSpec::PS_USER_DEFINED;
15551         ndbPartitionSpec.UserDefined.partitionId= part_spec.start_part;
15552         ndbPartSpecPtr= &ndbPartitionSpec;
15553       }
15554 
15555       /* Include this range in the ordered index scan. */
15556       NdbIndexScanOperation::IndexBound bound;
15557       compute_index_bounds(bound, key_info,
15558 			   &mrr_cur_range.start_key, &mrr_cur_range.end_key, 0);
15559       bound.range_no= range_no;
15560 
15561       const NdbRecord *key_rec= m_index[active_index].ndb_record_key;
15562       if (m_active_query)
15563       {
15564         DBUG_PRINT("info", ("setBound:%d, for pushed join", bound.range_no));
15565         if (m_active_query->setBound(key_rec, &bound))
15566         {
15567           ERR_RETURN(trans->getNdbError());
15568         }
15569       }
15570       else
15571       {
15572         if (m_multi_cursor->setBound(m_index[active_index].ndb_record_key,
15573                                      bound,
15574                                      ndbPartSpecPtr, // Only for user-def tables
15575                                      sizeof(Ndb::PartitionSpec)))
15576         {
15577           ERR_RETURN(trans->getNdbError());
15578         }
15579       }
15580 
15581       multi_range_entry_type(row_buf)= enum_ordered_range;
15582       row_buf= multi_range_next_entry(row_buf, reclength);
15583     }
15584     else
15585     {
15586       multi_range_entry_type(row_buf)= enum_unique_range;
15587 
15588       if (!trans)
15589       {
15590         assert(active_index != MAX_KEY);
15591         if (unlikely(!(trans= start_transaction_key(active_index,
15592                                                     mrr_cur_range.start_key.key,
15593                                                     error))))
15594           DBUG_RETURN(error);
15595       }
15596 
15597       if (m_read_before_write_removal_used)
15598       {
15599         DBUG_PRINT("info", ("m_read_before_write_removal_used == TRUE"));
15600 
15601         /* Key will later be returned as result record.
15602          * Save it in 'row_buf' from where it will later be retrieved.
15603          */
15604         key_restore(multi_range_row(row_buf),
15605                     (uchar*)mrr_cur_range.start_key.key,
15606                     key_info, key_info->key_length);
15607 
15608         op= NULL;  // read_before_write_removal
15609       }
15610       else
15611       {
15612         any_real_read= TRUE;
15613         DBUG_PRINT("info", ("any_real_read= TRUE"));
15614 
15615         /* Convert to primary/unique key operation. */
15616         Uint32 partitionId;
15617         Uint32* ppartitionId = NULL;
15618 
15619         if (m_user_defined_partitioning &&
15620             (cur_index_type == PRIMARY_KEY_ORDERED_INDEX ||
15621              cur_index_type == PRIMARY_KEY_INDEX))
15622         {
15623           partitionId=part_spec.start_part;
15624           ppartitionId=&partitionId;
15625         }
15626 
15627         /**
15628          * The 'pushable' codepath is incomplete and not expected to be
15629          * taken, as make_join_pushed() handles
15630          * AT_MULTI_UNIQUE_KEY as non-pushable.
15631          */
15632         if (m_pushed_join_operation==PUSHED_ROOT &&
15633             !m_disable_pushed_join &&
15634             !m_pushed_join_member->get_query_def().isScanQuery())
15635         {
15636           op= NULL;            // Avoid compiler warning
15637           assert(false);  // FIXME: Incomplete code, should not be executed
15638           assert(lm == NdbOperation::LM_CommittedRead);
15639           const int error= pk_unique_index_read_key_pushed(active_index,
15640                                                            mrr_cur_range.start_key.key,
15641                                                            ppartitionId);
15642           if (unlikely(error))
15643             DBUG_RETURN(error);
15644         }
15645         else
15646         {
15647           if (m_pushed_join_operation == PUSHED_ROOT)
15648           {
15649             DBUG_PRINT("info", ("Cannot push join due to incomplete implementation."));
15650             m_thd_ndb->m_pushed_queries_dropped++;
15651           }
15652           if (!(op= pk_unique_index_read_key(active_index,
15653                                              mrr_cur_range.start_key.key,
15654                                              multi_range_row(row_buf), lm,
15655                                              ppartitionId)))
15656             ERR_RETURN(trans->getNdbError());
15657         }
15658       }
15659       oplist[num_keyops++]= op;
15660       row_buf= multi_range_next_entry(row_buf, reclength);
15661     }
15662   }
15663 
15664   if (m_active_query != NULL &&
15665       m_pushed_join_member->get_query_def().isScanQuery())
15666   {
15667     m_thd_ndb->m_scan_count++;
15668     if (mrr_is_output_sorted)
15669     {
15670       m_thd_ndb->m_sorted_scan_count++;
15671     }
15672 
15673     bool prunable= false;
15674     if (unlikely(m_active_query->isPrunable(prunable) != 0))
15675       ERR_RETURN(m_active_query->getNdbError());
15676     if (prunable)
15677       m_thd_ndb->m_pruned_scan_count++;
15678 
15679     DBUG_PRINT("info", ("Is MRR scan-query pruned to 1 partition? :%u", prunable));
15680     assert(!m_multi_cursor);
15681   }
15682   if (m_multi_cursor)
15683   {
15684     DBUG_PRINT("info", ("Is MRR scan pruned to 1 partition? :%u",
15685                         m_multi_cursor->getPruned()));
15686     m_thd_ndb->m_scan_count++;
15687     m_thd_ndb->m_pruned_scan_count += (m_multi_cursor->getPruned()? 1 : 0);
15688     if (mrr_is_output_sorted)
15689     {
15690       m_thd_ndb->m_sorted_scan_count++;
15691     }
15692   }
15693 
15694   if (any_real_read && execute_no_commit_ie(m_thd_ndb, trans))
15695     ERR_RETURN(trans->getNdbError());
15696 
15697   if (!m_range_res)
15698   {
15699     DBUG_PRINT("info",
15700                ("Split MRR read, %d-%d of %d bufsize=%lu used=%lu range_no=%d",
15701                 starting_range, mrr_range_no - 1, ranges_in_seq,
15702                 (ulong)(end_of_buffer - multi_range_buffer->buffer),
15703                 (ulong)(row_buf - multi_range_buffer->buffer), range_no));
15704     /*
15705       Mark that we are using the entire buffer (even though we might not),
15706       since we have not yet read all ranges.
15707 
15708       This prevents mysqld from reusing the buffer when we read the
15709       remaining ranges.
15710     */
15711     multi_range_buffer->end_of_used_area= multi_range_buffer->buffer_end;
15712   }
15713   else
15714     multi_range_buffer->end_of_used_area= row_buf;
15715 
15716   first_running_range= first_range_in_batch= starting_range;
15717   first_unstarted_range= mrr_range_no;
15718   m_current_range_no= 0;
15719 
15720   /*
15721     Now we need to inspect all ranges that were converted to key operations.
15722 
15723     We need to check for any error (in particular NoDataFound), and remember
15724     the status, since the operation pointer may no longer be valid when we
15725     actually get to it in multi_range_next_entry() (we may have done further
15726     execute()'s in a different handler object during joins, for example).
15727   */
15728   row_buf= m_multi_range_result_ptr;
15729   uint op_idx= 0;
15730   for (uint r= first_range_in_batch; r < first_unstarted_range; r++)
15731   {
15732     uchar &type_loc= multi_range_entry_type(row_buf);
15733     row_buf= multi_range_next_entry(row_buf, reclength);
15734     if (type_loc >= enum_ordered_range)
15735       continue;
15736 
15737     assert(op_idx < MRR_MAX_RANGES);
15738     if ((op= oplist[op_idx++]) == NULL)
15739       continue;  // read_before_write_removal
15740 
15741     const NdbError &error= op->getNdbError();
15742     if (error.code != 0)
15743     {
15744       if (error.classification == NdbError::NoDataFound)
15745         type_loc= enum_empty_unique_range;
15746       else
15747       {
15748         /*
15749           This shouldn't really happen.
15750 
15751           There aren't really any other errors that could happen on the read
15752           without also aborting the transaction and causing execute() to
15753           return failure.
15754 
15755           (But we can still safely return an error code in non-debug builds).
15756         */
15757         assert(FALSE);
15758         ERR_RETURN(error);      /* purecov: deadcode */
15759       }
15760     }
15761   }
15762 
15763   DBUG_RETURN(0);
15764 }
15765 
15766 int ha_ndbcluster::multi_range_read_next(char **range_info)
15767 {
15768   int res;
15769   DBUG_ENTER("ha_ndbcluster::multi_range_read_next");
15770 
15771   if (m_disable_multi_read)
15772   {
15773     DBUG_RETURN(handler::multi_range_read_next(range_info));
15774   }
15775 
15776   for(;;)
15777   {
15778     /* for each range (we should have remembered the number) */
15779     while (first_running_range < first_unstarted_range)
15780     {
15781       uchar *row_buf= m_multi_range_result_ptr;
15782       int expected_range_no= first_running_range - first_range_in_batch;
15783 
15784       switch (multi_range_entry_type(row_buf))
15785       {
15786         case enum_skip_range:
15787         case enum_empty_unique_range:
15788           /* Nothing in this range; continue with next. */
15789           break;
15790 
15791         case enum_unique_range:
15792           /*
15793             Move to next range; we can have at most one record from a unique
15794             range.
15795           */
15796           first_running_range++;
15797           m_multi_range_result_ptr=
15798             multi_range_next_entry(m_multi_range_result_ptr,
15799                                    table_share->reclength);
15800 
15801           /*
15802             Clear m_active_cursor; it is used as a flag in update_row() /
15803             delete_row() to know whether the current tuple is from a scan
15804             or pk operation.
15805           */
15806           m_active_cursor= NULL;
15807 
15808           /* Return the record. */
15809           *range_info= multi_range_get_custom(multi_range_buffer,
15810                                               expected_range_no);
15811           memcpy(table->record[0], multi_range_row(row_buf),
15812                  table_share->reclength);
15813           DBUG_RETURN(0);
15814 
15815         case enum_ordered_range:
15816           /* An index scan range. */
15817           {
15818             int res;
15819             if ((res= read_multi_range_fetch_next()) != 0)
15820             {
15821               *range_info= multi_range_get_custom(multi_range_buffer,
15822                                                   expected_range_no);
15823               first_running_range++;
15824               m_multi_range_result_ptr=
15825                 multi_range_next_entry(m_multi_range_result_ptr,
15826                                        table_share->reclength);
15827               DBUG_RETURN(res);
15828             }
15829           }
15830           if (!m_next_row)
15831           {
15832             /*
15833               The whole scan is done, and the cursor has been closed.
15834               So nothing more for this range. Move to next.
15835             */
15836             break;
15837           }
15838           else
15839           {
15840             int current_range_no= m_current_range_no;
15841             /*
15842               For a sorted index scan, we will receive rows in increasing
15843               range_no order, so we can return ranges in order, pausing when
15844               range_no indicates that the currently processed range
15845               (first_running_range) is done.
15846 
15847               But for an unsorted scan, we may receive a high range_no from one
15848               fragment followed by a low range_no from another fragment. So we
15849               need to process all index scan ranges together.
15850             */
15851             if (!mrr_is_output_sorted || expected_range_no == current_range_no)
15852             {
15853               *range_info= multi_range_get_custom(multi_range_buffer,
15854                                                   current_range_no);
15855               /* Copy out data from the new row. */
15856               unpack_record(table->record[0], m_next_row);
15857               table->status= 0;
15858               /*
15859                 Mark that we have used this row, so we need to fetch a new
15860                 one on the next call.
15861               */
15862               m_next_row= 0;
15863               /*
15864                 Set m_active_cursor; it is used as a flag in update_row() /
15865                 delete_row() to know whether the current tuple is from a scan or
15866                 pk operation.
15867               */
15868               m_active_cursor= m_multi_cursor;
15869 
15870               DBUG_RETURN(0);
15871             }
15872             else if (current_range_no > expected_range_no)
15873             {
15874               /* Nothing more in scan for this range. Move to next. */
15875               break;
15876             }
15877             else
15878             {
15879               /*
15880                 Should not happen. Ranges should be returned from NDB API in
15881                 the order we requested them.
15882               */
15883               assert(0);
15884               break;                              // Attempt to carry on
15885             }
15886           }
15887 
15888         default:
15889           assert(0);
15890       }
15891       /* At this point the current range is done, proceed to next. */
15892       first_running_range++;
15893       m_multi_range_result_ptr=
15894         multi_range_next_entry(m_multi_range_result_ptr, table_share->reclength);
15895     }
15896 
15897     if (m_range_res)   // mrr_funcs.next() has consumed all ranges.
15898       DBUG_RETURN(HA_ERR_END_OF_FILE);
15899 
15900     /*
15901       Read remaining ranges
15902     */
15903     if ((res= multi_range_start_retrievals(first_running_range)))
15904       DBUG_RETURN(res);
15905 
15906   } // for(;;)
15907 }
15908 
15909 
15910 /*
15911   Fetch next row from the ordered index cursor in multi range scan.
15912 
15913   We keep the next row in m_next_row, and the range_no of the
15914   next row in m_current_range_no. This is used in sorted index scan
15915   to correctly interleave rows from primary/unique key operations with
15916   rows from the scan.
15917 */
15918 int
15919 ha_ndbcluster::read_multi_range_fetch_next()
15920 {
15921   DBUG_ENTER("read_multi_range_fetch_next");
15922 
15923   if (m_active_query)
15924   {
15925     DBUG_PRINT("info", ("read_multi_range_fetch_next from pushed join, m_next_row:%p", m_next_row));
15926     if (!m_next_row)
15927     {
15928       int res= fetch_next_pushed();
15929       if (res == NdbQuery::NextResult_gotRow)
15930       {
15931         m_current_range_no= 0;
15932 //      m_current_range_no= cursor->get_range_no();  // FIXME SPJ, need rangeNo from index scan
15933       }
15934       else if (res == NdbQuery::NextResult_scanComplete)
15935       {
15936         /* We have fetched the last row from the scan. */
15937         m_active_query->close(FALSE);
15938         m_active_query= NULL;
15939         m_next_row= 0;
15940         DBUG_RETURN(0);
15941       }
15942       else
15943       {
15944         /* An error. */
15945         DBUG_RETURN(res);
15946       }
15947     }
15948   }
15949   else if (m_multi_cursor)
15950   {
15951     if (!m_next_row)
15952     {
15953       NdbIndexScanOperation *cursor= m_multi_cursor;
15954       int res= fetch_next(cursor);
15955       if (res == 0)
15956       {
15957         m_current_range_no= cursor->get_range_no();
15958       }
15959       else if (res == 1)
15960       {
15961         /* We have fetched the last row from the scan. */
15962         cursor->close(FALSE, TRUE);
15963         m_active_cursor= 0;
15964         m_multi_cursor= 0;
15965         m_next_row= 0;
15966         DBUG_RETURN(0);
15967       }
15968       else
15969       {
15970         /* An error. */
15971         DBUG_RETURN(res);
15972       }
15973     }
15974   }
15975   DBUG_RETURN(0);
15976 }
15977 
15978 
15979 /**
15980  * Try to find pushable subsets of a join plan.
15981  * @param hton unused (maybe useful for other engines).
15982  * @param thd Thread.
15983  * @param plan The join plan to examine.
15984  * @return Possible error code.
15985  */
15986 
15987 static
15988 int ndbcluster_make_pushed_join(handlerton *hton,
15989                                 THD* thd,
15990                                 const AQP::Join_plan* plan)
15991 {
15992   DBUG_ENTER("ndbcluster_make_pushed_join");
15993   (void)ha_ndb_ext; // prevents compiler warning.
15994 
15995   if (THDVAR(thd, join_pushdown) &&
15996       // Check for online upgrade/downgrade.
15997       ndb_join_pushdown(g_ndb_cluster_connection->get_min_db_version()))
15998   {
15999     bool pushed_something = false;
16000     ndb_pushed_builder_ctx pushed_builder(*plan);
16001 
16002     for (uint i= 0; i < plan->get_access_count()-1; i++)
16003     {
16004       const AQP::Table_access* const join_root= plan->get_table_access(i);
16005       const ndb_pushed_join* pushed_join= NULL;
16006 
16007       // Try to build a ndb_pushed_join starting from 'join_root'
16008       int error= pushed_builder.make_pushed_join(join_root, pushed_join);
16009       if (unlikely(error))
16010       {
16011         if (error < 0)  // getNdbError() gives us the error code
16012         {
16013           ERR_SET(pushed_builder.getNdbError(),error);
16014         }
16015         join_root->get_table()->file->print_error(error, MYF(0));
16016         DBUG_RETURN(error);
16017       }
16018 
16019       // Assign any produced pushed_join definitions to
16020       // the ha_ndbcluster instance representing its root.
16021       if (pushed_join != NULL)
16022       {
16023         ha_ndbcluster* const handler=
16024           static_cast<ha_ndbcluster*>(join_root->get_table()->file);
16025 
16026         error= handler->assign_pushed_join(pushed_join);
16027         if (unlikely(error))
16028         {
16029           delete pushed_join;
16030           handler->print_error(error, MYF(0));
16031           DBUG_RETURN(error);
16032         }
16033         // Something was pushed and the QEP needs to be modified
16034         pushed_something = true;
16035       }
16036     }
16037 
16038     if (pushed_something)
16039     {
16040       // Modify the QEP_TAB's to use the 'linked' read functions
16041       // for those parts of the join which have been pushed down.
16042       for (uint i= 0; i < plan->get_access_count(); i++)
16043       {
16044         plan->get_table_access(i)->set_pushed_table_access_method();
16045       }
16046     }
16047   }
16048   DBUG_RETURN(0);
16049 }
16050 
16051 
16052 /**
16053  * When a pushed join having the table for this handler as its root
16054  * has been produced, ::assign_pushed_join() is responsible for setting
16055  * up this ha_ndbcluster instance such that the prepared NdbQuery
16056  * can be instantiated at execution time.
16057  */
16058 int
16059 ha_ndbcluster::assign_pushed_join(const ndb_pushed_join* pushed_join)
16060 {
16061   DBUG_ENTER("assign_pushed_join");
16062   m_thd_ndb->m_pushed_queries_defined++;
16063 
16064   for (uint i = 0; i < pushed_join->get_operation_count(); i++)
16065   {
16066     const TABLE* const tab= pushed_join->get_table(i);
16067     assert(tab->file->ht == ht);
16068     ha_ndbcluster* child= static_cast<ha_ndbcluster*>(tab->file);
16069     child->m_pushed_join_member= pushed_join;
16070     child->m_pushed_join_operation= i;
16071   }
16072 
16073   DBUG_PRINT("info", ("Assigned pushed join with %d child operations",
16074                       pushed_join->get_operation_count()-1));
16075 
16076   DBUG_RETURN(0);
16077 }
16078 
16079 
16080 /**
16081  * First level of filtering tables which *may* be part of
16082  * a pushed query: returning 'false' will eliminate this table
16083  * from being a part of a pushed join.
16084  * A 'reason' for rejecting this table is required if 'false'
16085  * is returned.
16086  */
16087 bool
16088 ha_ndbcluster::maybe_pushable_join(const char*& reason) const
16089 {
16090   reason= NULL;
16091   if (uses_blob_value(table->read_set))
16092   {
16093     reason= "select list can't contain BLOB columns";
16094     return false;
16095   }
16096   if (m_user_defined_partitioning)
16097   {
16098     reason= "has user defined partitioning";
16099     return false;
16100   }
16101 
16102   // Pushed operations may not set locks.
16103   const NdbOperation::LockMode lockMode= get_ndb_lock_mode(m_lock.type);
16104   switch (lockMode)
16105   {
16106   case NdbOperation::LM_CommittedRead:
16107     return true;
16108 
16109   case NdbOperation::LM_Read:
16110   case NdbOperation::LM_Exclusive:
16111     reason= "lock modes other than 'read committed' not implemented";
16112     return false;
16113 
16114   default: // Other lock modes not used by handler.
16115     assert(false);
16116     return false;
16117   }
16118 
16119   return true;
16120 }
16121 
16122 /**
16123  * Check if this table access operation (and a number of succeeding operations)
16124  * can be pushed to the cluster and executed there. This requires that there
16125  * is an NdbQueryDefinition and that it still corresponds to the
16126  * type of operation that we intend to execute. (The MySQL server will
16127  * sometimes change its mind and replace a scan with a lookup or vice versa
16128  * as it works its way into the nested loop join.)
16129  *
16130  * @param type This is the operation type that the server want to execute.
16131  * @param idx  Index used whenever relevant for operation type
16132  * @param needSorted True if the root operation is an ordered index scan
16133  * with sorted results.
16134  * @return True if the operation may be pushed.
16135  */
16136 bool
16137 ha_ndbcluster::check_if_pushable(int type,  //NdbQueryOperationDef::Type,
16138                                  uint idx) const
16139 {
16140   if (m_disable_pushed_join)
16141   {
16142     DBUG_PRINT("info", ("Push disabled (HA_EXTRA_KEYREAD)"));
16143     return false;
16144   }
16145   return   m_pushed_join_operation == PUSHED_ROOT
16146         && m_pushed_join_member    != NULL
16147         && m_pushed_join_member->match_definition(
16148                         type,
16149                         (idx<MAX_KEY) ? &m_index[idx] : NULL);
16150 }
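
/*
  Illustrative call pattern: a condensed sketch of the real usage in the MRR
  code earlier in this file, shown here only to summarize the contract:

    if (check_if_pushable(NdbQueryOperationDef::OrderedIndexScan, active_index))
    {
      const int error= create_pushed_join();   // instantiate the prepared NdbQuery
      ...
    }
    else
    {
      ... fall back to a plain NdbIndexScanOperation / key operation ...
    }
*/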
16151 
16152 
16153 int
16154 ha_ndbcluster::create_pushed_join(const NdbQueryParamValue* keyFieldParams, uint paramCnt)
16155 {
16156   DBUG_ENTER("create_pushed_join");
16157   assert(m_pushed_join_member && m_pushed_join_operation == PUSHED_ROOT);
16158 
16159   NdbQuery* const query=
16160     m_pushed_join_member->make_query_instance(m_thd_ndb->trans, keyFieldParams, paramCnt);
16161 
16162   if (unlikely(query==NULL))
16163     ERR_RETURN(m_thd_ndb->trans->getNdbError());
16164 
16165   // Bind to instantiated NdbQueryOperations.
16166   for (uint i= 0; i < m_pushed_join_member->get_operation_count(); i++)
16167   {
16168     const TABLE* const tab= m_pushed_join_member->get_table(i);
16169     ha_ndbcluster* handler= static_cast<ha_ndbcluster*>(tab->file);
16170 
16171     assert(handler->m_pushed_join_operation==(int)i);
16172     NdbQueryOperation* const op= query->getQueryOperation(i);
16173     handler->m_pushed_operation= op;
16174 
16175     // Bind to result buffers
16176     const NdbRecord* const resultRec= handler->m_ndb_record;
16177     int res= op->setResultRowRef(
16178                         resultRec,
16179                         handler->_m_next_row,
16180                         (uchar *)(tab->read_set->bitmap));
16181     if (unlikely(res))
16182       ERR_RETURN(query->getNdbError());
16183 
16184     // We clear 'm_next_row' to say that no row was fetched from the query yet.
16185     handler->_m_next_row= 0;
16186   }
16187 
16188   assert(m_active_query==NULL);
16189   m_active_query= query;
16190   m_thd_ndb->m_pushed_queries_executed++;
16191 
16192   DBUG_RETURN(0);
16193 }
16194 
16195 
16196 /**
16197  * Check if this table access operation is part of a pushed join operation
16198  * which is actively executing.
16199  */
16200 bool
16201 ha_ndbcluster::check_is_pushed() const
16202 {
16203   if (m_pushed_join_member == NULL)
16204     return false;
16205 
16206   handler *root= m_pushed_join_member->get_table(PUSHED_ROOT)->file;
16207   return (static_cast<ha_ndbcluster*>(root)->m_active_query);
16208 }
16209 
16210 uint
16211 ha_ndbcluster::number_of_pushed_joins() const
16212 {
16213   if (m_pushed_join_member == NULL)
16214     return 0;
16215   else
16216     return m_pushed_join_member->get_operation_count();
16217 }
16218 
16219 const TABLE*
16220 ha_ndbcluster::root_of_pushed_join() const
16221 {
16222   if (m_pushed_join_member == NULL)
16223     return NULL;
16224   else
16225     return m_pushed_join_member->get_table(PUSHED_ROOT);
16226 }
16227 
16228 const TABLE*
16229 ha_ndbcluster::parent_of_pushed_join() const
16230 {
16231   if (m_pushed_join_operation > PUSHED_ROOT)
16232   {
16233     assert(m_pushed_join_member!=NULL);
16234     uint parent_ix= m_pushed_join_member
16235                     ->get_query_def().getQueryOperation(m_pushed_join_operation)
16236                     ->getParentOperation(0)
16237                     ->getOpNo();
16238     return m_pushed_join_member->get_table(parent_ix);
16239   }
16240   return NULL;
16241 }
16242 
16243 /**
16244   Utility thread main loop.
16245 */
16246 Ndb_util_thread::Ndb_util_thread()
16247   : Ndb_component("Util")
16248 {
16249   native_mutex_init(&LOCK, MY_MUTEX_INIT_FAST);
16250   native_cond_init(&COND);
16251 }
16252 
16253 Ndb_util_thread::~Ndb_util_thread()
16254 {
16255   native_mutex_destroy(&LOCK);
16256   native_cond_destroy(&COND);
16257 }
16258 
16259 void Ndb_util_thread::do_wakeup()
16260 {
16261   // Wakeup from potential wait
16262   log_info("Wakeup");
16263 
16264   native_mutex_lock(&LOCK);
16265   native_cond_signal(&COND);
16266   native_mutex_unlock(&LOCK);
16267 }
16268 
16269 
16270 void ndb_util_thread_stop(void)
16271 {
16272   ndb_util_thread.stop();
16273 }
16274 
16275 #include "ndb_log.h"
16276 
16277 void
16278 Ndb_util_thread::do_run()
16279 {
16280   THD *thd; /* needs to be first for thread_stack */
16281   struct timespec abstime;
16282   Thd_ndb *thd_ndb= NULL;
16283   uint share_list_size= 0;
16284   NDB_SHARE **share_list= NULL;
16285 
16286   DBUG_ENTER("ndb_util_thread");
16287   DBUG_PRINT("enter", ("cache_check_time: %lu", opt_ndb_cache_check_time));
16288 
16289   log_info("Starting...");
16290 
16291   native_mutex_lock(&LOCK);
16292 
16293   thd= new THD; /* note that the constructor of THD uses DBUG_ */
16294   if (thd == NULL)
16295   {
16296     set_my_errno(HA_ERR_OUT_OF_MEM);
16297     DBUG_VOID_RETURN;
16298   }
16299   THD_CHECK_SENTRY(thd);
16300 
16301   thd->thread_stack= (char*)&thd; /* remember where our stack is */
16302   if (thd->store_globals())
16303     goto ndb_util_thread_fail;
16304   thd_set_command(thd, COM_DAEMON);
16305 #ifndef NDB_THD_HAS_NO_VERSION
16306   thd->version=refresh_version;
16307 #endif
16308   thd->get_protocol_classic()->set_client_capabilities(0);
16309   thd->security_context()->skip_grants();
16310   thd->get_protocol_classic()->init_net((st_vio *) 0);
16311 
16312   CHARSET_INFO *charset_connection;
16313   charset_connection= get_charset_by_csname("utf8",
16314                                             MY_CS_PRIMARY, MYF(MY_WME));
16315   thd->variables.character_set_client= charset_connection;
16316   thd->variables.character_set_results= charset_connection;
16317   thd->variables.collation_connection= charset_connection;
16318   thd->update_charset();
16319 
16320   native_mutex_unlock(&LOCK);
16321 
16322   log_info("Wait for server start completed");
16323   /*
16324     wait for mysql server to start
16325   */
16326   mysql_mutex_lock(&LOCK_server_started);
16327   while (!mysqld_server_started)
16328   {
16329     set_timespec(&abstime, 1);
16330     mysql_cond_timedwait(&COND_server_started, &LOCK_server_started,
16331                          &abstime);
16332     if (is_stop_requested())
16333     {
16334       mysql_mutex_unlock(&LOCK_server_started);
16335       native_mutex_lock(&LOCK);
16336       goto ndb_util_thread_end;
16337     }
16338   }
16339   mysql_mutex_unlock(&LOCK_server_started);
16340 
16341   // Defer the call of THD::init_for_queries() until after mysqld_server_started
16342   // to ensure that the parts of MySQL Server it uses have been created
16343   thd->init_for_queries();
16344 
16345   log_info("Wait for cluster to start");
16346   /*
16347     Wait for cluster to start
16348   */
16349   native_mutex_lock(&LOCK);
16350   while (!g_ndb_status.cluster_node_id && (ndbcluster_hton->slot != ~(uint)0))
16351   {
16352     /* ndb not connected yet */
16353     native_cond_wait(&COND, &LOCK);
16354     if (is_stop_requested())
16355       goto ndb_util_thread_end;
16356   }
16357   native_mutex_unlock(&LOCK);
16358 
16359   /* Get thd_ndb for this thread */
16360   if (!(thd_ndb= Thd_ndb::seize(thd)))
16361   {
16362     sql_print_error("Could not allocate Thd_ndb object");
16363     native_mutex_lock(&LOCK);
16364     goto ndb_util_thread_end;
16365   }
16366   thd_set_thd_ndb(thd, thd_ndb);
16367   thd_ndb->options|= TNO_NO_LOG_SCHEMA_OP;
16368 
16369   if (opt_ndb_extra_logging && ndb_binlog_running)
16370     sql_print_information("NDB Binlog: Ndb tables initially read only.");
16371 
16372   log_info("Started");
16373 
16374   set_timespec(&abstime, 0);
16375   for (;;)
16376   {
16377     native_mutex_lock(&LOCK);
16378     if (!is_stop_requested())
16379       native_cond_timedwait(&COND,
16380                              &LOCK,
16381                              &abstime);
16382     if (is_stop_requested()) /* Stopping thread */
16383       goto ndb_util_thread_end;
16384     native_mutex_unlock(&LOCK);
16385 #ifdef NDB_EXTRA_DEBUG_UTIL_THREAD
16386     DBUG_PRINT("ndb_util_thread", ("Started, cache_check_time: %lu",
16387                                    opt_ndb_cache_check_time));
16388 #endif
16389 
16390     /*
16391       Check if the Ndb object in thd_ndb is still valid(it will be
16392       invalid if connection to cluster has been lost) and recycle
16393       it if necessary.
16394     */
16395     if (!check_ndb_in_thd(thd, false))
16396     {
16397       set_timespec(&abstime, 1);
16398       continue;
16399     }
16400 
16401     /*
16402       Regularly give the ndb_binlog component a chance to set itself up,
16403       i.e. at first start it needs to create the ndb_* system tables
16404       and set up event operations on those. If the connection to the
16405       cluster is lost, the ndb_* system tables are hopefully still there,
16406       but the event operations need to be recreated.
16407     */
16408     if (!ndb_binlog_setup(thd))
16409     {
16410       /* Failed to setup binlog, try again in 1 second */
16411       set_timespec(&abstime, 1);
16412       continue;
16413     }
16414 
16415     if (opt_ndb_cache_check_time == 0)
16416     {
16417       /* Wake up in 1 second to check if value has changed */
16418       set_timespec(&abstime, 1);
16419       continue;
16420     }
16421 
16422     /* Lock mutex and fill list with pointers to all open tables */
16423     NDB_SHARE *share;
16424     native_mutex_lock(&ndbcluster_mutex);
16425     uint i, open_count, record_count= ndbcluster_open_tables.records;
16426     if (share_list_size < record_count)
16427     {
16428       NDB_SHARE ** new_share_list= new NDB_SHARE * [record_count];
16429       if (!new_share_list)
16430       {
16431         sql_print_warning("ndb util thread: malloc failure, "
16432                           "query cache not maintained properly");
16433         native_mutex_unlock(&ndbcluster_mutex);
16434         goto next;                               // At least do not crash
16435       }
16436       delete [] share_list;
16437       share_list_size= record_count;
16438       share_list= new_share_list;
16439     }
16440     for (i= 0, open_count= 0; i < record_count; i++)
16441     {
16442       share= (NDB_SHARE *)my_hash_element(&ndbcluster_open_tables, i);
16443       if ((share->use_count - (int) (share->op != 0) - (int) (share->op != 0))
16444           <= 0)
16445         continue; // injector thread is the only user, skip statistics
16446       /* ndb_share reference temporary, free below */
16447       share->use_count++; /* Make sure the table can't be closed */
16448       share->util_thread= true;
16449       DBUG_PRINT("NDB_SHARE", ("%s temporary  use_count: %u",
16450                                share->key_string(), share->use_count));
16451       DBUG_PRINT("ndb_util_thread",
16452                  ("Found open table[%d]: %s, use_count: %d",
16453                   i, share->table_name, share->use_count));
16454 
16455       /* Store pointer to table */
16456       share_list[open_count++]= share;
16457     }
16458     native_mutex_unlock(&ndbcluster_mutex);
16459 
16460     /* Iterate through the open files list */
16461     for (i= 0; i < open_count; i++)
16462     {
16463       share= share_list[i];
16464       if ((share->use_count - (int) (share->op != 0) - (int) (share->op != 0))
16465           <= 1)
16466       {
16467         /*
16468           Util thread and injector thread are the only users, skip statistics
16469         */
16470         /* ndb_share reference temporary free */
16471         DBUG_PRINT("NDB_SHARE", ("%s temporary free  use_count: %u",
16472                                  share->key_string(), share->use_count));
16473 
16474         native_mutex_lock(&ndbcluster_mutex);
16475         share->util_thread= false;
16476         free_share(&share, true);
16477         native_mutex_unlock(&ndbcluster_mutex);
16478         continue;
16479       }
16480       DBUG_PRINT("ndb_util_thread",
16481                  ("Fetching commit count for: %s", share->key_string()));
16482 
16483       struct Ndb_statistics stat;
16484       uint lock;
16485       native_mutex_lock(&share->mutex);
16486       lock= share->commit_count_lock;
16487       native_mutex_unlock(&share->mutex);
16488       {
16489         /* Contact NDB to get commit count for table */
16490         Ndb* ndb= thd_ndb->ndb;
16491         if (ndb->setDatabaseName(share->db))
16492         {
16493           goto loop_next;
16494         }
16495         Ndb_table_guard ndbtab_g(ndb->getDictionary(), share->table_name);
16496         if (ndbtab_g.get_table() &&
16497             ndb_get_table_statistics(thd, NULL, FALSE, ndb,
16498                                      ndbtab_g.get_table()->getDefaultRecord(),
16499                                      &stat) == 0)
16500         {
16501           DBUG_PRINT("info", ("Table: %s, commit_count: %llu,  rows: %llu",
16502                               share->key_string(),
16503                               stat.commit_count, stat.row_count));
16504         }
16505         else
16506         {
16507           DBUG_PRINT("ndb_util_thread",
16508                      ("Error: Could not get commit count for table %s",
16509                       share->key_string()));
16510           stat.commit_count= 0;
16511         }
16512       }
16513   loop_next:
16514       native_mutex_lock(&share->mutex);
16515       if (share->commit_count_lock == lock)
16516         share->commit_count= stat.commit_count;
16517       native_mutex_unlock(&share->mutex);
16518 
16519       /* ndb_share reference temporary free */
16520       DBUG_PRINT("NDB_SHARE", ("%s temporary free  use_count: %u",
16521                                share->key_string(), share->use_count));
16522       native_mutex_lock(&ndbcluster_mutex);
16523       share->util_thread= false;
16524       free_share(&share, true);
16525       native_mutex_unlock(&ndbcluster_mutex);
16526     }
16527 next:
16528     /* Calculate new time to wake up */
16529     set_timespec_nsec(&abstime, opt_ndb_cache_check_time * 1000000ULL);
16530   }
16531 
16532   log_info("Stopping...");
16533 
16534   native_mutex_lock(&LOCK);
16535 
16536 ndb_util_thread_end:
16537   thd->get_protocol_classic()->end_net();
16538 ndb_util_thread_fail:
16539   if (share_list)
16540     delete [] share_list;
16541   if (thd_ndb)
16542   {
16543     Thd_ndb::release(thd_ndb);
16544     thd_set_thd_ndb(thd, NULL);
16545   }
16546   delete thd;
16547 
16548   native_mutex_unlock(&LOCK);
16549   DBUG_PRINT("exit", ("ndb_util_thread"));
16550 
16551   log_info("Stopped");
16552 
16553   DBUG_VOID_RETURN;
16554 }
16555 
16556 /*
16557   Condition pushdown
16558 */
16559 /**
16560   Push a condition to the ndbcluster storage engine for evaluation
16561   during table and index scans. The conditions are stored on a stack,
16562   so several conditions can be pushed. The stack can be popped
16563   by calling cond_pop; handler::extra(HA_EXTRA_RESET) (handler::reset())
16564   will clear the stack.
16565   The current implementation supports arbitrary AND/OR nested conditions
16566   with comparisons between columns and constants (including constant
16567   expressions and function calls) and the following comparison operators:
16568   =, !=, >, >=, <, <=, "is null", and "is not null".
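
  Illustrative example (hypothetical columns, not taken from this code):
  a condition such as

    t.a = 3 AND (t.b > 10 OR t.c IS NULL)

  compares only columns of this table with constants and uses the operators
  listed above, so it can be pushed; a condition referring to columns of
  another table cannot.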
16569 
16570   @retval
16571     NULL The condition was supported and will be evaluated for each
16572          row found during the scan
16573   @retval
16574     cond The condition was not supported and all rows will be returned from
16575          the scan for evaluation (and thus not saved on stack)
16576 */
16577 const
16578 Item*
16579 ha_ndbcluster::cond_push(const Item *cond)
16580 {
16581   DBUG_ENTER("ha_ndbcluster::cond_push");
16582 
16583 #if 1
16584   if (cond->used_tables() & ~table->pos_in_table_list->map())
16585   {
16586     /**
16587      * 'cond' refers to fields from other tables, or other instances
16588      * of this table -> reject it.
16589      * (The optimizer needs to have a better understanding of what is
16590      *  pushable by each handler.)
16591      */
16592     DBUG_EXECUTE("where",print_where((Item *)cond, "Rejected cond_push", QT_ORDINARY););
16593     DBUG_RETURN(cond);
16594   }
16595 #else
16596   /*
16597     Make sure that 'cond' does not refer to field(s) from other tables
16598     or other instances of this table.
16599     (This was a legacy bug in the optimizer.)
16600   */
16601   assert(!(cond->used_tables() & ~table->pos_in_table_list->map()));
16602 #endif
16603   if (!m_cond)
16604     m_cond= new ha_ndbcluster_cond;
16605   if (!m_cond)
16606   {
16607     set_my_errno(HA_ERR_OUT_OF_MEM);
16608     DBUG_RETURN(cond);
16609   }
16610   DBUG_EXECUTE("where",print_where((Item *)cond, m_tabname, QT_ORDINARY););
16611   DBUG_RETURN(m_cond->cond_push(cond, table, (NDBTAB *)m_table));
16612 }
16613 
16614 /**
16615   Pop the top condition from the condition stack of the handler instance.
16616 */
16617 void
16618 ha_ndbcluster::cond_pop()
16619 {
16620   if (m_cond)
16621     m_cond->cond_pop();
16622 }
16623 
16624 
16625 /*
16626   Implements the SHOW ENGINE NDB STATUS command.
16627 */
16628 bool
16629 ndbcluster_show_status(handlerton *hton, THD* thd, stat_print_fn *stat_print,
16630                        enum ha_stat_type stat_type)
16631 {
16632   char name[16];
16633   char buf[IO_SIZE];
16634   uint buflen;
16635   DBUG_ENTER("ndbcluster_show_status");
16636 
16637   if (stat_type != HA_ENGINE_STATUS)
16638   {
16639     DBUG_RETURN(FALSE);
16640   }
16641 
16642   Ndb* ndb= check_ndb_in_thd(thd);
16643   Thd_ndb *thd_ndb= get_thd_ndb(thd);
16644   struct st_ndb_status ns;
16645   if (ndb)
16646     update_status_variables(thd_ndb, &ns, thd_ndb->connection);
16647   else
16648     update_status_variables(NULL, &ns, g_ndb_cluster_connection);
16649 
16650   buflen= (uint)
16651     my_snprintf(buf, sizeof(buf),
16652                 "cluster_node_id=%ld, "
16653                 "connected_host=%s, "
16654                 "connected_port=%ld, "
16655                 "number_of_data_nodes=%ld, "
16656                 "number_of_ready_data_nodes=%ld, "
16657                 "connect_count=%ld",
16658                 ns.cluster_node_id,
16659                 ns.connected_host,
16660                 ns.connected_port,
16661                 ns.number_of_data_nodes,
16662                 ns.number_of_ready_data_nodes,
16663                 ns.connect_count);
16664   if (stat_print(thd, ndbcluster_hton_name, ndbcluster_hton_name_length,
16665                  STRING_WITH_LEN("connection"), buf, buflen))
16666     DBUG_RETURN(TRUE);
16667 
16668   for (int i= 0; i < MAX_NDB_NODES; i++)
16669   {
16670     if (ns.transaction_hint_count[i] > 0 ||
16671         ns.transaction_no_hint_count[i] > 0)
16672     {
16673       uint namelen= (uint)my_snprintf(name, sizeof(name), "node[%d]", i);
16674       buflen= (uint)my_snprintf(buf, sizeof(buf),
16675                           "transaction_hint=%ld, transaction_no_hint=%ld",
16676                           ns.transaction_hint_count[i],
16677                           ns.transaction_no_hint_count[i]);
16678       if (stat_print(thd, ndbcluster_hton_name, ndbcluster_hton_name_length,
16679                      name, namelen, buf, buflen))
16680         DBUG_RETURN(TRUE);
16681     }
16682   }
16683 
16684   if (ndb)
16685   {
16686     Ndb::Free_list_usage tmp;
16687     tmp.m_name= 0;
16688     while (ndb->get_free_list_usage(&tmp))
16689     {
16690       buflen= (uint)
16691         my_snprintf(buf, sizeof(buf),
16692                   "created=%u, free=%u, sizeof=%u",
16693                   tmp.m_created, tmp.m_free, tmp.m_sizeof);
16694       if (stat_print(thd, ndbcluster_hton_name, ndbcluster_hton_name_length,
16695                      tmp.m_name, (uint)strlen(tmp.m_name), buf, buflen))
16696         DBUG_RETURN(TRUE);
16697     }
16698   }
16699 
16700   buflen = (uint)ndbcluster_show_status_binlog(buf, sizeof(buf));
16701   if (buflen)
16702   {
16703     if (stat_print(thd, ndbcluster_hton_name, ndbcluster_hton_name_length,
16704                    STRING_WITH_LEN("binlog"), buf, buflen))
16705       DBUG_RETURN(TRUE);
16706   }
16707 
16708   DBUG_RETURN(FALSE);
16709 }
16710 
16711 
16712 int ha_ndbcluster::get_default_num_partitions(HA_CREATE_INFO *create_info)
16713 {
16714   if (unlikely(g_ndb_cluster_connection->get_no_ready() <= 0))
16715   {
16716 err:
16717     my_error(HA_ERR_NO_CONNECTION, MYF(0));
16718     return -1;
16719   }
16720 
16721   THD* thd = current_thd;
16722   if (thd == 0)
16723     goto err;
16724   Thd_ndb * thd_ndb = get_thd_ndb(thd);
16725   if (thd_ndb == 0)
16726     goto err;
16727 
16728   ha_rows max_rows, min_rows;
16729   if (create_info)
16730   {
16731     max_rows= create_info->max_rows;
16732     min_rows= create_info->min_rows;
16733   }
16734   else
16735   {
16736     max_rows= table_share->max_rows;
16737     min_rows= table_share->min_rows;
16738   }
16739   uint no_fragments= get_no_fragments(max_rows >= min_rows ?
16740                                       max_rows : min_rows);
16741   uint reported_frags;
16742   adjusted_frag_count(thd_ndb->ndb,
16743                       no_fragments,
16744                       reported_frags);
16745   return reported_frags;
16746 }
16747 
16748 uint32 ha_ndbcluster::calculate_key_hash_value(Field **field_array)
16749 {
16750   Uint32 hash_value;
16751   struct Ndb::Key_part_ptr key_data[MAX_REF_PARTS];
16752   struct Ndb::Key_part_ptr *key_data_ptr= &key_data[0];
16753   Uint32 i= 0;
16754   int ret_val;
16755   Uint64 tmp[(MAX_KEY_SIZE_IN_WORDS*MAX_XFRM_MULTIPLY) >> 1];
16756   void *buf= (void*)&tmp[0];
16757   DBUG_ENTER("ha_ndbcluster::calculate_key_hash_value");
16758 
16759   do
16760   {
16761     Field *field= *field_array;
16762     uint len= field->data_length();
16763     assert(!field->is_real_null());
16764     if (field->real_type() == MYSQL_TYPE_VARCHAR)
16765       len+= ((Field_varstring*)field)->length_bytes;
16766     key_data[i].ptr= field->ptr;
16767     key_data[i++].len= len;
16768   } while (*(++field_array));
16769   key_data[i].ptr= 0;
16770   if ((ret_val= Ndb::computeHash(&hash_value, m_table,
16771                                  key_data_ptr, buf, sizeof(tmp))))
16772   {
16773     DBUG_PRINT("info", ("ret_val = %d", ret_val));
16774     assert(FALSE);
16775     abort();
16776   }
16777   DBUG_RETURN(hash_value);
16778 }
16779 
16780 
16781 /*
16782   Set-up auto-partitioning for NDB Cluster
16783 
16784   SYNOPSIS
16785     set_auto_partitions()
16786     part_info                  Partition info struct to set-up
16787 
16788   RETURN VALUE
16789     NONE
16790 
16791   DESCRIPTION
16792     Set-up auto partitioning scheme for tables that didn't define any
16793     partitioning. We'll use PARTITION BY KEY() in this case which
16794     translates into partition by primary key if a primary key exists
16795     and partition by hidden key otherwise.
16796 */
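
/*
  Illustrative example (hypothetical table): a table created without any
  PARTITION BY clause, e.g.

    CREATE TABLE t1 (a INT PRIMARY KEY, b INT) ENGINE=NDBCLUSTER;

  is treated as if PARTITION BY KEY() had been specified, i.e. partitioned on
  the primary key (or on the hidden key when no primary key exists), using
  the KEYHASH or LINHASH distribution selected by --ndb-distribution below.
*/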
16797 
16798 enum ndb_distribution_enum {
16799   NDB_DISTRIBUTION_KEYHASH= 0,
16800   NDB_DISTRIBUTION_LINHASH= 1
16801 };
16802 static const char* distribution_names[]= { "KEYHASH", "LINHASH", NullS };
16803 static ulong opt_ndb_distribution;
16804 static TYPELIB distribution_typelib= {
16805   array_elements(distribution_names) - 1,
16806   "",
16807   distribution_names,
16808   NULL
16809 };
16810 static MYSQL_SYSVAR_ENUM(
16811   distribution,                      /* name */
16812   opt_ndb_distribution,              /* var */
16813   PLUGIN_VAR_RQCMDARG,
16814   "Default distribution for new tables in ndb",
16815   NULL,                              /* check func. */
16816   NULL,                              /* update func. */
16817   NDB_DISTRIBUTION_KEYHASH,          /* default */
16818   &distribution_typelib              /* typelib */
16819 );
16820 
16821 
16822 void ha_ndbcluster::set_auto_partitions(partition_info *part_info)
16823 {
16824   DBUG_ENTER("ha_ndbcluster::set_auto_partitions");
16825   part_info->list_of_part_fields= TRUE;
16826   part_info->part_type= HASH_PARTITION;
16827   switch (opt_ndb_distribution)
16828   {
16829   case NDB_DISTRIBUTION_KEYHASH:
16830     part_info->linear_hash_ind= FALSE;
16831     break;
16832   case NDB_DISTRIBUTION_LINHASH:
16833     part_info->linear_hash_ind= TRUE;
16834     break;
16835   default:
16836     assert(false);
16837     break;
16838   }
16839   DBUG_VOID_RETURN;
16840 }
16841 
16842 
16843 static int
16844 create_table_set_range_data(const partition_info *part_info,
16845                             NdbDictionary::Table& ndbtab)
16846 {
16847   const uint num_parts = part_info->num_parts;
16848   DBUG_ENTER("create_table_set_range_data");
16849 
16850   int32 *range_data= (int32*)my_malloc(PSI_INSTRUMENT_ME, num_parts*sizeof(int32), MYF(0));
16851   if (!range_data)
16852   {
16853     mem_alloc_error(num_parts*sizeof(int32));
16854     DBUG_RETURN(1);
16855   }
16856   for (uint i= 0; i < num_parts; i++)
16857   {
16858     longlong range_val= part_info->range_int_array[i];
16859     const bool unsigned_flag= part_info->part_expr->unsigned_flag;
16860     if (unsigned_flag)
16861       range_val-= 0x8000000000000000ULL;
16862     if (range_val < INT_MIN32 || range_val >= INT_MAX32)
16863     {
16864       if ((i != num_parts - 1) ||
16865           (range_val != LLONG_MAX))
16866       {
16867         my_error(ER_LIMITED_PART_RANGE, MYF(0), "NDB");
16868         my_free((char*)range_data, MYF(0));
16869         DBUG_RETURN(1);
16870       }
16871       range_val= INT_MAX32;
16872     }
16873     range_data[i]= (int32)range_val;
16874   }
16875   ndbtab.setRangeListData(range_data, num_parts);
16876   my_free((char*)range_data, MYF(0));
16877   DBUG_RETURN(0);
16878 }
16879 
16880 
16881 static int
16882 create_table_set_list_data(const partition_info *part_info,
16883                            NdbDictionary::Table& ndbtab)
16884 {
16885   const uint num_list_values = part_info->num_list_values;
16886   int32 *list_data= (int32*)my_malloc(PSI_INSTRUMENT_ME,
16887                                       num_list_values*2*sizeof(int32), MYF(0));
16888   DBUG_ENTER("create_table_set_list_data");
16889 
16890   if (!list_data)
16891   {
16892     mem_alloc_error(num_list_values*2*sizeof(int32));
16893     DBUG_RETURN(1);
16894   }
16895   for (uint i= 0; i < num_list_values; i++)
16896   {
16897     LIST_PART_ENTRY *list_entry= &part_info->list_array[i];
16898     longlong list_val= list_entry->list_value;
16899     const bool unsigned_flag= part_info->part_expr->unsigned_flag;
16900     if (unsigned_flag)
16901       list_val-= 0x8000000000000000ULL;
16902     if (list_val < INT_MIN32 || list_val > INT_MAX32)
16903     {
16904       my_error(ER_LIMITED_PART_RANGE, MYF(0), "NDB");
16905       my_free((char*)list_data, MYF(0));
16906       DBUG_RETURN(1);
16907     }
16908     list_data[2*i]= (int32)list_val;
16909     list_data[2*i+1]= list_entry->partition_id;
16910   }
16911   ndbtab.setRangeListData(list_data, 2*num_list_values);
16912   my_free((char*)list_data, MYF(0));
16913   DBUG_RETURN(0);
16914 }
16915 
16916 /*
16917   User defined partitioning set-up. We need to check how many fragments the
16918   user wants defined and which node groups to put those into.
16919 
16920   All the functionality of the partition function, partition limits and so
16921   forth are entirely handled by the MySQL Server. There is one exception to
16922   this rule for PARTITION BY KEY, where NDB handles the hash function and
16923   this type can thus be handled transparently also by NDB API programs.
16924   For RANGE, HASH and LIST partitioning, and for subpartitioning, NDB API
16925   programs must implement the function that maps a row to a partition.
16926 */
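
/*
  Illustrative example (hypothetical table): explicit RANGE partitioning such
  as

    CREATE TABLE t2 (a INT PRIMARY KEY) ENGINE=NDBCLUSTER
    PARTITION BY RANGE (a) (PARTITION p0 VALUES LESS THAN (100),
                            PARTITION p1 VALUES LESS THAN (MAXVALUE));

  takes the "UserDefined" path below and is rejected with HA_ERR_UNSUPPORTED
  unless the server is started with the --new option.
*/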
16927 
16928 static int
16929 create_table_set_up_partition_info(HA_CREATE_INFO* create_info,
16930                                    partition_info *part_info,
16931                                    NdbDictionary::Table& ndbtab)
16932 {
16933   DBUG_ENTER("create_table_set_up_partition_info");
16934 
16935   if (part_info->part_type == HASH_PARTITION &&
16936       part_info->list_of_part_fields == TRUE)
16937   {
16938     Field **fields= part_info->part_field_array;
16939 
16940     DBUG_PRINT("info", ("Using HashMapPartition fragmentation type"));
16941     ndbtab.setFragmentType(NDBTAB::HashMapPartition);
16942 
16943     for (uint i= 0; i < part_info->part_field_list.elements; i++)
16944     {
16945       NDBCOL *col= ndbtab.getColumn(fields[i]->field_index);
16946       DBUG_PRINT("info",("setting dist key on %s", col->getName()));
16947       col->setPartitionKey(TRUE);
16948     }
16949   }
16950   else
16951   {
16952     if (!current_thd->variables.new_mode)
16953     {
16954       push_warning_printf(current_thd, Sql_condition::SL_WARNING,
16955                           ER_ILLEGAL_HA_CREATE_OPTION,
16956                           ER(ER_ILLEGAL_HA_CREATE_OPTION),
16957                           ndbcluster_hton_name,
16958                           "LIST, RANGE and HASH partition disabled by default,"
16959                           " use --new option to enable");
16960       DBUG_RETURN(HA_ERR_UNSUPPORTED);
16961     }
16962     /*
16963       Create a shadow field for those tables that have user defined
16964       partitioning. This field stores the value of the partition
16965       function such that NDB can handle reorganisations of the data
16966       even when the MySQL Server isn't available to assist with
16967       calculation of the partition function value.
16968     */
16969     NDBCOL col;
16970     DBUG_PRINT("info", ("Generating partition func value field"));
16971     col.setName("$PART_FUNC_VALUE");
16972     col.setType(NdbDictionary::Column::Int);
16973     col.setLength(1);
16974     col.setNullable(FALSE);
16975     col.setPrimaryKey(FALSE);
16976     col.setAutoIncrement(FALSE);
16977     ndbtab.addColumn(col);
16978     if (part_info->part_type == RANGE_PARTITION)
16979     {
16980       const int error = create_table_set_range_data(part_info, ndbtab);
16981       if (error)
16982       {
16983         DBUG_RETURN(error);
16984       }
16985     }
16986     else if (part_info->part_type == LIST_PARTITION)
16987     {
16988       const int error = create_table_set_list_data(part_info, ndbtab);
16989       if (error)
16990       {
16991         DBUG_RETURN(error);
16992       }
16993     }
16994 
16995     DBUG_PRINT("info", ("Using UserDefined fragmentation type"));
16996     ndbtab.setFragmentType(NDBTAB::UserDefined);
16997   }
16998 
16999   const bool use_default_num_parts = part_info->use_default_num_partitions;
17000   ndbtab.setDefaultNoPartitionsFlag(use_default_num_parts);
17001   ndbtab.setLinearFlag(part_info->linear_hash_ind);
17002   {
17003     ha_rows max_rows= create_info->max_rows;
17004     ha_rows min_rows= create_info->min_rows;
17005     if (max_rows < min_rows)
17006       max_rows= min_rows;
17007     if (max_rows != (ha_rows)0) /* max_rows == 0 is the default, then don't set max/min rows */
17008     {
17009       ndbtab.setMaxRows(max_rows);
17010       ndbtab.setMinRows(min_rows);
17011     }
17012   }
17013 
17014   {
17015     // Count number of fragments to use for the table and
17016     // build array describing which nodegroup should store each
17017     // partition(each partition is mapped to one fragment in the table).
17018     uint32 frag_data[MAX_PARTITIONS];
17019     ulong fd_index= 0;
17020 
17021     partition_element *part_elem;
17022     List_iterator<partition_element> part_it(part_info->partitions);
17023     while((part_elem = part_it++))
17024     {
17025       if (!part_info->is_sub_partitioned())
17026       {
17027         const Uint32 ng= part_elem->nodegroup_id;
17028         assert(fd_index < NDB_ARRAY_SIZE(frag_data));
17029         frag_data[fd_index++]= ng;
17030       }
17031       else
17032       {
17033         partition_element *subpart_elem;
17034         List_iterator<partition_element> sub_it(part_elem->subpartitions);
17035         while((subpart_elem = sub_it++))
17036         {
17037           const Uint32 ng= subpart_elem->nodegroup_id;
17038           assert(fd_index < NDB_ARRAY_SIZE(frag_data));
17039           frag_data[fd_index++]= ng;
17040         }
17041       }
17042     }
17043 
17044     // Double check number of partitions vs. fragments
17045     assert(part_info->get_tot_partitions() == fd_index);
17046 
17047     ndbtab.setFragmentCount(fd_index);
17048     ndbtab.setFragmentData(frag_data, fd_index);
17049   }
17050   DBUG_RETURN(0);
17051 }
17052 
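/*
  Inplace-alter context passed between the prepare/inplace/commit/abort
  phases: the dictionary handle, the original table definition, a
  modifiable copy of it, and the old table's id/version used when
  distributing the schema change.
*/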
17053 class NDB_ALTER_DATA : public inplace_alter_handler_ctx
17054 {
17055 public:
17056   NDB_ALTER_DATA(NdbDictionary::Dictionary *dict,
17057 		 const NdbDictionary::Table *table) :
17058     dictionary(dict),
17059     old_table(table),
17060     new_table(new NdbDictionary::Table(*table)),
17061     table_id(table->getObjectId()),
17062     old_table_version(table->getObjectVersion())
17063   {}
17064   ~NDB_ALTER_DATA()
17065   { delete new_table; }
17066   NdbDictionary::Dictionary *dictionary;
17067   const  NdbDictionary::Table *old_table;
17068   NdbDictionary::Table *new_table;
17069   Uint32 table_id;
17070   Uint32 old_table_version;
17071 };
17072 
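/*
  Decide whether the requested ALTER TABLE can be performed on-line
  (inplace) by NDB. Anything outside the 'supported' mask, altering a
  column default without adding a column, renamed fields, storage-media
  changes, or adding/dropping more than one index at a time makes the
  server fall back to a copying ALTER.
*/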
17073 enum_alter_inplace_result
17074   ha_ndbcluster::check_if_supported_inplace_alter(TABLE *altered_table,
17075                                                   Alter_inplace_info *ha_alter_info)
17076 {
17077   THD *thd= current_thd;
17078   HA_CREATE_INFO *create_info= ha_alter_info->create_info;
17079   Alter_inplace_info::HA_ALTER_FLAGS alter_flags=
17080       ha_alter_info->handler_flags;
17081   const Alter_inplace_info::HA_ALTER_FLAGS supported=
17082     Alter_inplace_info::ADD_INDEX |
17083     Alter_inplace_info::DROP_INDEX |
17084     Alter_inplace_info::ADD_UNIQUE_INDEX |
17085     Alter_inplace_info::DROP_UNIQUE_INDEX |
17086     Alter_inplace_info::ADD_COLUMN |
17087     Alter_inplace_info::ALTER_COLUMN_DEFAULT |
17088     Alter_inplace_info::ALTER_COLUMN_STORAGE_TYPE |
17089     Alter_inplace_info::ALTER_COLUMN_COLUMN_FORMAT |
17090     Alter_inplace_info::ADD_PARTITION |
17091     Alter_inplace_info::ALTER_TABLE_REORG |
17092     Alter_inplace_info::CHANGE_CREATE_OPTION |
17093     Alter_inplace_info::ADD_FOREIGN_KEY |
17094     Alter_inplace_info::DROP_FOREIGN_KEY |
17095     Alter_inplace_info::ALTER_INDEX_COMMENT;
17096 
17097   const Alter_inplace_info::HA_ALTER_FLAGS not_supported= ~supported;
17098 
17099   Alter_inplace_info::HA_ALTER_FLAGS add_column=
17100     Alter_inplace_info::ADD_COLUMN;
17101 
17102   const Alter_inplace_info::HA_ALTER_FLAGS adding=
17103     Alter_inplace_info::ADD_INDEX |
17104     Alter_inplace_info::ADD_UNIQUE_INDEX;
17105 
17106   const Alter_inplace_info::HA_ALTER_FLAGS dropping=
17107     Alter_inplace_info::DROP_INDEX |
17108     Alter_inplace_info::DROP_UNIQUE_INDEX;
17109 
17110   enum_alter_inplace_result result= HA_ALTER_INPLACE_SHARED_LOCK;
17111 
17112   DBUG_ENTER("ha_ndbcluster::check_if_supported_inplace_alter");
17113   partition_info *part_info= altered_table->part_info;
17114   const NDBTAB *old_tab= m_table;
17115 
17116   if (THDVAR(thd, use_copying_alter_table))
17117   {
17118     DBUG_PRINT("info", ("On-line alter table disabled"));
17119     DBUG_RETURN(HA_ALTER_INPLACE_NOT_SUPPORTED);
17120   }
17121 
17122   DBUG_PRINT("info", ("Passed alter flags 0x%llx", alter_flags));
17123   DBUG_PRINT("info", ("Supported 0x%llx", supported));
17124   DBUG_PRINT("info", ("Not supported 0x%llx", not_supported));
17125   DBUG_PRINT("info", ("alter_flags & not_supported 0x%llx",
17126                         alter_flags & not_supported));
17127 
17128   bool auto_increment_value_changed= false;
17129   bool max_rows_changed= false;
17130   if (alter_flags & Alter_inplace_info::CHANGE_CREATE_OPTION)
17131   {
17132     if (create_info->auto_increment_value !=
17133       table->file->stats.auto_increment_value)
17134       auto_increment_value_changed= true;
17135     if (create_info->used_fields & HA_CREATE_USED_MAX_ROWS)
17136       max_rows_changed= true;
17137   }
17138 
17139   if (alter_flags & Alter_inplace_info::ALTER_TABLE_REORG)
17140   {
17141     /*
17142       sql_partition.cc tries to compute what is going on
17143       and sets flags... which we clear here.
17144     */
17145     if (part_info->use_default_num_partitions)
17146     {
17147       alter_flags= alter_flags & ~Alter_inplace_info::COALESCE_PARTITION;
17148       alter_flags= alter_flags & ~Alter_inplace_info::ADD_PARTITION;
17149     }
17150   }
17151 
17152   if (alter_flags & Alter_inplace_info::ALTER_COLUMN_DEFAULT &&
17153       !(alter_flags & Alter_inplace_info::ADD_COLUMN))
17154   {
17155     DBUG_PRINT("info", ("Altering default value is not supported"));
17156     DBUG_RETURN(HA_ALTER_INPLACE_NOT_SUPPORTED);
17157   }
17158 
17159   if (alter_flags & not_supported)
17160   {
17161     DBUG_PRINT("info", ("Detected unsupported change: 0x%llx",
17162                         alter_flags & not_supported));
17163     DBUG_RETURN(HA_ALTER_INPLACE_NOT_SUPPORTED);
17164   }
17165 
17166   if (alter_flags & Alter_inplace_info::ADD_COLUMN ||
17167       alter_flags & Alter_inplace_info::ADD_PARTITION ||
17168       alter_flags & Alter_inplace_info::ALTER_TABLE_REORG ||
17169       max_rows_changed)
17170   {
17171      Ndb *ndb= get_ndb(thd);
17172      NDBDICT *dict= ndb->getDictionary();
17173      ndb->setDatabaseName(m_dbname);
17174      NdbDictionary::Table new_tab= *old_tab;
17175 
17176      result= HA_ALTER_INPLACE_EXCLUSIVE_LOCK;
17177      if (alter_flags & Alter_inplace_info::ADD_COLUMN)
17178      {
17179        NDBCOL col;
17180 
17181        /*
17182          Check that we are only adding columns
17183        */
17184        /*
17185          HA_COLUMN_DEFAULT_VALUE, HA_COLUMN_STORAGE and HA_COLUMN_FORMAT are
17186          set whenever they are specified in a later command, even if nothing
17187          actually changes. This is probably a bug. Conclusion: add them to
17188          the add_column mask so that we silently "accept" them. If someone
17189          tries to change a column, HA_CHANGE_COLUMN would be set, which we
17190          do not support, so we still return HA_ALTER_INPLACE_NOT_SUPPORTED.
17191        */
17192        add_column|= Alter_inplace_info::ALTER_COLUMN_DEFAULT;
17193        add_column|= Alter_inplace_info::ALTER_COLUMN_STORAGE_TYPE;
17194        add_column|= Alter_inplace_info::ALTER_COLUMN_COLUMN_FORMAT;
17195        if (alter_flags & ~add_column)
17196        {
17197          DBUG_PRINT("info", ("Only pure ADD COLUMN can be performed on-line"));
17198          DBUG_RETURN(HA_ALTER_INPLACE_NOT_SUPPORTED);
17199        }
17200        /*
17201          Check for extra fields for hidden primary key
17202          or user defined partitioning
17203        */
17204        if (table_share->primary_key == MAX_KEY ||
17205            part_info->part_type != HASH_PARTITION ||
17206            !part_info->list_of_part_fields)
17207          DBUG_RETURN(HA_ALTER_INPLACE_NOT_SUPPORTED);
17208 
17209        /* Find the new fields */
17210        for (uint i= table->s->fields; i < altered_table->s->fields; i++)
17211        {
17212          Field *field= altered_table->field[i];
17213          DBUG_PRINT("info", ("Found new field %s", field->field_name));
17214          DBUG_PRINT("info", ("storage_type %i, column_format %i",
17215                              (uint) field->field_storage_type(),
17216                              (uint) field->column_format()));
17217          if (!(field->flags & NO_DEFAULT_VALUE_FLAG))
17218          {
17219            my_ptrdiff_t src_offset= field->table->s->default_values
17220              - field->table->record[0];
17221            if ((! field->is_real_null(src_offset)) ||
17222                ((field->flags & NOT_NULL_FLAG)))
17223            {
17224              DBUG_PRINT("info",("Adding column with non-null default value is not supported on-line"));
17225              DBUG_RETURN(HA_ALTER_INPLACE_NOT_SUPPORTED);
17226            }
17227          }
17228          /* Create new field to check if it can be added */
17229          set_my_errno(create_ndb_column(thd, col, field, create_info,
17230                                         COLUMN_FORMAT_TYPE_DYNAMIC));
17231          if (my_errno())
17232          {
17233            DBUG_PRINT("info", ("create_ndb_column returned %u", my_errno()));
17234            DBUG_RETURN(HA_ALTER_ERROR);
17235          }
17236          if (new_tab.addColumn(col))
17237          {
17238            set_my_errno(errno);
17239            DBUG_PRINT("info", ("NdbDictionary::Table::addColumn returned %u", my_errno()));
17240            DBUG_RETURN(HA_ALTER_ERROR);
17241          }
17242        }
17243      }
17244 
17245      if (alter_flags & Alter_inplace_info::ALTER_TABLE_REORG)
17246      {
17247        /*
17248           Refuse if Max_rows has been used before...
17249           Workaround is to use ALTER ONLINE TABLE <t> MAX_ROWS=<bigger>;
17250        */
17251        if (old_tab->getMaxRows() != 0)
17252        {
17253          push_warning(current_thd,
17254                       Sql_condition::SL_WARNING, ER_UNKNOWN_ERROR,
17255                       "Cannot online REORGANIZE a table with Max_Rows set.  "
17256                       "Use ALTER TABLE ... MAX_ROWS=<new_val> or offline REORGANIZE "
17257                       "to redistribute this table.");
17258          DBUG_RETURN(HA_ALTER_INPLACE_NOT_SUPPORTED);
17259        }
17260        new_tab.setFragmentCount(0);
17261        new_tab.setFragmentData(0, 0);
17262      }
17263      else if (alter_flags & Alter_inplace_info::ADD_PARTITION)
17264      {
17265        DBUG_PRINT("info", ("Adding partition (%u)", part_info->num_parts));
17266        new_tab.setFragmentCount(part_info->num_parts);
17267      }
17268      if (max_rows_changed)
17269      {
17270        ulonglong rows= create_info->max_rows;
17271        uint no_fragments= get_no_fragments(rows);
17272        uint reported_frags= no_fragments;
17273        if (adjusted_frag_count(ndb, no_fragments, reported_frags))
17274        {
17275          push_warning(current_thd,
17276                       Sql_condition::SL_WARNING, ER_UNKNOWN_ERROR,
17277                       "Ndb might have problems storing the max amount "
17278                       "of rows specified");
17279        }
17280        if (reported_frags < old_tab->getFragmentCount())
17281        {
17282          DBUG_PRINT("info", ("Online reduction in number of fragments not supported"));
17283          DBUG_RETURN(HA_ALTER_INPLACE_NOT_SUPPORTED);
17284        }
17285        new_tab.setFragmentCount(reported_frags);
17286        new_tab.setDefaultNoPartitionsFlag(false);
17287        new_tab.setFragmentData(0, 0);
17288      }
17289 
17290      NDB_Modifiers table_modifiers(ndb_table_modifiers);
17291      table_modifiers.parse(thd, "NDB_TABLE=", create_info->comment.str,
17292                            create_info->comment.length);
17293      const NDB_Modifier* mod_nologging = table_modifiers.get("NOLOGGING");
17294 
17295      if (mod_nologging->m_found)
17296      {
17297        new_tab.setLogging(!mod_nologging->m_val_bool);
17298      }
17299 
17300      if (dict->supportedAlterTable(*old_tab, new_tab))
17301      {
17302        DBUG_PRINT("info", ("Adding column(s) supported on-line"));
17303      }
17304      else
17305      {
17306        DBUG_PRINT("info",("Adding column not supported on-line"));
17307        DBUG_RETURN(HA_ALTER_INPLACE_NOT_SUPPORTED);
17308      }
17309   }
17310 
17311   /*
17312     Check that we are not adding multiple indexes
17313   */
17314   if (alter_flags & adding)
17315   {
17316     if (((altered_table->s->keys - table->s->keys) != 1) ||
17317         (alter_flags & dropping))
17318     {
17319        DBUG_PRINT("info",("Only one index can be added on-line"));
17320        DBUG_RETURN(HA_ALTER_INPLACE_NOT_SUPPORTED);
17321     }
17322   }
17323 
17324   /*
17325     Check that we are not dropping multiple indexes
17326   */
17327   if (alter_flags & dropping)
17328   {
17329     if (((table->s->keys - altered_table->s->keys) != 1) ||
17330         (alter_flags & adding))
17331     {
17332        DBUG_PRINT("info",("Only one index can be dropped on-line"));
17333        DBUG_RETURN(HA_ALTER_INPLACE_NOT_SUPPORTED);
17334     }
17335   }
17336 
17337   for (uint i= 0; i < table->s->fields; i++)
17338   {
17339     Field *field= table->field[i];
17340     const NDBCOL *col= m_table->getColumn(i);
17341 
17342     NDBCOL new_col;
17343     create_ndb_column(0, new_col, field, create_info);
17344 
17345     bool index_on_column = false;
17346     /**
17347      * Check all indexes to determine if the column is indexed, instead of
17348      *   checking field->flags (PRI_KEY_FLAG | UNIQUE_KEY_FLAG |
17349      *   MULTIPLE_KEY_FLAG), since field->flags appears to be set only on
17350      *   the first column of a multi-part index.
17351      */
17352     for (uint j= 0; j<table->s->keys; j++)
17353     {
17354       KEY* key_info= table->key_info + j;
17355       KEY_PART_INFO* key_part= key_info->key_part;
17356       KEY_PART_INFO* end= key_part+key_info->user_defined_key_parts;
17357       for (; key_part != end; key_part++)
17358       {
17359         if (key_part->field->field_index == i)
17360         {
17361           index_on_column= true;
17362           j= table->s->keys; // break outer loop
17363           break;
17364         }
17365       }
17366     }
17367 
17368     if (index_on_column == false && (alter_flags & adding))
17369     {
17370       for (uint j= table->s->keys; j<altered_table->s->keys; j++)
17371       {
17372         KEY* key_info= altered_table->key_info + j;
17373         KEY_PART_INFO* key_part= key_info->key_part;
17374         KEY_PART_INFO* end= key_part+key_info->user_defined_key_parts;
17375         for (; key_part != end; key_part++)
17376         {
17377           if (key_part->field->field_index == i)
17378           {
17379             index_on_column= true;
17380             j= altered_table->s->keys; // break outer loop
17381             break;
17382           }
17383         }
17384       }
17385     }
17386 
17387     /**
17388      * This is a "copy" of code in ::create()
17389      *   that "auto-converts" indexed columns to memory storage
17390      *   (unless STORAGE DISK is explicitly specified).
17391      * This is needed for the comparison of the old and new column's
17392      * getStorageType() further down.
17393      */
17394     if (index_on_column)
17395     {
17396       if (field->field_storage_type() == HA_SM_DISK)
17397       {
17398         DBUG_RETURN(HA_ALTER_INPLACE_NOT_SUPPORTED);
17399       }
17400       new_col.setStorageType(NdbDictionary::Column::StorageTypeMemory);
17401     }
17402     else if (field->field_storage_type() == HA_SM_DEFAULT)
17403     {
17404       /**
17405        * If the user didn't specify any column format, keep the old one
17406        *   to make as many ALTERs as possible online.
17407        */
17408       new_col.setStorageType(col->getStorageType());
17409     }
17410 
17411     if (col->getStorageType() != new_col.getStorageType())
17412     {
17413       DBUG_PRINT("info", ("Column storage media is changed"));
17414       DBUG_RETURN(HA_ALTER_INPLACE_NOT_SUPPORTED);
17415     }
17416 
17417     if (field->flags & FIELD_IS_RENAMED)
17418     {
17419       DBUG_PRINT("info", ("Field has been renamed, copy table"));
17420       DBUG_RETURN(HA_ALTER_INPLACE_NOT_SUPPORTED);
17421     }
17422 
17423     if ((field->flags & FIELD_IN_ADD_INDEX) &&
17424         (col->getStorageType() == NdbDictionary::Column::StorageTypeDisk))
17425     {
17426       DBUG_PRINT("info", ("add/drop index not supported for disk stored column"));
17427       DBUG_RETURN(HA_ALTER_INPLACE_NOT_SUPPORTED);
17428     }
17429   }
17430 
17431   /* Check that only auto_increment value was changed */
17432   if (auto_increment_value_changed)
17433   {
17434     if (create_info->used_fields ^ ~HA_CREATE_USED_AUTO)
17435     {
17436       DBUG_PRINT("info", ("Not only auto_increment value changed"));
17437       DBUG_RETURN(HA_ALTER_INPLACE_NOT_SUPPORTED);
17438     }
17439   }
17440   else
17441   {
17442     /* Check that row format didn't change */
17443     if (create_info->used_fields & HA_CREATE_USED_AUTO &&
17444         get_row_type() != create_info->row_type)
17445     {
17446       DBUG_PRINT("info", ("Row format changed"));
17447       DBUG_RETURN(HA_ALTER_INPLACE_NOT_SUPPORTED);
17448     }
17449   }
17450   DBUG_PRINT("info", ("Ndb supports ALTER on-line"));
17451   DBUG_RETURN(result);
17452 }
17453 
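/*
  Prepare phase of the inplace ALTER. Requires the NDB global schema
  lock to be held; starts a schema transaction and applies the prepared
  changes (index additions, prepared index drops, added columns, new
  fragment counts, foreign keys) against the NDB_ALTER_DATA copy of the
  table definition. Returns true on error, after aborting the schema
  transaction.
*/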
17454 bool
17455 ha_ndbcluster::prepare_inplace_alter_table(TABLE *altered_table,
17456                                               Alter_inplace_info *ha_alter_info)
17457 {
17458   int error= 0;
17459   uint i;
17460   THD *thd= current_thd;
17461   Thd_ndb *thd_ndb= get_thd_ndb(thd);
17462   Ndb *ndb= get_ndb(thd);
17463   NDBDICT *dict= ndb->getDictionary();
17464   ndb->setDatabaseName(m_dbname);
17465 
17466   HA_CREATE_INFO *create_info= ha_alter_info->create_info;
17467 
17468   const Alter_inplace_info::HA_ALTER_FLAGS alter_flags=
17469     ha_alter_info->handler_flags;
17470 
17471   const Alter_inplace_info::HA_ALTER_FLAGS adding=
17472     Alter_inplace_info::ADD_INDEX |
17473     Alter_inplace_info::ADD_UNIQUE_INDEX;
17474 
17475   const Alter_inplace_info::HA_ALTER_FLAGS dropping=
17476     Alter_inplace_info::DROP_INDEX |
17477     Alter_inplace_info::DROP_UNIQUE_INDEX;
17478 
17479   DBUG_ENTER("ha_ndbcluster::prepare_inplace_alter_table");
17480 
17481   ha_alter_info->handler_ctx= 0;
17482   if (!thd_ndb->has_required_global_schema_lock("ha_ndbcluster::prepare_inplace_alter_table"))
17483     DBUG_RETURN(true);
17484 
17485   NDB_ALTER_DATA *alter_data;
17486   if (!(alter_data= new NDB_ALTER_DATA(dict, m_table)))
17487     DBUG_RETURN(true);
17488 
17489   const NDBTAB* const old_tab = alter_data->old_table;
17490   NdbDictionary::Table * const new_tab = alter_data->new_table;
17491   ha_alter_info->handler_ctx= alter_data;
17492 
17493   DBUG_PRINT("info", ("altered_table: '%s', alter_flags: 0x%llx",
17494                       altered_table->s->table_name.str,
17495                       alter_flags));
17496 
17497   bool auto_increment_value_changed= false;
17498   bool max_rows_changed= false;
17499   if (alter_flags & Alter_inplace_info::CHANGE_CREATE_OPTION)
17500   {
17501     if (create_info->auto_increment_value !=
17502       table->file->stats.auto_increment_value)
17503       auto_increment_value_changed= true;
17504     if (create_info->used_fields & HA_CREATE_USED_MAX_ROWS)
17505       max_rows_changed= true;
17506   }
17507 
17508   prepare_for_alter();
17509 
17510   if (dict->beginSchemaTrans() == -1)
17511   {
17512     DBUG_PRINT("info", ("Failed to start schema transaction"));
17513     ERR_PRINT(dict->getNdbError());
17514     error= ndb_to_mysql_error(&dict->getNdbError());
17515     table->file->print_error(error, MYF(0));
17516     goto err;
17517   }
17518 
17519   if (alter_flags & adding)
17520   {
17521     KEY           *key_info;
17522     KEY           *key;
17523     uint          *idx_p;
17524     uint          *idx_end_p;
17525     KEY_PART_INFO *key_part;
17526     KEY_PART_INFO *part_end;
17527     DBUG_PRINT("info", ("Adding indexes"));
17528     key_info= (KEY*) thd->alloc(sizeof(KEY) * ha_alter_info->index_add_count);
17529     key= key_info;
17530     for (idx_p=  ha_alter_info->index_add_buffer,
17531 	 idx_end_p= idx_p + ha_alter_info->index_add_count;
17532 	 idx_p < idx_end_p;
17533 	 idx_p++, key++)
17534     {
17535       /* Copy the KEY struct. */
17536       *key= ha_alter_info->key_info_buffer[*idx_p];
17537       /* Fix the key parts. */
17538       part_end= key->key_part + key->user_defined_key_parts;
17539       for (key_part= key->key_part; key_part < part_end; key_part++)
17540 	key_part->field= table->field[key_part->fieldnr];
17541     }
17542     if ((error= add_index_impl(thd, altered_table, key_info,
17543                                ha_alter_info->index_add_count)))
17544     {
17545       /*
17546 	Exchange the key_info for the error message. If we exchange
17547 	key number by key name in the message later, we need correct info.
17548       */
17549       KEY *save_key_info= table->key_info;
17550       table->key_info= key_info;
17551       table->file->print_error(error, MYF(0));
17552       table->key_info= save_key_info;
17553       goto abort;
17554     }
17555   }
17556 
17557   if (alter_flags & dropping)
17558   {
17559     uint          *key_numbers;
17560     uint          *keyno_p;
17561     KEY           **idx_p;
17562     KEY           **idx_end_p;
17563     DBUG_PRINT("info", ("Renumbering indexes"));
17564     /* The prepare_drop_index() method takes an array of key numbers. */
17565     key_numbers= (uint*) thd->alloc(sizeof(uint) * ha_alter_info->index_drop_count);
17566     keyno_p= key_numbers;
17567     /* Get the number of each key. */
17568     for (idx_p= ha_alter_info->index_drop_buffer,
17569 	 idx_end_p= idx_p + ha_alter_info->index_drop_count;
17570 	 idx_p < idx_end_p;
17571 	 idx_p++, keyno_p++)
17572     {
17573       // Find the key number matching the key to be dropped
17574       KEY *keyp= *idx_p;
17575       uint i;
17576       for(i=0; i < table->s->keys; i++)
17577       {
17578 	if (keyp == table->key_info + i)
17579 	  break;
17580       }
17581       DBUG_PRINT("info", ("Dropping index %u", i));
17582       *keyno_p= i;
17583     }
17584     /*
17585       Tell the handler to prepare for drop indexes.
17586       This re-numbers the indexes to get rid of gaps.
17587     */
17588     if ((error= prepare_drop_index(table, key_numbers,
17589 				   ha_alter_info->index_drop_count)))
17590     {
17591       table->file->print_error(error, MYF(0));
17592       goto abort;
17593     }
17594   }
17595 
17596   if (alter_flags &  Alter_inplace_info::ADD_COLUMN)
17597   {
17598      NDBCOL col;
17599 
17600      /* Find the new fields */
17601      for (i= table->s->fields; i < altered_table->s->fields; i++)
17602      {
17603        Field *field= altered_table->field[i];
17604        DBUG_PRINT("info", ("Found new field %s", field->field_name));
17605        set_my_errno(create_ndb_column(thd, col, field, create_info,
17606                                       COLUMN_FORMAT_TYPE_DYNAMIC));
17607        if (my_errno())
17608        {
17609          error= my_errno();
17610          goto abort;
17611        }
17612        /*
17613          If the user has not specified the field format,
17614          make it dynamic to enable on-line ADD COLUMN.
17615        */
17616        if (field->column_format() == COLUMN_FORMAT_TYPE_DEFAULT &&
17617            create_info->row_type == ROW_TYPE_DEFAULT &&
17618            col.getDynamic())
17619        {
17620          push_warning_printf(thd, Sql_condition::SL_WARNING,
17621                              ER_ILLEGAL_HA_CREATE_OPTION,
17622                              "Converted FIXED field '%s' to DYNAMIC "
17623                              "to enable on-line ADD COLUMN",
17624                              field->field_name);
17625        }
17626        new_tab->addColumn(col);
17627      }
17628   }
17629 
17630   if (alter_flags & Alter_inplace_info::ALTER_TABLE_REORG ||
17631       alter_flags & Alter_inplace_info::ADD_PARTITION ||
17632       max_rows_changed)
17633   {
17634     if (alter_flags & Alter_inplace_info::ALTER_TABLE_REORG)
17635     {
17636       new_tab->setFragmentCount(0);
17637       new_tab->setFragmentData(0, 0);
17638     }
17639     else if (alter_flags & Alter_inplace_info::ADD_PARTITION)
17640     {
17641       partition_info *part_info= altered_table->part_info;
17642       new_tab->setFragmentCount(part_info->num_parts);
17643     }
17644     else if (max_rows_changed)
17645     {
17646       ulonglong rows= create_info->max_rows;
17647       uint no_fragments= get_no_fragments(rows);
17648       uint reported_frags= no_fragments;
17649       if (adjusted_frag_count(ndb, no_fragments, reported_frags))
17650       {
17651         assert(false); /* Checked above */
17652       }
17653       if (reported_frags < old_tab->getFragmentCount())
17654       {
17655         assert(false);
17656         DBUG_RETURN(false);
17657       }
17658       /* Note we don't set the ndb table's max_rows param, as that
17659        * is considered a 'real' change
17660        */
17661       //new_tab->setMaxRows(create_info->max_rows);
17662       new_tab->setFragmentCount(reported_frags);
17663       new_tab->setDefaultNoPartitionsFlag(false);
17664       new_tab->setFragmentData(0, 0);
17665     }
17666 
17667     int res= dict->prepareHashMap(*old_tab, *new_tab);
17668     if (res == -1)
17669     {
17670       const NdbError err= dict->getNdbError();
17671       set_my_errno(ndb_to_mysql_error(&err));
17672       goto abort;
17673     }
17674   }
17675 
17676   if (alter_flags & Alter_inplace_info::ADD_FOREIGN_KEY)
17677   {
17678     int res= create_fks(thd, ndb);
17679     if (res != 0)
17680     {
17681       /*
17682         Unlike CREATE, ALTER for some reason does not translate
17683         the HA_ code, so fix it to be InnoDB-compatible.
17684       */
17685       if (res == HA_ERR_CANNOT_ADD_FOREIGN)
17686       {
17687         DBUG_PRINT("info", ("change error %d to %d",
17688                             HA_ERR_CANNOT_ADD_FOREIGN, ER_CANNOT_ADD_FOREIGN));
17689         res= ER_CANNOT_ADD_FOREIGN;
17690       }
17691       error= res;
17692       set_my_errno(error);
17693       my_error(error, MYF(0), 0);
17694       goto abort;
17695     }
17696   }
17697 
17698   DBUG_RETURN(false);
17699 abort:
17700   if (dict->endSchemaTrans(NdbDictionary::Dictionary::SchemaTransAbort)
17701         == -1)
17702   {
17703     DBUG_PRINT("info", ("Failed to abort schema transaction"));
17704     ERR_PRINT(dict->getNdbError());
17705     error= ndb_to_mysql_error(&dict->getNdbError());
17706   }
17707 
17708 err:
17709   DBUG_RETURN(true);
17710 }
17711 
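/*
  Pack the new .frm file and install it in the NDB dictionary via
  alterTableGlobal() as part of the on-line alter.
*/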
17712 int ha_ndbcluster::alter_frm(const char *file,
17713                              NDB_ALTER_DATA *alter_data)
17714 {
17715   uchar *data= NULL, *pack_data= NULL;
17716   size_t length, pack_length;
17717   int error= 0;
17718 
17719   DBUG_ENTER("alter_frm");
17720 
17721   DBUG_PRINT("enter", ("file: %s", file));
17722 
17723   NDBDICT *dict= alter_data->dictionary;
17724 
17725   // TODO handle this
17726   assert(m_table != 0);
17727 
17728   assert(get_ndb_share_state(m_share) == NSS_ALTERED);
17729   if (readfrm(file, &data, &length) ||
17730       packfrm(data, length, &pack_data, &pack_length))
17731   {
17732     char errbuf[MYSYS_STRERROR_SIZE];
17733     DBUG_PRINT("info", ("Missing frm for %s", m_tabname));
17734     my_free((char*)data, MYF(MY_ALLOW_ZERO_PTR));
17735     my_free((char*)pack_data, MYF(MY_ALLOW_ZERO_PTR));
17736     error= 1;
17737     my_error(ER_FILE_NOT_FOUND, MYF(0), file,
17738              my_errno(), my_strerror(errbuf, sizeof(errbuf), my_errno()));
17739   }
17740   else
17741   {
17742     DBUG_PRINT("info", ("Table %s has changed, altering frm in ndb",
17743                         m_tabname));
17744     const NDBTAB *old_tab= alter_data->old_table;
17745     NdbDictionary::Table *new_tab= alter_data->new_table;
17746 
17747     new_tab->setFrm(pack_data, (Uint32)pack_length);
17748     if (dict->alterTableGlobal(*old_tab, *new_tab))
17749     {
17750       DBUG_PRINT("info", ("On-line alter of table %s failed", m_tabname));
17751       error= ndb_to_mysql_error(&dict->getNdbError());
17752       my_error(error, MYF(0), m_tabname);
17753     }
17754     my_free((char*)data, MYF(MY_ALLOW_ZERO_PTR));
17755     my_free((char*)pack_data, MYF(MY_ALLOW_ZERO_PTR));
17756   }
17757 
17758   /* ndb_share reference schema(?) free */
17759   DBUG_PRINT("NDB_SHARE", ("%s binlog schema(?) free  use_count: %u",
17760                            m_share->key_string(), m_share->use_count));
17761 
17762   DBUG_RETURN(error);
17763 }
17764 
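/*
  Execute phase of the inplace ALTER: finally drop the prepared indexes
  and foreign keys, install the new frm via alter_frm() and commit the
  schema transaction (or abort it on failure).
*/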
17765 bool
17766 ha_ndbcluster::inplace_alter_table(TABLE *altered_table,
17767                                    Alter_inplace_info *ha_alter_info)
17768 {
17769   DBUG_ENTER("ha_ndbcluster::inplace_alter_table");
17770   int error= 0;
17771   THD *thd= current_thd;
17772   Thd_ndb *thd_ndb= get_thd_ndb(thd);
17773   HA_CREATE_INFO *create_info= ha_alter_info->create_info;
17774   NDB_ALTER_DATA *alter_data= (NDB_ALTER_DATA *) ha_alter_info->handler_ctx;
17775   NDBDICT *dict= alter_data->dictionary;
17776   const Alter_inplace_info::HA_ALTER_FLAGS alter_flags=
17777     ha_alter_info->handler_flags;
17778   const Alter_inplace_info::HA_ALTER_FLAGS dropping=
17779     Alter_inplace_info::DROP_INDEX |
17780     Alter_inplace_info::DROP_UNIQUE_INDEX;
17781 
17782   if (!thd_ndb->has_required_global_schema_lock("ha_ndbcluster::inplace_alter_table"))
17783   {
17784     DBUG_RETURN(true);
17785   }
17786 
17787   bool auto_increment_value_changed= false;
17788   if (alter_flags & Alter_inplace_info::CHANGE_CREATE_OPTION)
17789   {
17790     if (create_info->auto_increment_value !=
17791       table->file->stats.auto_increment_value)
17792       auto_increment_value_changed= true;
17793   }
17794 
17795   if (alter_flags & dropping)
17796   {
17797     /* Tell the handler to finally drop the indexes. */
17798     if ((error= final_drop_index(table)))
17799     {
17800       print_error(error, MYF(0));
17801       goto abort;
17802     }
17803   }
17804 
17805   if (alter_flags & Alter_inplace_info::DROP_FOREIGN_KEY)
17806   {
17807     const NDBTAB* tab= alter_data->old_table;
17808     if ((error= drop_fk_for_online_alter(thd, thd_ndb->ndb, dict, tab)) != 0)
17809     {
17810       print_error(error, MYF(0));
17811       goto abort;
17812     }
17813   }
17814 
17815   DBUG_PRINT("info", ("getting frm file %s", altered_table->s->path.str));
17816   error= alter_frm(altered_table->s->path.str, alter_data);
17817   if (!error)
17818   {
17819     /*
17820      * Alter successful, commit schema transaction.
17821      */
17822     if (dict->endSchemaTrans() == -1)
17823     {
17824       error= ndb_to_mysql_error(&dict->getNdbError());
17825       DBUG_PRINT("info", ("Failed to commit schema transaction, error %u",
17826                           error));
17827       table->file->print_error(error, MYF(0));
17828       goto err;
17829     }
17830     if (auto_increment_value_changed)
17831       error= set_auto_inc_val(thd, create_info->auto_increment_value);
17832     if (error)
17833     {
17834       DBUG_PRINT("info", ("Failed to set auto_increment value"));
17835       goto err;
17836     }
17837   }
17838   else // if (error)
17839   {
17840 abort:
17841     if (dict->endSchemaTrans(NdbDictionary::Dictionary::SchemaTransAbort)
17842         == -1)
17843     {
17844       DBUG_PRINT("info", ("Failed to abort schema transaction"));
17845       ERR_PRINT(dict->getNdbError());
17846     }
17847   }
17848 
17849 err:
17850   DBUG_RETURN(error ? true : false);
17851 }
17852 
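/*
  Commit phase: distribute SOT_ONLINE_ALTER_TABLE_PREPARE to the other
  mysqld servers and release the alter context. If commit is false,
  delegate to abort_inplace_alter_table().
*/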
17853 bool
17854 ha_ndbcluster::commit_inplace_alter_table(TABLE *altered_table,
17855                                           Alter_inplace_info *ha_alter_info,
17856                                           bool commit)
17857 {
17858   DBUG_ENTER("ha_ndbcluster::commit_inplace_alter_table");
17859 
17860   if (!commit)
17861     DBUG_RETURN(abort_inplace_alter_table(altered_table,
17862                                           ha_alter_info));
17863   THD *thd= current_thd;
17864   Thd_ndb *thd_ndb= get_thd_ndb(thd);
17865   NDB_ALTER_DATA *alter_data= (NDB_ALTER_DATA *) ha_alter_info->handler_ctx;
17866   if (!thd_ndb->has_required_global_schema_lock("ha_ndbcluster::commit_inplace_alter_table"))
17867   {
17868     DBUG_RETURN(true); // Error
17869   }
17870 
17871   const char *db= table->s->db.str;
17872   const char *name= table->s->table_name.str;
17873   uint32 table_id= 0, table_version= 0;
17874   assert(alter_data != 0);
17875   if (alter_data)
17876   {
17877     table_id= alter_data->table_id;
17878     table_version= alter_data->old_table_version;
17879   }
17880   ndbcluster_log_schema_op(thd, thd->query().str, thd->query().length,
17881                            db, name,
17882                            table_id, table_version,
17883                            SOT_ONLINE_ALTER_TABLE_PREPARE,
17884                            NULL, NULL);
17885   delete alter_data;
17886   ha_alter_info->handler_ctx= 0;
17887   set_ndb_share_state(m_share, NSS_INITIAL);
17888   free_share(&m_share); // Decrease ref_count
17889   DBUG_RETURN(false); // OK
17890 }
17891 
17892 bool
17893 ha_ndbcluster::abort_inplace_alter_table(TABLE *altered_table,
17894                                          Alter_inplace_info *ha_alter_info)
17895 {
17896   DBUG_ENTER("ha_ndbcluster::abort_inplace_alter_table");
17897 
17898   NDB_ALTER_DATA *alter_data= (NDB_ALTER_DATA *) ha_alter_info->handler_ctx;
17899   if (!alter_data)
17900   {
17901     // Could not find any alter_data, nothing to abort or already aborted
17902     DBUG_RETURN(false);
17903   }
17904 
17905   NDBDICT *dict= alter_data->dictionary;
17906   if (dict->endSchemaTrans(NdbDictionary::Dictionary::SchemaTransAbort) == -1)
17907   {
17908     DBUG_PRINT("info", ("Failed to abort schema transaction"));
17909     ERR_PRINT(dict->getNdbError());
17910   }
17911   /* ndb_share reference schema free */
17912   DBUG_PRINT("NDB_SHARE", ("%s binlog schema free  use_count: %u",
17913                            m_share->key_string(), m_share->use_count));
17914   delete alter_data;
17915   ha_alter_info->handler_ctx= 0;
17916   set_ndb_share_state(m_share, NSS_INITIAL);
17917   free_share(&m_share); // Decrease ref_count
17918   DBUG_RETURN(false);
17919 }
17920 
17921 void ha_ndbcluster::notify_table_changed()
17922 {
17923   DBUG_ENTER("ha_ndbcluster::notify_table_changed ");
17924 
17925   /*
17926     All mysqld servers will read frms from disk and set up a new
17927     event operation for the table (new_op).
17928   */
17929   THD *thd= current_thd;
17930   const char *db= table->s->db.str;
17931   const char *name= table->s->table_name.str;
17932   uint32 table_id= 0, table_version= 0;
17933 
17934   /*
17935     Get table id/version for new table
17936   */
17937   {
17938     Ndb* ndb= get_ndb(thd);
17939     assert(ndb != 0);
17940     if (ndb)
17941     {
17942       ndb->setDatabaseName(db);
17943       Ndb_table_guard ndbtab(ndb->getDictionary(), name);
17944       const NDBTAB *new_tab= ndbtab.get_table();
17945       assert(new_tab != 0);
17946       if (new_tab)
17947       {
17948         table_id= new_tab->getObjectId();
17949         table_version= new_tab->getObjectVersion();
17950       }
17951     }
17952   }
17953 
17954   /*
17955     All mysqld servers will switch to using the new_op and delete the
17956     old event operation.
17957   */
17958   ndbcluster_log_schema_op(thd, thd->query().str, thd->query().length,
17959                            db, name,
17960                            table_id, table_version,
17961                            SOT_ONLINE_ALTER_TABLE_COMMIT,
17962                            NULL, NULL);
17963 
17964   DBUG_VOID_RETURN;
17965 }
17966 
17967 static
17968 bool set_up_tablespace(st_alter_tablespace *alter_info,
17969                        NdbDictionary::Tablespace *ndb_ts)
17970 {
17971   if (alter_info->extent_size >= (Uint64(1) << 32))
17972   {
17973     // TODO set correct error
17974     return TRUE;
17975   }
17976   ndb_ts->setName(alter_info->tablespace_name);
17977   ndb_ts->setExtentSize(Uint32(alter_info->extent_size));
17978   ndb_ts->setDefaultLogfileGroup(alter_info->logfile_group_name);
17979   return FALSE;
17980 }
17981 
17982 static
17983 bool set_up_datafile(st_alter_tablespace *alter_info,
17984                      NdbDictionary::Datafile *ndb_df)
17985 {
17986   if (alter_info->max_size > 0)
17987   {
17988     my_error(ER_TABLESPACE_AUTO_EXTEND_ERROR, MYF(0));
17989     return TRUE;
17990   }
17991   ndb_df->setPath(alter_info->data_file_name);
17992   ndb_df->setSize(alter_info->initial_size);
17993   ndb_df->setTablespace(alter_info->tablespace_name);
17994   return FALSE;
17995 }
17996 
17997 static
17998 bool set_up_logfile_group(st_alter_tablespace *alter_info,
17999                           NdbDictionary::LogfileGroup *ndb_lg)
18000 {
18001   if (alter_info->undo_buffer_size >= (Uint64(1) << 32))
18002   {
18003     // TODO set correct error
18004     return TRUE;
18005   }
18006 
18007   ndb_lg->setName(alter_info->logfile_group_name);
18008   ndb_lg->setUndoBufferSize(Uint32(alter_info->undo_buffer_size));
18009   return FALSE;
18010 }
18011 
18012 static
18013 bool set_up_undofile(st_alter_tablespace *alter_info,
18014                      NdbDictionary::Undofile *ndb_uf)
18015 {
18016   ndb_uf->setPath(alter_info->undo_file_name);
18017   ndb_uf->setSize(alter_info->initial_size);
18018   ndb_uf->setLogfileGroup(alter_info->logfile_group_name);
18019   return FALSE;
18020 }
18021 
18022 
18023 /**
18024   Get the tablespace name from the NDB dictionary for the given table in the
18025   given schema.
18026 
18027   @note For NDB tables with version before 50120, the server must ask the
18028         SE for the tablespace name, because for these tables, the tablespace
18029         name is not stored in the .FRM file, but only within the SE itself.
18030 
18031   @note The function is essentially doing the same as the corresponding code
18032         block in the function 'get_metadata()', except for the handling of
18033         empty strings, which are in this case returned as "" rather than NULL.
18034 
18035   @param       thd              Thread context.
18036   @param       db_name          Name of the relevant schema.
18037   @param       table_name       Name of the relevant table.
18038   @param [out] tablespace_name  Name of the tablespace containing the table.
18039 
18040   @return Operation status.
18041     @retval == 0  Success.
18042     @retval != 0  Error (handler error code returned).
18043  */
18044 
18045 static
18046 int ndbcluster_get_tablespace(THD* thd,
18047                               LEX_CSTRING db_name,
18048                               LEX_CSTRING table_name,
18049                               LEX_CSTRING *tablespace_name)
18050 {
18051   DBUG_ENTER("ndbcluster_get_tablespace");
18052   DBUG_PRINT("enter", ("db_name: %s, table_name: %s", db_name.str,
18053              table_name.str));
18054   assert(tablespace_name != NULL);
18055 
18056   Ndb* ndb= check_ndb_in_thd(thd);
18057   if (ndb == NULL)
18058     DBUG_RETURN(HA_ERR_NO_CONNECTION);
18059 
18060   NDBDICT *dict= ndb->getDictionary();
18061   const NDBTAB *tab= NULL;
18062 
18063   ndb->setDatabaseName(db_name.str);
18064   Ndb_table_guard ndbtab_g(dict, table_name.str);
18065   if (!(tab= ndbtab_g.get_table()))
18066     ERR_RETURN(dict->getNdbError());
18067 
18068   Uint32 id;
18069   if (tab->getTablespace(&id))
18070   {
18071     NdbDictionary::Tablespace ts= dict->getTablespace(id);
18072     NdbError ndberr= dict->getNdbError();
18073     if (ndberr.classification == NdbError::NoError)
18074     {
18075       const char *tablespace= ts.getName();
18076       assert(tablespace);
18077       const size_t tablespace_len= strlen(tablespace);
18078       DBUG_PRINT("info", ("Found tablespace '%s'", tablespace));
18079       thd->make_lex_string(tablespace_name, tablespace, tablespace_len, false);
18080     }
18081   }
18082 
18083   DBUG_RETURN(0);
18084 }
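
/*
  Illustrative (hypothetical) caller sketch, assuming suitable LEX_CSTRING
  values for the schema and table names are available:

    LEX_CSTRING ts_name;
    if (ndbcluster_get_tablespace(thd, db_name, table_name, &ts_name) == 0)
      DBUG_PRINT("info", ("tablespace: %s", ts_name.str));
*/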
18085 
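/*
  Implement CREATE/ALTER/DROP TABLESPACE and LOGFILE GROUP for NDB.
  Each branch maps the SQL command onto the corresponding NdbDictionary
  call and then distributes the change to other mysqld servers via
  ndbcluster_log_schema_op().
*/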
18086 static
18087 int ndbcluster_alter_tablespace(handlerton *hton,
18088                                 THD* thd, st_alter_tablespace *alter_info)
18089 {
18090   int is_tablespace= 0;
18091   NdbError err;
18092   NDBDICT *dict;
18093   int error;
18094   const char *errmsg= NULL;
18095   Ndb *ndb;
18096   DBUG_ENTER("ndbcluster_alter_tablespace");
18097 
18098   ndb= check_ndb_in_thd(thd);
18099   if (ndb == NULL)
18100   {
18101     DBUG_RETURN(HA_ERR_NO_CONNECTION);
18102   }
18103   dict= ndb->getDictionary();
18104 
18105   uint32 table_id= 0, table_version= 0;
18106   switch (alter_info->ts_cmd_type){
18107   case (CREATE_TABLESPACE):
18108   {
18109     error= ER_CREATE_FILEGROUP_FAILED;
18110 
18111     NdbDictionary::Tablespace ndb_ts;
18112     NdbDictionary::Datafile ndb_df;
18113     NdbDictionary::ObjectId objid;
18114     if (set_up_tablespace(alter_info, &ndb_ts))
18115     {
18116       DBUG_RETURN(1);
18117     }
18118     if (set_up_datafile(alter_info, &ndb_df))
18119     {
18120       DBUG_RETURN(1);
18121     }
18122     errmsg= "TABLESPACE";
18123     if (dict->createTablespace(ndb_ts, &objid))
18124     {
18125       DBUG_PRINT("error", ("createTablespace returned %d", error));
18126       goto ndberror;
18127     }
18128     table_id = objid.getObjectId();
18129     table_version = objid.getObjectVersion();
18130     if (dict->getWarningFlags() &
18131         NdbDictionary::Dictionary::WarnExtentRoundUp)
18132     {
18133       push_warning_printf(current_thd, Sql_condition::SL_WARNING,
18134                           dict->getWarningFlags(),
18135                           "Extent size rounded up to kernel page size");
18136     }
18137     DBUG_PRINT("alter_info", ("Successfully created Tablespace"));
18138     errmsg= "DATAFILE";
18139     if (dict->createDatafile(ndb_df))
18140     {
18141       err= dict->getNdbError();
18142       NdbDictionary::Tablespace tmp= dict->getTablespace(ndb_ts.getName());
18143       if (dict->getNdbError().code == 0 &&
18144 	  tmp.getObjectId() == objid.getObjectId() &&
18145 	  tmp.getObjectVersion() == objid.getObjectVersion())
18146       {
18147 	dict->dropTablespace(tmp);
18148       }
18149 
18150       DBUG_PRINT("error", ("createDatafile returned %d", error));
18151       goto ndberror2;
18152     }
18153     if (dict->getWarningFlags() &
18154         NdbDictionary::Dictionary::WarnDatafileRoundUp)
18155     {
18156       push_warning_printf(current_thd, Sql_condition::SL_WARNING,
18157                           dict->getWarningFlags(),
18158                           "Datafile size rounded up to extent size");
18159     }
18160     else /* produce only 1 message */
18161     if (dict->getWarningFlags() &
18162         NdbDictionary::Dictionary::WarnDatafileRoundDown)
18163     {
18164       push_warning_printf(current_thd, Sql_condition::SL_WARNING,
18165                           dict->getWarningFlags(),
18166                           "Datafile size rounded down to extent size");
18167     }
18168     is_tablespace= 1;
18169     break;
18170   }
18171   case (ALTER_TABLESPACE):
18172   {
18173     error= ER_ALTER_FILEGROUP_FAILED;
18174     if (alter_info->ts_alter_tablespace_type == ALTER_TABLESPACE_ADD_FILE)
18175     {
18176       NdbDictionary::Datafile ndb_df;
18177       if (set_up_datafile(alter_info, &ndb_df))
18178       {
18179 	DBUG_RETURN(1);
18180       }
18181       errmsg= " CREATE DATAFILE";
18182       NdbDictionary::ObjectId objid;
18183       if (dict->createDatafile(ndb_df, false, &objid))
18184       {
18185 	goto ndberror;
18186       }
18187       table_id= objid.getObjectId();
18188       table_version= objid.getObjectVersion();
18189       if (dict->getWarningFlags() &
18190           NdbDictionary::Dictionary::WarnDatafileRoundUp)
18191       {
18192         push_warning_printf(current_thd, Sql_condition::SL_WARNING,
18193                             dict->getWarningFlags(),
18194                             "Datafile size rounded up to extent size");
18195       }
18196       else /* produce only 1 message */
18197       if (dict->getWarningFlags() &
18198           NdbDictionary::Dictionary::WarnDatafileRoundDown)
18199       {
18200         push_warning_printf(current_thd, Sql_condition::SL_WARNING,
18201                             dict->getWarningFlags(),
18202                             "Datafile size rounded down to extent size");
18203       }
18204     }
18205     else if(alter_info->ts_alter_tablespace_type == ALTER_TABLESPACE_DROP_FILE)
18206     {
18207       NdbDictionary::Tablespace ts= dict->getTablespace(alter_info->tablespace_name);
18208       NdbDictionary::Datafile df= dict->getDatafile(0, alter_info->data_file_name);
18209       NdbDictionary::ObjectId objid;
18210       df.getTablespaceId(&objid);
18211       table_id = df.getObjectId();
18212       table_version = df.getObjectVersion();
18213       if (ts.getObjectId() == objid.getObjectId() &&
18214 	  strcmp(df.getPath(), alter_info->data_file_name) == 0)
18215       {
18216 	errmsg= " DROP DATAFILE";
18217 	if (dict->dropDatafile(df))
18218 	{
18219 	  goto ndberror;
18220 	}
18221       }
18222       else
18223       {
18224 	DBUG_PRINT("error", ("No such datafile"));
18225 	my_error(ER_ALTER_FILEGROUP_FAILED, MYF(0), " NO SUCH FILE");
18226 	DBUG_RETURN(1);
18227       }
18228     }
18229     else
18230     {
18231       DBUG_PRINT("error", ("Unsupported alter tablespace: %d",
18232 			   alter_info->ts_alter_tablespace_type));
18233       DBUG_RETURN(HA_ADMIN_NOT_IMPLEMENTED);
18234     }
18235     is_tablespace= 1;
18236     break;
18237   }
18238   case (CREATE_LOGFILE_GROUP):
18239   {
18240     error= ER_CREATE_FILEGROUP_FAILED;
18241     NdbDictionary::LogfileGroup ndb_lg;
18242     NdbDictionary::Undofile ndb_uf;
18243     NdbDictionary::ObjectId objid;
18244     if (alter_info->undo_file_name == NULL)
18245     {
18246       /*
18247 	REDO files in LOGFILE GROUP not supported yet
18248       */
18249       DBUG_RETURN(HA_ADMIN_NOT_IMPLEMENTED);
18250     }
18251     if (set_up_logfile_group(alter_info, &ndb_lg))
18252     {
18253       DBUG_RETURN(1);
18254     }
18255     errmsg= "LOGFILE GROUP";
18256     if (dict->createLogfileGroup(ndb_lg, &objid))
18257     {
18258       goto ndberror;
18259     }
18260     table_id = objid.getObjectId();
18261     table_version = objid.getObjectVersion();
18262     if (dict->getWarningFlags() &
18263         NdbDictionary::Dictionary::WarnUndobufferRoundUp)
18264     {
18265       push_warning_printf(current_thd, Sql_condition::SL_WARNING,
18266                           dict->getWarningFlags(),
18267                           "Undo buffer size rounded up to kernel page size");
18268     }
18269     DBUG_PRINT("alter_info", ("Successfully created Logfile Group"));
18270     if (set_up_undofile(alter_info, &ndb_uf))
18271     {
18272       DBUG_RETURN(1);
18273     }
18274     errmsg= "UNDOFILE";
18275     if (dict->createUndofile(ndb_uf))
18276     {
18277       err= dict->getNdbError();
18278       NdbDictionary::LogfileGroup tmp= dict->getLogfileGroup(ndb_lg.getName());
18279       if (dict->getNdbError().code == 0 &&
18280 	  tmp.getObjectId() == objid.getObjectId() &&
18281 	  tmp.getObjectVersion() == objid.getObjectVersion())
18282       {
18283 	dict->dropLogfileGroup(tmp);
18284       }
18285       goto ndberror2;
18286     }
18287     if (dict->getWarningFlags() &
18288         NdbDictionary::Dictionary::WarnUndofileRoundDown)
18289     {
18290       push_warning_printf(current_thd, Sql_condition::SL_WARNING,
18291                           dict->getWarningFlags(),
18292                           "Undofile size rounded down to kernel page size");
18293     }
18294     break;
18295   }
18296   case (ALTER_LOGFILE_GROUP):
18297   {
18298     error= ER_ALTER_FILEGROUP_FAILED;
18299     if (alter_info->undo_file_name == NULL)
18300     {
18301       /*
18302 	REDO files in LOGFILE GROUP not supported yet
18303       */
18304       DBUG_RETURN(HA_ADMIN_NOT_IMPLEMENTED);
18305     }
18306     NdbDictionary::Undofile ndb_uf;
18307     if (set_up_undofile(alter_info, &ndb_uf))
18308     {
18309       DBUG_RETURN(1);
18310     }
18311     errmsg= "CREATE UNDOFILE";
18312     NdbDictionary::ObjectId objid;
18313     if (dict->createUndofile(ndb_uf, false, &objid))
18314     {
18315       goto ndberror;
18316     }
18317     table_id = objid.getObjectId();
18318     table_version = objid.getObjectVersion();
18319     if (dict->getWarningFlags() &
18320         NdbDictionary::Dictionary::WarnUndofileRoundDown)
18321     {
18322       push_warning_printf(current_thd, Sql_condition::SL_WARNING,
18323                           dict->getWarningFlags(),
18324                           "Undofile size rounded down to kernel page size");
18325     }
18326     break;
18327   }
18328   case (DROP_TABLESPACE):
18329   {
18330     error= ER_DROP_FILEGROUP_FAILED;
18331     errmsg= "TABLESPACE";
18332     NdbDictionary::Tablespace ts=
18333       dict->getTablespace(alter_info->tablespace_name);
18334     table_id= ts.getObjectId();
18335     table_version= ts.getObjectVersion();
18336     if (dict->dropTablespace(ts))
18337     {
18338       goto ndberror;
18339     }
18340     is_tablespace= 1;
18341     break;
18342   }
18343   case (DROP_LOGFILE_GROUP):
18344   {
18345     error= ER_DROP_FILEGROUP_FAILED;
18346     errmsg= "LOGFILE GROUP";
18347     NdbDictionary::LogfileGroup lg=
18348       dict->getLogfileGroup(alter_info->logfile_group_name);
18349     table_id= lg.getObjectId();
18350     table_version= lg.getObjectVersion();
18351     if (dict->dropLogfileGroup(lg))
18352     {
18353       goto ndberror;
18354     }
18355     break;
18356   }
18357   case (CHANGE_FILE_TABLESPACE):
18358   {
18359     DBUG_RETURN(HA_ADMIN_NOT_IMPLEMENTED);
18360   }
18361   case (ALTER_ACCESS_MODE_TABLESPACE):
18362   {
18363     DBUG_RETURN(HA_ADMIN_NOT_IMPLEMENTED);
18364   }
18365   default:
18366   {
18367     DBUG_RETURN(HA_ADMIN_NOT_IMPLEMENTED);
18368   }
18369   }
18370   if (is_tablespace)
18371     ndbcluster_log_schema_op(thd,
18372                              thd->query().str, thd->query().length,
18373                              "", alter_info->tablespace_name,
18374                              table_id, table_version,
18375                              SOT_TABLESPACE, NULL, NULL);
18376   else
18377     ndbcluster_log_schema_op(thd,
18378                              thd->query().str, thd->query().length,
18379                              "", alter_info->logfile_group_name,
18380                              table_id, table_version,
18381                              SOT_LOGFILE_GROUP, NULL, NULL);
18382   DBUG_RETURN(FALSE);
18383 
18384 ndberror:
18385   err= dict->getNdbError();
18386 ndberror2:
18387   ndb_to_mysql_error(&err);
18388 
18389   my_error(error, MYF(0), errmsg);
18390   DBUG_RETURN(1);
18391 }
18392 
18393 
18394 bool ha_ndbcluster::get_num_parts(const char *name, uint *num_parts)
18395 {
18396   THD *thd= current_thd;
18397   Ndb *ndb;
18398   NDBDICT *dict;
18399   int err= 0;
18400   DBUG_ENTER("ha_ndbcluster::get_num_parts");
18401 
18402   set_dbname(name);
18403   set_tabname(name);
18404   for (;;)
18405   {
18406     if (check_ndb_connection(thd))
18407     {
18408       err= HA_ERR_NO_CONNECTION;
18409       break;
18410     }
18411     ndb= get_ndb(thd);
18412     ndb->setDatabaseName(m_dbname);
18413     Ndb_table_guard ndbtab_g(dict= ndb->getDictionary(), m_tabname);
18414     if (!ndbtab_g.get_table())
18415       ERR_BREAK(dict->getNdbError(), err);
18416     *num_parts= ndbtab_g.get_table()->getFragmentCount();
18417     DBUG_RETURN(FALSE);
18418   }
18419 
18420   print_error(err, MYF(0));
18421   DBUG_RETURN(TRUE);
18422 }
18423 
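/*
  Populate INFORMATION_SCHEMA.FILES with rows describing NDB disk data
  files; the loop below emits one row per datafile and alive data node,
  based on the dictionary's list of Datafile objects.
*/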
18424 static int ndbcluster_fill_files_table(handlerton *hton,
18425                                        THD *thd,
18426                                        TABLE_LIST *tables,
18427                                        Item *cond)
18428 {
18429   TABLE* table= tables->table;
18430   Ndb *ndb= check_ndb_in_thd(thd);
18431   NdbDictionary::Dictionary* dict= ndb->getDictionary();
18432   NdbDictionary::Dictionary::List dflist;
18433   NdbError ndberr;
18434   uint i;
18435   DBUG_ENTER("ndbcluster_fill_files_table");
18436 
18437   dict->listObjects(dflist, NdbDictionary::Object::Datafile);
18438   ndberr= dict->getNdbError();
18439   if (ndberr.classification != NdbError::NoError)
18440     ERR_RETURN(ndberr);
18441 
18442   for (i= 0; i < dflist.count; i++)
18443   {
18444     NdbDictionary::Dictionary::List::Element& elt = dflist.elements[i];
18445     Ndb_cluster_connection_node_iter iter;
18446     uint id;
18447 
18448     g_ndb_cluster_connection->init_get_next_node(iter);
18449 
18450     while ((id= g_ndb_cluster_connection->get_next_alive_node(iter)))
18451     {
18452       init_fill_schema_files_row(table);
18453       NdbDictionary::Datafile df= dict->getDatafile(id, elt.name);
18454       ndberr= dict->getNdbError();
18455       if (ndberr.classification != NdbError::NoError)
18456       {
18457         if (ndberr.classification == NdbError::SchemaError)
18458           continue;
18459 
18460         if (ndberr.classification == NdbError::UnknownResultError)
18461           continue;
18462 
18463         ERR_RETURN(ndberr);
18464       }
18465       NdbDictionary::Tablespace ts= dict->getTablespace(df.getTablespace());
18466       ndberr= dict->getNdbError();
18467       if (ndberr.classification != NdbError::NoError)
18468       {
18469         if (ndberr.classification == NdbError::SchemaError)
18470           continue;
18471         ERR_RETURN(ndberr);
18472       }
18473 
18474       table->field[IS_FILES_FILE_NAME]->set_notnull();
18475       table->field[IS_FILES_FILE_NAME]->store(elt.name, (uint)strlen(elt.name),
18476                                               system_charset_info);
18477       table->field[IS_FILES_FILE_TYPE]->set_notnull();
18478       table->field[IS_FILES_FILE_TYPE]->store("DATAFILE",8,
18479                                               system_charset_info);
18480       table->field[IS_FILES_TABLESPACE_NAME]->set_notnull();
18481       table->field[IS_FILES_TABLESPACE_NAME]->store(df.getTablespace(),
18482                                                     (uint)strlen(df.getTablespace()),
18483                                                     system_charset_info);
18484       table->field[IS_FILES_LOGFILE_GROUP_NAME]->set_notnull();
18485       table->field[IS_FILES_LOGFILE_GROUP_NAME]->
18486         store(ts.getDefaultLogfileGroup(),
18487               (uint)strlen(ts.getDefaultLogfileGroup()),
18488               system_charset_info);
18489       table->field[IS_FILES_ENGINE]->set_notnull();
18490       table->field[IS_FILES_ENGINE]->store(ndbcluster_hton_name,
18491                                            ndbcluster_hton_name_length,
18492                                            system_charset_info);
18493 
18494       table->field[IS_FILES_FREE_EXTENTS]->set_notnull();
18495       table->field[IS_FILES_FREE_EXTENTS]->store(df.getFree()
18496                                                  / ts.getExtentSize(), true);
18497       table->field[IS_FILES_TOTAL_EXTENTS]->set_notnull();
18498       table->field[IS_FILES_TOTAL_EXTENTS]->store(df.getSize()
18499                                                   / ts.getExtentSize(), true);
18500       table->field[IS_FILES_EXTENT_SIZE]->set_notnull();
18501       table->field[IS_FILES_EXTENT_SIZE]->store(ts.getExtentSize(), true);
18502       table->field[IS_FILES_INITIAL_SIZE]->set_notnull();
18503       table->field[IS_FILES_INITIAL_SIZE]->store(df.getSize(), true);
18504       table->field[IS_FILES_MAXIMUM_SIZE]->set_notnull();
18505       table->field[IS_FILES_MAXIMUM_SIZE]->store(df.getSize(), true);
18506       table->field[IS_FILES_VERSION]->set_notnull();
18507       table->field[IS_FILES_VERSION]->store(df.getObjectVersion(), true);
18508 
18509       table->field[IS_FILES_ROW_FORMAT]->set_notnull();
18510       table->field[IS_FILES_ROW_FORMAT]->store("FIXED", 5, system_charset_info);
18511 
18512       char extra[30];
18513       int len= (int)my_snprintf(extra, sizeof(extra), "CLUSTER_NODE=%u", id);
18514       table->field[IS_FILES_EXTRA]->set_notnull();
18515       table->field[IS_FILES_EXTRA]->store(extra, len, system_charset_info);
18516       schema_table_store_record(thd, table);
18517     }
18518   }
18519 
18520   NdbDictionary::Dictionary::List tslist;
18521   dict->listObjects(tslist, NdbDictionary::Object::Tablespace);
18522   ndberr= dict->getNdbError();
18523   if (ndberr.classification != NdbError::NoError)
18524     ERR_RETURN(ndberr);
18525 
18526   for (i= 0; i < tslist.count; i++)
18527   {
18528     NdbDictionary::Dictionary::List::Element& elt= tslist.elements[i];
18529 
18530     NdbDictionary::Tablespace ts= dict->getTablespace(elt.name);
18531     ndberr= dict->getNdbError();
18532     if (ndberr.classification != NdbError::NoError)
18533     {
18534       if (ndberr.classification == NdbError::SchemaError)
18535         continue;
18536       ERR_RETURN(ndberr);
18537     }
18538 
18539     init_fill_schema_files_row(table);
18540     table->field[IS_FILES_FILE_TYPE]->set_notnull();
18541     table->field[IS_FILES_FILE_TYPE]->store("TABLESPACE", 10,
18542                                             system_charset_info);
18543 
18544     table->field[IS_FILES_TABLESPACE_NAME]->set_notnull();
18545     table->field[IS_FILES_TABLESPACE_NAME]->store(elt.name,
18546                                                      (uint)strlen(elt.name),
18547                                                      system_charset_info);
18548     table->field[IS_FILES_LOGFILE_GROUP_NAME]->set_notnull();
18549     table->field[IS_FILES_LOGFILE_GROUP_NAME]->
18550       store(ts.getDefaultLogfileGroup(),
18551            (uint)strlen(ts.getDefaultLogfileGroup()),
18552            system_charset_info);
18553 
18554     table->field[IS_FILES_ENGINE]->set_notnull();
18555     table->field[IS_FILES_ENGINE]->store(ndbcluster_hton_name,
18556                                          ndbcluster_hton_name_length,
18557                                          system_charset_info);
18558 
18559     table->field[IS_FILES_EXTENT_SIZE]->set_notnull();
18560     table->field[IS_FILES_EXTENT_SIZE]->store(ts.getExtentSize(), true);
18561 
18562     table->field[IS_FILES_VERSION]->set_notnull();
18563     table->field[IS_FILES_VERSION]->store(ts.getObjectVersion(), true);
18564 
18565     schema_table_store_record(thd, table);
18566   }
18567 
18568   NdbDictionary::Dictionary::List uflist;
18569   dict->listObjects(uflist, NdbDictionary::Object::Undofile);
18570   ndberr= dict->getNdbError();
18571   if (ndberr.classification != NdbError::NoError)
18572     ERR_RETURN(ndberr);
18573 
18574   for (i= 0; i < uflist.count; i++)
18575   {
18576     NdbDictionary::Dictionary::List::Element& elt= uflist.elements[i];
18577     Ndb_cluster_connection_node_iter iter;
18578     unsigned id;
18579 
18580     g_ndb_cluster_connection->init_get_next_node(iter);
18581 
18582     while ((id= g_ndb_cluster_connection->get_next_alive_node(iter)))
18583     {
18584       NdbDictionary::Undofile uf= dict->getUndofile(id, elt.name);
18585       ndberr= dict->getNdbError();
18586       if (ndberr.classification != NdbError::NoError)
18587       {
18588         if (ndberr.classification == NdbError::SchemaError)
18589           continue;
18590         if (ndberr.classification == NdbError::UnknownResultError)
18591           continue;
18592         ERR_RETURN(ndberr);
18593       }
18594       NdbDictionary::LogfileGroup lfg=
18595         dict->getLogfileGroup(uf.getLogfileGroup());
18596       ndberr= dict->getNdbError();
18597       if (ndberr.classification != NdbError::NoError)
18598       {
18599         if (ndberr.classification == NdbError::SchemaError)
18600           continue;
18601         ERR_RETURN(ndberr);
18602       }
18603 
18604       init_fill_schema_files_row(table);
18605       table->field[IS_FILES_FILE_NAME]->set_notnull();
18606       table->field[IS_FILES_FILE_NAME]->store(elt.name, (uint)strlen(elt.name),
18607                                               system_charset_info);
18608       table->field[IS_FILES_FILE_TYPE]->set_notnull();
18609       table->field[IS_FILES_FILE_TYPE]->store("UNDO LOG", 8,
18610                                               system_charset_info);
18611       NdbDictionary::ObjectId objid;
18612       uf.getLogfileGroupId(&objid);
18613       table->field[IS_FILES_LOGFILE_GROUP_NAME]->set_notnull();
18614       table->field[IS_FILES_LOGFILE_GROUP_NAME]->store(uf.getLogfileGroup(),
18615                                                   (uint)strlen(uf.getLogfileGroup()),
18616                                                        system_charset_info);
18617       table->field[IS_FILES_LOGFILE_GROUP_NUMBER]->set_notnull();
18618       table->field[IS_FILES_LOGFILE_GROUP_NUMBER]->store(objid.getObjectId(), true);
18619       table->field[IS_FILES_ENGINE]->set_notnull();
18620       table->field[IS_FILES_ENGINE]->store(ndbcluster_hton_name,
18621                                            ndbcluster_hton_name_length,
18622                                            system_charset_info);
18623 
18624       table->field[IS_FILES_TOTAL_EXTENTS]->set_notnull();
18625       table->field[IS_FILES_TOTAL_EXTENTS]->store(uf.getSize()/4, true);
18626       table->field[IS_FILES_EXTENT_SIZE]->set_notnull();
18627       table->field[IS_FILES_EXTENT_SIZE]->store(4, true);
18628 
18629       table->field[IS_FILES_INITIAL_SIZE]->set_notnull();
18630       table->field[IS_FILES_INITIAL_SIZE]->store(uf.getSize(), true);
18631       table->field[IS_FILES_MAXIMUM_SIZE]->set_notnull();
18632       table->field[IS_FILES_MAXIMUM_SIZE]->store(uf.getSize(), true);
18633 
18634       table->field[IS_FILES_VERSION]->set_notnull();
18635       table->field[IS_FILES_VERSION]->store(uf.getObjectVersion(), true);
18636 
18637       char extra[100];
18638       int len= (int)my_snprintf(extra,sizeof(extra),"CLUSTER_NODE=%u;UNDO_BUFFER_SIZE=%lu",
18639                            id, (ulong) lfg.getUndoBufferSize());
18640       table->field[IS_FILES_EXTRA]->set_notnull();
18641       table->field[IS_FILES_EXTRA]->store(extra, len, system_charset_info);
18642       schema_table_store_record(thd, table);
18643     }
18644   }
18645 
18646   // now for LFGs
18647   NdbDictionary::Dictionary::List lfglist;
18648   dict->listObjects(lfglist, NdbDictionary::Object::LogfileGroup);
18649   ndberr= dict->getNdbError();
18650   if (ndberr.classification != NdbError::NoError)
18651     ERR_RETURN(ndberr);
18652 
18653   for (i= 0; i < lfglist.count; i++)
18654   {
18655     NdbDictionary::Dictionary::List::Element& elt= lfglist.elements[i];
18656 
18657     NdbDictionary::LogfileGroup lfg= dict->getLogfileGroup(elt.name);
18658     ndberr= dict->getNdbError();
18659     if (ndberr.classification != NdbError::NoError)
18660     {
18661       if (ndberr.classification == NdbError::SchemaError)
18662         continue;
18663       ERR_RETURN(ndberr);
18664     }
18665 
18666     init_fill_schema_files_row(table);
18667     table->field[IS_FILES_FILE_TYPE]->set_notnull();
18668     table->field[IS_FILES_FILE_TYPE]->store("UNDO LOG", 8,
18669                                             system_charset_info);
18670 
18671     table->field[IS_FILES_LOGFILE_GROUP_NAME]->set_notnull();
18672     table->field[IS_FILES_LOGFILE_GROUP_NAME]->store(elt.name,
18673                                                      (uint)strlen(elt.name),
18674                                                      system_charset_info);
18675     table->field[IS_FILES_LOGFILE_GROUP_NUMBER]->set_notnull();
18676     table->field[IS_FILES_LOGFILE_GROUP_NUMBER]->store(lfg.getObjectId(), true);
18677     table->field[IS_FILES_ENGINE]->set_notnull();
18678     table->field[IS_FILES_ENGINE]->store(ndbcluster_hton_name,
18679                                          ndbcluster_hton_name_length,
18680                                          system_charset_info);
18681 
18682     table->field[IS_FILES_FREE_EXTENTS]->set_notnull();
18683     table->field[IS_FILES_FREE_EXTENTS]->store(lfg.getUndoFreeWords(), true);
18684     table->field[IS_FILES_EXTENT_SIZE]->set_notnull();
18685     table->field[IS_FILES_EXTENT_SIZE]->store(4, true);
18686 
18687     table->field[IS_FILES_VERSION]->set_notnull();
18688     table->field[IS_FILES_VERSION]->store(lfg.getObjectVersion(), true);
18689 
18690     char extra[100];
18691     int len= (int)my_snprintf(extra,sizeof(extra),
18692                          "UNDO_BUFFER_SIZE=%lu",
18693                          (ulong) lfg.getUndoBufferSize());
18694     table->field[IS_FILES_EXTRA]->set_notnull();
18695     table->field[IS_FILES_EXTRA]->store(extra, len, system_charset_info);
18696     schema_table_store_record(thd, table);
18697   }
18698   DBUG_RETURN(0);
18699 }
18700 
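/*
  SHOW_FUNC callback backing the "Ndb" status variable array below.
  Allocates a per-query copy of st_ndb_status together with a copy of
  ndb_status_variables_dynamic, rebinds each SHOW_VAR value pointer
  into that copy and fills it via update_status_variables() using the
  current Thd_ndb connection.
*/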
18701 static int show_ndb_vars(THD *thd, SHOW_VAR *var, char *buff)
18702 {
18703   if (!check_ndb_in_thd(thd))
18704     return -1;
18705   struct st_ndb_status *st;
18706   SHOW_VAR *st_var;
18707   {
18708     char *mem= (char*)sql_alloc(sizeof(struct st_ndb_status) +
18709                                 sizeof(ndb_status_variables_dynamic));
18710     st= new (mem) st_ndb_status;
18711     st_var= (SHOW_VAR*)(mem + sizeof(struct st_ndb_status));
18712     memcpy(st_var, &ndb_status_variables_dynamic, sizeof(ndb_status_variables_dynamic));
18713     int i= 0;
18714     SHOW_VAR *tmp= &(ndb_status_variables_dynamic[0]);
18715     for (; tmp->value; tmp++, i++)
18716       st_var[i].value= mem + (tmp->value - (char*)&g_ndb_status);
18717   }
18718   {
18719     Thd_ndb *thd_ndb= get_thd_ndb(thd);
18720     Ndb_cluster_connection *c= thd_ndb->connection;
18721     update_status_variables(thd_ndb, st, c);
18722   }
18723   var->type= SHOW_ARRAY;
18724   var->value= (char *) st_var;
18725   return 0;
18726 }
18727 
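/*
  Status variables exported by the plugin, visible through e.g.
  SHOW GLOBAL STATUS LIKE 'Ndb%'.
*/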
18728 SHOW_VAR ndb_status_variables_export[]= {
18729   {"Ndb",          (char*) &show_ndb_vars,                 SHOW_FUNC,  SHOW_SCOPE_GLOBAL},
18730   {"Ndb_conflict", (char*) &show_ndb_conflict_status_vars, SHOW_FUNC,  SHOW_SCOPE_GLOBAL},
18731   {"Ndb",          (char*) &ndb_status_injector_variables, SHOW_ARRAY, SHOW_SCOPE_GLOBAL},
18732   {"Ndb",          (char*) &ndb_status_slave_variables,    SHOW_ARRAY, SHOW_SCOPE_GLOBAL},
18733   {"Ndb",          (char*) &show_ndb_server_api_stats,     SHOW_FUNC,  SHOW_SCOPE_GLOBAL},
18734   {"Ndb_index_stat", (char*) &ndb_status_index_stat_variables, SHOW_ARRAY, SHOW_SCOPE_GLOBAL},
18735   {NullS, NullS, SHOW_LONG, SHOW_SCOPE_GLOBAL}
18736 };
18737 
18738 
18739 static void cache_check_time_update(MYSQL_THD thd,
18740                                     struct st_mysql_sys_var *var,
18741                                     void *var_ptr,
18742                                     const void *save)
18743 {
18744   push_warning_printf(thd, Sql_condition::SL_WARNING,
18745                       ER_WARN_DEPRECATED_SYNTAX_NO_REPLACEMENT,
18746                       ER_THD(thd, ER_WARN_DEPRECATED_SYNTAX_NO_REPLACEMENT),
18747                       "@@ndb_cache_check_time");
18748 
18749   opt_ndb_cache_check_time= *static_cast<const ulong*>(save);
18750 }
18751 
18752 
18753 static MYSQL_SYSVAR_ULONG(
18754   cache_check_time,                  /* name */
18755   opt_ndb_cache_check_time,              /* var */
18756   PLUGIN_VAR_RQCMDARG,
18757   "A dedicated thread is created to, at the given "
18758   "millisecond interval, invalidate the query cache "
18759   "if another MySQL server in the cluster has changed "
18760   "the data in the database. "
18761   "This variable is deprecated and will be removed in a future release.",
18762   NULL,                              /* check func. */
18763   &cache_check_time_update,          /* update func. */
18764   0,                                 /* default */
18765   0,                                 /* min */
18766   ONE_YEAR_IN_SECONDS,               /* max */
18767   0                                  /* block */
18768 );
18769 
18770 
18771 static MYSQL_SYSVAR_ULONG(
18772   extra_logging,                     /* name */
18773   opt_ndb_extra_logging,                 /* var */
18774   PLUGIN_VAR_OPCMDARG,
18775   "Turn on more logging in the error log.",
18776   NULL,                              /* check func. */
18777   NULL,                              /* update func. */
18778   1,                                 /* default */
18779   0,                                 /* min */
18780   0,                                 /* max */
18781   0                                  /* block */
18782 );
18783 
18784 
18785 static MYSQL_SYSVAR_ULONG(
18786   wait_connected,                    /* name */
18787   opt_ndb_wait_connected,            /* var */
18788   PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY,
18789   "Time (in seconds) for mysqld to wait for connection "
18790   "to cluster management and data nodes.",
18791   NULL,                              /* check func. */
18792   NULL,                              /* update func. */
18793   30,                                /* default */
18794   0,                                 /* min */
18795   ONE_YEAR_IN_SECONDS,               /* max */
18796   0                                  /* block */
18797 );
18798 
18799 
18800 static MYSQL_SYSVAR_ULONG(
18801   wait_setup,                        /* name */
18802   opt_ndb_wait_setup,                /* var */
18803   PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY,
18804   "Time (in seconds) for mysqld to wait for setup to "
18805   "complete (0 = no wait)",
18806   NULL,                              /* check func. */
18807   NULL,                              /* update func. */
18808   30,                                /* default */
18809   0,                                 /* min */
18810   ONE_YEAR_IN_SECONDS,               /* max */
18811   0                                  /* block */
18812 );
18813 
18814 static const int MAX_CLUSTER_CONNECTIONS = 63;
18815 
18816 static MYSQL_SYSVAR_UINT(
18817   cluster_connection_pool,           /* name */
18818   opt_ndb_cluster_connection_pool,   /* var */
18819   PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY,
18820   "Pool of cluster connections to be used by mysql server.",
18821   NULL,                              /* check func. */
18822   NULL,                              /* update func. */
18823   1,                                 /* default */
18824   1,                                 /* min */
18825   MAX_CLUSTER_CONNECTIONS,           /* max */
18826   0                                  /* block */
18827 );
18828 
18829 static const int MIN_ACTIVATION_THRESHOLD = 0;
18830 static const int MAX_ACTIVATION_THRESHOLD = 16;
18831 
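/*
  Check function for ndb_recv_thread_activation_threshold: reject the
  new value unless it can be read as an integer within
  [MIN_ACTIVATION_THRESHOLD, MAX_ACTIVATION_THRESHOLD].
*/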
18832 static
18833 int
18834 ndb_recv_thread_activation_threshold_check(MYSQL_THD thd,
18835                                            struct st_mysql_sys_var *var,
18836                                            void *save,
18837                                            struct st_mysql_value *value)
18838 {
18839   long long int_buf;
18840   int val = (int)value->val_int(value, &int_buf);
18841   int new_val = (int)int_buf;
18842 
18843   if (val != 0 ||
18844       new_val < MIN_ACTIVATION_THRESHOLD ||
18845       new_val > MAX_ACTIVATION_THRESHOLD)
18846   {
18847     return 1;
18848   }
18849   opt_ndb_recv_thread_activation_threshold = new_val;
18850   return 0;
18851 }
18852 
18853 static
18854 void
18855 ndb_recv_thread_activation_threshold_update(MYSQL_THD,
18856                                             struct st_mysql_sys_var *var,
18857                                             void *var_ptr,
18858                                             const void *save)
18859 {
18860   ndb_set_recv_thread_activation_threshold(
18861     opt_ndb_recv_thread_activation_threshold);
18862 }
18863 
18864 static MYSQL_SYSVAR_UINT(
18865   recv_thread_activation_threshold,         /* name */
18866   opt_ndb_recv_thread_activation_threshold, /* var */
18867   PLUGIN_VAR_RQCMDARG,
18868   "Activation threshold when receive thread takes over the polling "
18869   "of the cluster connection (measured in concurrently active "
18870   "threads)",
18871   ndb_recv_thread_activation_threshold_check,  /* check func. */
18872   ndb_recv_thread_activation_threshold_update, /* update func. */
18873   8,                                           /* default */
18874   MIN_ACTIVATION_THRESHOLD,                    /* min */
18875   MAX_ACTIVATION_THRESHOLD,                    /* max */
18876   0                                            /* block */
18877 );
18878 
18879 
18880 /* Definitions needed for receive thread cpu mask config variable */
18881 static const int ndb_recv_thread_cpu_mask_option_buf_size = 512;
18882 char ndb_recv_thread_cpu_mask_option_buf[ndb_recv_thread_cpu_mask_option_buf_size];
18883 Uint16 recv_thread_cpuid_array[1 * MAX_CLUSTER_CONNECTIONS];
18884 
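/*
  Check/update functions for ndb_recv_thread_cpu_mask. The mask string
  (e.g. "0x33" as in the option description) is parsed into a
  SparseBitmask and each set bit becomes an entry in
  recv_thread_cpuid_array, capped at MAX_CLUSTER_CONNECTIONS CPUs.
*/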
18885 static
18886 int
18887 ndb_recv_thread_cpu_mask_check(MYSQL_THD thd,
18888                                struct st_mysql_sys_var *var,
18889                                void *save,
18890                                struct st_mysql_value *value)
18891 {
18892   char buf[ndb_recv_thread_cpu_mask_option_buf_size];
18893   int len = sizeof(buf);
18894   const char *str = value->val_str(value, buf, &len);
18895 
18896   return ndb_recv_thread_cpu_mask_check_str(str);
18897 }
18898 
18899 static int
18900 ndb_recv_thread_cpu_mask_check_str(const char *str)
18901 {
18902   unsigned i;
18903   SparseBitmask bitmask;
18904 
18905   recv_thread_num_cpus = 0;
18906   if (str == 0)
18907   {
18908     /* Setting to empty string is interpreted as remove locking to CPU */
18909     return 0;
18910   }
18911 
18912   if (parse_mask(str, bitmask) < 0)
18913   {
18914     sql_print_information("Trying to set ndb_recv_thread_cpu_mask to"
18915                           " an illegal value = %s, ignored",
18916                           str);
18917     goto error;
18918   }
18919   for (i = bitmask.find(0);
18920        i != SparseBitmask::NotFound;
18921        i = bitmask.find(i + 1))
18922   {
18923     if (recv_thread_num_cpus ==
18924         1 * MAX_CLUSTER_CONNECTIONS)
18925     {
18926       sql_print_information("Trying to set too many CPUs in "
18927                             "ndb_recv_thread_cpu_mask, ignoring"
18928                             " this variable, erroneous value = %s",
18929                             str);
18930       goto error;
18931     }
18932     recv_thread_cpuid_array[recv_thread_num_cpus++] = i;
18933   }
18934   return 0;
18935 error:
18936   return 1;
18937 }
18938 
18939 static
18940 void
18941 ndb_recv_thread_cpu_mask_update()
18942 {
18943   ndb_set_recv_thread_cpu(recv_thread_cpuid_array,
18944                           recv_thread_num_cpus);
18945 }
18946 
18947 static
18948 void
18949 ndb_recv_thread_cpu_mask_update_func(MYSQL_THD,
18950                                      struct st_mysql_sys_var *var,
18951                                      void *var_ptr,
18952                                      const void *save)
18953 {
18954   ndb_recv_thread_cpu_mask_update();
18955 }
18956 
18957 static MYSQL_SYSVAR_STR(
18958   recv_thread_cpu_mask,             /* name */
18959   opt_ndb_recv_thread_cpu_mask,     /* var */
18960   PLUGIN_VAR_RQCMDARG,
18961   "CPU mask for locking receiver threads to specific CPU, specified "
18962   " as hexadecimal as e.g. 0x33, one CPU is used per receiver thread.",
18963   ndb_recv_thread_cpu_mask_check,      /* check func. */
18964   ndb_recv_thread_cpu_mask_update_func,/* update func. */
18965   ndb_recv_thread_cpu_mask_option_buf
18966 );
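/*
  Example (illustrative), using the hexadecimal form described above
  to lock receiver threads to CPUs 0 and 1:

    SET GLOBAL ndb_recv_thread_cpu_mask = '0x3';
*/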
18967 
18968 /* should be in index_stat.h */
18969 
18970 extern int
18971 ndb_index_stat_option_check(MYSQL_THD,
18972                             struct st_mysql_sys_var *var,
18973                             void *save,
18974                             struct st_mysql_value *value);
18975 extern void
18976 ndb_index_stat_option_update(MYSQL_THD,
18977                              struct st_mysql_sys_var *var,
18978                              void *var_ptr,
18979                              const void *save);
18980 
18981 extern char ndb_index_stat_option_buf[];
18982 
18983 static MYSQL_SYSVAR_STR(
18984   index_stat_option,                /* name */
18985   opt_ndb_index_stat_option,        /* var */
18986   PLUGIN_VAR_RQCMDARG,
18987   "Comma-separated tunable options for ndb index statistics",
18988   ndb_index_stat_option_check,      /* check func. */
18989   ndb_index_stat_option_update,     /* update func. */
18990   ndb_index_stat_option_buf
18991 );
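/*
  Example (illustrative; option names as documented for
  ndb_index_stat_option):

    SET GLOBAL ndb_index_stat_option = 'loop_idle=1000ms,cache_limit=32M';
*/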
18992 
18993 
18994 ulong opt_ndb_report_thresh_binlog_epoch_slip;
18995 static MYSQL_SYSVAR_ULONG(
18996   report_thresh_binlog_epoch_slip,   /* name */
18997   opt_ndb_report_thresh_binlog_epoch_slip,/* var */
18998   PLUGIN_VAR_RQCMDARG,
18999   "Threshold on number of epochs to be behind before reporting binlog "
19000   "status. E.g. 3 means that if the difference between what epoch has "
19001   "been received from the storage nodes and what has been applied to "
19002   "the binlog is 3 or more, a status message will be sent to the cluster "
19003   "log.",
19004   NULL,                              /* check func. */
19005   NULL,                              /* update func. */
19006   3,                                 /* default */
19007   0,                                 /* min */
19008   256,                               /* max */
19009   0                                  /* block */
19010 );
19011 
19012 
19013 ulong opt_ndb_report_thresh_binlog_mem_usage;
19014 static MYSQL_SYSVAR_ULONG(
19015   report_thresh_binlog_mem_usage,    /* name */
19016   opt_ndb_report_thresh_binlog_mem_usage,/* var */
19017   PLUGIN_VAR_RQCMDARG,
19018   "Threshold on percentage of free memory before reporting binlog "
19019   "status. E.g. 10 means that if amount of available memory for "
19020   "receiving binlog data from the storage nodes goes below 10%, "
19021   "a status message will be sent to the cluster log.",
19022   NULL,                              /* check func. */
19023   NULL,                              /* update func. */
19024   10,                                /* default */
19025   0,                                 /* min */
19026   100,                               /* max */
19027   0                                  /* block */
19028 );
19029 
19030 
19031 ulong opt_ndb_eventbuffer_max_alloc;
19032 static MYSQL_SYSVAR_ULONG(
19033   eventbuffer_max_alloc,             /* name */
19034   opt_ndb_eventbuffer_max_alloc,     /* var */
19035   PLUGIN_VAR_RQCMDARG,
19036   "Maximum memory that can be allocated for buffering "
19037   "events by the ndb api.",
19038   NULL,                              /* check func. */
19039   NULL,                              /* update func. */
19040   0,                                 /* default */
19041   0,                                 /* min */
19042   UINT_MAX32,                        /* max */
19043   0                                  /* block */
19044 );
19045 
19046 
19047 uint opt_ndb_eventbuffer_free_percent;
19048 static MYSQL_SYSVAR_UINT(
19049   eventbuffer_free_percent, /* name */
19050   opt_ndb_eventbuffer_free_percent,/* var */
19051   PLUGIN_VAR_RQCMDARG,
19052   "Percentage of free memory that should be available "
19053   "in event buffer before resuming buffering "
19054   "after the max_alloc limit is hit.",
19055   NULL, /* check func. */
19056   NULL, /* update func. */
19057   20, /* default */
19058   1, /* min */
19059   99, /* max */
19060   0 /* block */
19061 );
19062 
19063 
19064 my_bool opt_ndb_log_update_as_write;
19065 static MYSQL_SYSVAR_BOOL(
19066   log_update_as_write,               /* name */
19067   opt_ndb_log_update_as_write,       /* var */
19068   PLUGIN_VAR_OPCMDARG,
19069   "For efficiency log only after image as a write event. "
19070   "Ignore before image. This may cause compatibility problems if "
19071   "replicating to other storage engines than ndbcluster.",
19072   NULL,                              /* check func. */
19073   NULL,                              /* update func. */
19074   1                                  /* default */
19075 );
19076 
19077 my_bool opt_ndb_log_update_minimal;
19078 static MYSQL_SYSVAR_BOOL(
19079   log_update_minimal,                  /* name */
19080   opt_ndb_log_update_minimal,          /* var */
19081   PLUGIN_VAR_OPCMDARG,
19082   "For efficiency, log updates in a minimal format"
19083   "Log only the primary key value(s) in the before "
19084   "image. Log only the changed columns in the after "
19085   "image. This may cause compatibility problems if "
19086   "replicating to other storage engines than ndbcluster.",
19087   NULL,                              /* check func. */
19088   NULL,                              /* update func. */
19089   0                                  /* default */
19090 );
19091 
19092 my_bool opt_ndb_log_updated_only;
19093 static MYSQL_SYSVAR_BOOL(
19094   log_updated_only,                  /* name */
19095   opt_ndb_log_updated_only,          /* var */
19096   PLUGIN_VAR_OPCMDARG,
19097   "For efficiency log only updated columns. Columns are considered "
19098   "as \"updated\" even if they are updated with the same value. "
19099   "This may cause compatibility problems if "
19100   "replicating to other storage engines than ndbcluster.",
19101   NULL,                              /* check func. */
19102   NULL,                              /* update func. */
19103   1                                  /* default */
19104 );
19105 
19106 my_bool opt_ndb_log_empty_update;
19107 static MYSQL_SYSVAR_BOOL(
19108   log_empty_update,                  /* name */
19109   opt_ndb_log_empty_update,          /* var */
19110   PLUGIN_VAR_OPCMDARG,
19111   "Normally empty updates are filtered away "
19112   "before they are logged. However, for read tracking "
19113   "in conflict resolution a hidden pesudo attribute is "
19114   "set which will result in an empty update along with "
19115   "special flags set. For this to work empty updates "
19116   "have to be allowed.",
19117   NULL,                              /* check func. */
19118   NULL,                              /* update func. */
19119   0                                  /* default */
19120 );
19121 
19122 my_bool opt_ndb_log_orig;
19123 static MYSQL_SYSVAR_BOOL(
19124   log_orig,                          /* name */
19125   opt_ndb_log_orig,                  /* var */
19126   PLUGIN_VAR_OPCMDARG,
19127   "Log originating server id and epoch in ndb_binlog_index. Each epoch "
19128   "may in this case have multiple rows in ndb_binlog_index, one for "
19129   "each originating epoch.",
19130   NULL,                              /* check func. */
19131   NULL,                              /* update func. */
19132   0                                  /* default */
19133 );
19134 
19135 
19136 my_bool opt_ndb_log_bin;
19137 static MYSQL_SYSVAR_BOOL(
19138   log_bin,                           /* name */
19139   opt_ndb_log_bin,                   /* var */
19140   PLUGIN_VAR_OPCMDARG,
19141   "Log ndb tables in the binary log. Option only has meaning if "
19142   "the binary log has been turned on for the server.",
19143   NULL,                              /* check func. */
19144   NULL,                              /* update func. */
19145   1                                  /* default */
19146 );
19147 
19148 
19149 my_bool opt_ndb_log_binlog_index;
19150 static MYSQL_SYSVAR_BOOL(
19151   log_binlog_index,                  /* name */
19152   opt_ndb_log_binlog_index,          /* var */
19153   PLUGIN_VAR_OPCMDARG,
19154   "Insert mapping between epochs and binlog positions into the "
19155   "ndb_binlog_index table.",
19156   NULL,                              /* check func. */
19157   NULL,                              /* update func. */
19158   1                                  /* default */
19159 );
19160 
19161 
19162 static my_bool opt_ndb_log_empty_epochs;
19163 static MYSQL_SYSVAR_BOOL(
19164   log_empty_epochs,                  /* name */
19165   opt_ndb_log_empty_epochs,          /* var */
19166   PLUGIN_VAR_OPCMDARG,
19167   "",
19168   NULL,                              /* check func. */
19169   NULL,                              /* update func. */
19170   0                                  /* default */
19171 );
19172 
19173 bool ndb_log_empty_epochs(void)
19174 {
19175   return opt_ndb_log_empty_epochs;
19176 }
19177 
19178 my_bool opt_ndb_log_apply_status;
19179 static MYSQL_SYSVAR_BOOL(
19180   log_apply_status,                 /* name */
19181   opt_ndb_log_apply_status,         /* var */
19182   PLUGIN_VAR_OPCMDARG,
19183   "Log ndb_apply_status updates from Master in the Binlog",
19184   NULL,                             /* check func. */
19185   NULL,                             /* update func. */
19186   0                                 /* default */
19187 );
19188 
19189 
19190 my_bool opt_ndb_log_transaction_id;
19191 static MYSQL_SYSVAR_BOOL(
19192   log_transaction_id,               /* name */
19193   opt_ndb_log_transaction_id,       /* var  */
19194   PLUGIN_VAR_OPCMDARG,
19195   "Log Ndb transaction identities per row in the Binlog",
19196   NULL,                             /* check func. */
19197   NULL,                             /* update func. */
19198   0                                 /* default */
19199 );
19200 
19201 
19202 static MYSQL_SYSVAR_STR(
19203   connectstring,                    /* name */
19204   opt_ndb_connectstring,            /* var */
19205   PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY,
19206   "Connect string for ndbcluster.",
19207   NULL,                             /* check func. */
19208   NULL,                             /* update func. */
19209   NULL                              /* default */
19210 );
19211 
19212 
19213 static MYSQL_SYSVAR_STR(
19214   mgmd_host,                        /* name */
19215   opt_ndb_connectstring,                /* var */
19216   PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY,
19217   "Same as --ndb-connectstring",
19218   NULL,                             /* check func. */
19219   NULL,                             /* update func. */
19220   NULL                              /* default */
19221 );
19222 
19223 
19224 static MYSQL_SYSVAR_UINT(
19225   nodeid,                           /* name */
19226   opt_ndb_nodeid,                   /* var */
19227   PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY,
19228   "Set nodeid for this node. Overrides node id specified "
19229   "in --ndb-connectstring.",
19230   NULL,                             /* check func. */
19231   NULL,                             /* update func. */
19232   0,                                /* default */
19233   0,                                /* min */
19234   MAX_NODES_ID,                     /* max */
19235   0                                 /* block */
19236 );
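/*
  Example (illustrative) my.cnf settings for the connection options
  above; the host name and node id are placeholders:

    [mysqld]
    ndb-connectstring = mgm_host:1186
    ndb-nodeid        = 4
*/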
19237 
19238 static const char* slave_conflict_role_names[] =
19239 {
19240   "NONE",
19241   "SECONDARY",
19242   "PRIMARY",
19243   "PASS",
19244   NullS
19245 };
19246 
19247 static TYPELIB slave_conflict_role_typelib =
19248 {
19249   array_elements(slave_conflict_role_names) - 1,
19250   "",
19251   slave_conflict_role_names,
19252   NULL
19253 };
19254 
19255 
19256 /**
19257  * slave_conflict_role_check_func.
19258  *
19259  * Perform most validation of a role change request.
19260  * Inspired by sql_plugin.cc::check_func_enum()
19261  */
19262 static int slave_conflict_role_check_func(THD *thd, struct st_mysql_sys_var *var,
19263                                           void *save, st_mysql_value *value)
19264 {
19265   char buff[STRING_BUFFER_USUAL_SIZE];
19266   const char *str;
19267   long long tmp;
19268   long result;
19269   int length;
19270 
19271   do
19272   {
19273     if (value->value_type(value) == MYSQL_VALUE_TYPE_STRING)
19274     {
19275       length= sizeof(buff);
19276       if (!(str= value->val_str(value, buff, &length)))
19277         break;
19278       if ((result= (long)find_type(str, &slave_conflict_role_typelib, 0) - 1) < 0)
19279         break;
19280     }
19281     else
19282     {
19283       if (value->val_int(value, &tmp))
19284         break;
19285       if (tmp < 0 || tmp >= slave_conflict_role_typelib.count)
19286         break;
19287       result= (long) tmp;
19288     }
19289 
19290     const char* failure_cause_str = NULL;
19291     if (!st_ndb_slave_state::checkSlaveConflictRoleChange(
19292                (enum_slave_conflict_role) opt_ndb_slave_conflict_role,
19293                (enum_slave_conflict_role) result,
19294                &failure_cause_str))
19295     {
19296       char msgbuf[256];
19297       my_snprintf(msgbuf,
19298                   sizeof(msgbuf),
19299                   "Role change from %s to %s failed : %s",
19300                   get_type(&slave_conflict_role_typelib, opt_ndb_slave_conflict_role),
19301                   get_type(&slave_conflict_role_typelib, result),
19302                   failure_cause_str);
19303 
19304       thd->raise_error_printf(ER_ERROR_WHEN_EXECUTING_COMMAND,
19305                               "SET GLOBAL ndb_slave_conflict_role",
19306                               msgbuf);
19307 
19308       break;
19309     }
19310 
19311     /* Ok */
19312     *(long*)save= result;
19313     return 0;
19314   } while (0);
19315   /* Error */
19316   return 1;
19317 };
19318 
19319 /**
19320  * slave_conflict_role_update_func
19321  *
19322  * Perform actual change of role, using saved 'long' enum value
19323  * prepared by the update func above.
19324  *
19325  * Inspired by sql_plugin.cc::update_func_long()
19326  */
19327 static void slave_conflict_role_update_func(THD *thd, struct st_mysql_sys_var *var,
19328                                             void *tgt, const void *save)
19329 {
19330   *(long *)tgt= *(long *) save;
19331 };
19332 
19333 static MYSQL_SYSVAR_ENUM(
19334   slave_conflict_role,               /* Name */
19335   opt_ndb_slave_conflict_role,       /* Var */
19336   PLUGIN_VAR_RQCMDARG,
19337   "Role for Slave to play in asymmetric conflict algorithms.",
19338   slave_conflict_role_check_func,    /* Check func */
19339   slave_conflict_role_update_func,   /* Update func */
19340   SCR_NONE,                          /* Default value */
19341   &slave_conflict_role_typelib       /* typelib */
19342 );
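/*
  Example (illustrative): switch this slave to the SECONDARY role,
  subject to the transition rules enforced by
  slave_conflict_role_check_func() above:

    SET GLOBAL ndb_slave_conflict_role = 'SECONDARY';
*/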
19343 
19344 #ifndef NDEBUG
19345 
19346 static
19347 void
19348 dbg_check_shares_update(THD*, st_mysql_sys_var*, void*, const void*)
19349 {
19350   sql_print_information("dbug_check_shares open:");
19351   for (uint i= 0; i < ndbcluster_open_tables.records; i++)
19352   {
19353     NDB_SHARE *share= (NDB_SHARE*)my_hash_element(&ndbcluster_open_tables, i);
19354     sql_print_information("  %s.%s: state: %s(%u) use_count: %u",
19355                           share->db, share->table_name,
19356                           get_share_state_string(share->state),
19357                           (unsigned)share->state,
19358                           share->use_count);
19359     assert(share->state != NSS_DROPPED);
19360   }
19361 
19362   sql_print_information("dbug_check_shares dropped:");
19363   for (uint i= 0; i < ndbcluster_dropped_tables.records; i++)
19364   {
19365     NDB_SHARE *share= (NDB_SHARE*)my_hash_element(&ndbcluster_dropped_tables,i);
19366     sql_print_information("  %s.%s: state: %s(%u) use_count: %u",
19367                           share->db, share->table_name,
19368                           get_share_state_string(share->state),
19369                           (unsigned)share->state,
19370                           share->use_count);
19371     assert(share->state == NSS_DROPPED);
19372   }
19373 
19374   /**
19375    * Only shares in mysql database may be open...
19376    */
19377   for (uint i= 0; i < ndbcluster_open_tables.records; i++)
19378   {
19379     NDB_SHARE *share= (NDB_SHARE*)my_hash_element(&ndbcluster_open_tables, i);
19380     assert(strcmp(share->db, "mysql") == 0);
19381   }
19382 
19383   /**
19384    * Only shares in mysql database may be open...
19385    */
19386   for (uint i= 0; i < ndbcluster_dropped_tables.records; i++)
19387   {
19388     NDB_SHARE *share= (NDB_SHARE*)my_hash_element(&ndbcluster_dropped_tables,i);
19389     assert(strcmp(share->db, "mysql") == 0);
19390   }
19391 }
19392 
19393 static MYSQL_THDVAR_UINT(
19394   dbg_check_shares,                  /* name */
19395   PLUGIN_VAR_RQCMDARG,
19396   "Debug, only...check that no shares are lingering...",
19397   NULL,                              /* check func */
19398   dbg_check_shares_update,           /* update func */
19399   0,                                 /* default */
19400   0,                                 /* min */
19401   1,                                 /* max */
19402   0                                  /* block */
19403 );
19404 
19405 #endif
19406 
19407 static struct st_mysql_sys_var* system_variables[]= {
19408   MYSQL_SYSVAR(cache_check_time),
19409   MYSQL_SYSVAR(extra_logging),
19410   MYSQL_SYSVAR(wait_connected),
19411   MYSQL_SYSVAR(wait_setup),
19412   MYSQL_SYSVAR(cluster_connection_pool),
19413   MYSQL_SYSVAR(recv_thread_activation_threshold),
19414   MYSQL_SYSVAR(recv_thread_cpu_mask),
19415   MYSQL_SYSVAR(report_thresh_binlog_mem_usage),
19416   MYSQL_SYSVAR(report_thresh_binlog_epoch_slip),
19417   MYSQL_SYSVAR(eventbuffer_max_alloc),
19418   MYSQL_SYSVAR(eventbuffer_free_percent),
19419   MYSQL_SYSVAR(log_update_as_write),
19420   MYSQL_SYSVAR(log_updated_only),
19421   MYSQL_SYSVAR(log_update_minimal),
19422   MYSQL_SYSVAR(log_empty_update),
19423   MYSQL_SYSVAR(log_orig),
19424   MYSQL_SYSVAR(distribution),
19425   MYSQL_SYSVAR(autoincrement_prefetch_sz),
19426   MYSQL_SYSVAR(force_send),
19427   MYSQL_SYSVAR(use_exact_count),
19428   MYSQL_SYSVAR(use_transactions),
19429   MYSQL_SYSVAR(use_copying_alter_table),
19430   MYSQL_SYSVAR(optimized_node_selection),
19431   MYSQL_SYSVAR(batch_size),
19432   MYSQL_SYSVAR(optimization_delay),
19433   MYSQL_SYSVAR(index_stat_enable),
19434   MYSQL_SYSVAR(index_stat_option),
19435   MYSQL_SYSVAR(table_no_logging),
19436   MYSQL_SYSVAR(table_temporary),
19437   MYSQL_SYSVAR(log_bin),
19438   MYSQL_SYSVAR(log_binlog_index),
19439   MYSQL_SYSVAR(log_empty_epochs),
19440   MYSQL_SYSVAR(log_apply_status),
19441   MYSQL_SYSVAR(log_transaction_id),
19442   MYSQL_SYSVAR(connectstring),
19443   MYSQL_SYSVAR(mgmd_host),
19444   MYSQL_SYSVAR(nodeid),
19445   MYSQL_SYSVAR(blob_read_batch_bytes),
19446   MYSQL_SYSVAR(blob_write_batch_bytes),
19447   MYSQL_SYSVAR(deferred_constraints),
19448   MYSQL_SYSVAR(join_pushdown),
19449   MYSQL_SYSVAR(log_exclusive_reads),
19450 #ifndef NDEBUG
19451   MYSQL_SYSVAR(dbg_check_shares),
19452 #endif
19453   MYSQL_SYSVAR(version),
19454   MYSQL_SYSVAR(version_string),
19455   MYSQL_SYSVAR(show_foreign_key_mock_tables),
19456   MYSQL_SYSVAR(slave_conflict_role),
19457   NULL
19458 };
19459 
19460 struct st_mysql_storage_engine ndbcluster_storage_engine=
19461 { MYSQL_HANDLERTON_INTERFACE_VERSION };
19462 
19463 
19464 extern struct st_mysql_plugin i_s_ndb_transid_mysql_connection_map_plugin;
19465 extern struct st_mysql_plugin ndbinfo_plugin;
19466 
19467 mysql_declare_plugin(ndbcluster)
19468 {
19469   MYSQL_STORAGE_ENGINE_PLUGIN,
19470   &ndbcluster_storage_engine,
19471   ndbcluster_hton_name,
19472   "MySQL AB",
19473   "Clustered, fault-tolerant tables",
19474   PLUGIN_LICENSE_GPL,
19475   ndbcluster_init,            /* plugin init */
19476   NULL,                       /* plugin deinit */
19477   0x0100,                     /* plugin version */
19478   ndb_status_variables_export,/* status variables                */
19479   system_variables,           /* system variables */
19480   NULL,                       /* config options */
19481   0                           /* flags */
19482 },
19483 ndbinfo_plugin, /* ndbinfo plugin */
19484 /* IS plugin table which maps between mysql connection id and ndb trans-id */
19485 i_s_ndb_transid_mysql_connection_map_plugin
19486 mysql_declare_plugin_end;
19487 
19488