/* Copyright (c) 2004, 2016, Oracle and/or its affiliates. All rights reserved.

  This program is free software; you can redistribute it and/or modify
  it under the terms of the GNU General Public License, version 2.0,
  as published by the Free Software Foundation.

  This program is also distributed with certain software (including
  but not limited to OpenSSL) that is licensed under separate terms,
  as designated in a particular file or component or in included license
  documentation.  The authors of MySQL hereby grant you an additional
  permission to link the program and your derivative works with the
  separately licensed software that they have included with MySQL.

  This program is distributed in the hope that it will be useful,
  but WITHOUT ANY WARRANTY; without even the implied warranty of
  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  GNU General Public License, version 2.0, for more details.

  You should have received a copy of the GNU General Public License
  along with this program; if not, write to the Free Software
  Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301  USA */

/**
  @file

  @brief
  This file defines the NDB Cluster handler: the interface between
  MySQL and NDB Cluster
*/

#include "ha_ndbcluster_glue.h"

#ifdef WITH_NDBCLUSTER_STORAGE_ENGINE
#include "ha_ndbcluster.h"
#include <ndbapi/NdbApi.hpp>
#include <util/Bitmask.hpp>
#include <ndbapi/NdbIndexStat.hpp>
#include <ndbapi/NdbInterpretedCode.hpp>
#include "../storage/ndb/src/ndbapi/NdbQueryBuilder.hpp"
#include "../storage/ndb/src/ndbapi/NdbQueryOperation.hpp"

#include "ha_ndbcluster_binlog.h"
#include "ha_ndbcluster_push.h"
#include "ha_ndbcluster_cond.h"
#include "ha_ndbcluster_tables.h"
#include "ha_ndbcluster_connection.h"
#include "ndb_thd.h"
#include "ndb_table_guard.h"
#include "ndb_global_schema_lock.h"
#include "ndb_global_schema_lock_guard.h"
#ifndef NDB_WITHOUT_JOIN_PUSHDOWN
#include "abstract_query_plan.h"
#endif
#include "ndb_dist_priv_util.h"
#include "ha_ndb_index_stat.h"

#include <mysql/plugin.h>
#include <ndb_version.h>
#include "ndb_mi.h"

// ndb interface initialization/cleanup
extern "C" void ndb_init_internal();
extern "C" void ndb_end_internal();

static const int DEFAULT_PARALLELISM= 0;
static const ha_rows DEFAULT_AUTO_PREFETCH= 32;
static const ulong ONE_YEAR_IN_SECONDS= (ulong) 3600L*24L*365L;
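// Note: ONE_YEAR_IN_SECONDS is also reused below simply as a conveniently
// large upper bound for the batch_size session variable.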

ulong opt_ndb_extra_logging;
static ulong opt_ndb_wait_connected;
ulong opt_ndb_wait_setup;
static ulong opt_ndb_cache_check_time;
static uint opt_ndb_cluster_connection_pool;
static char* opt_ndb_index_stat_option;
static char* opt_ndb_connectstring;
static uint opt_ndb_nodeid;

static MYSQL_THDVAR_UINT(
  autoincrement_prefetch_sz,         /* name */
  PLUGIN_VAR_RQCMDARG,
  "Specify the number of autoincrement values that are prefetched.",
  NULL,                              /* check func. */
  NULL,                              /* update func. */
  1,                                 /* default */
  1,                                 /* min */
  65535,                             /* max */
  0                                  /* block */
);
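
/*
  Session variables declared with MYSQL_THDVAR_* are read per-connection
  via the THDVAR macro, e.g. (illustrative sketch):

    uint prefetch_sz= THDVAR(thd, autoincrement_prefetch_sz);

  The same pattern is used by ndb_index_stat_get_enable() further below.
*/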


static MYSQL_THDVAR_BOOL(
  force_send,                        /* name */
  PLUGIN_VAR_OPCMDARG,
  "Force send of buffers to ndb immediately without waiting for "
  "other threads.",
  NULL,                              /* check func. */
  NULL,                              /* update func. */
  1                                  /* default */
);


static MYSQL_THDVAR_BOOL(
  use_exact_count,                   /* name */
  PLUGIN_VAR_OPCMDARG,
  "Use exact record count during query planning and for fast "
  "select count(*); disable for faster queries.",
  NULL,                              /* check func. */
  NULL,                              /* update func. */
  0                                  /* default */
);


static MYSQL_THDVAR_BOOL(
  use_transactions,                  /* name */
  PLUGIN_VAR_OPCMDARG,
  "Use transactions for large inserts; if enabled, large "
  "inserts will be split into several smaller transactions.",
  NULL,                              /* check func. */
  NULL,                              /* update func. */
  1                                  /* default */
);


static MYSQL_THDVAR_BOOL(
  use_copying_alter_table,           /* name */
  PLUGIN_VAR_OPCMDARG,
  "Force ndbcluster to always copy tables at alter table (should "
  "only be used if on-line alter table fails).",
  NULL,                              /* check func. */
  NULL,                              /* update func. */
  0                                  /* default */
);


static MYSQL_THDVAR_UINT(
  optimized_node_selection,          /* name */
  PLUGIN_VAR_OPCMDARG,
  "Select nodes for transactions in a more optimal way.",
  NULL,                              /* check func. */
  NULL,                              /* update func. */
  3,                                 /* default */
  0,                                 /* min */
  3,                                 /* max */
  0                                  /* block */
);


static MYSQL_THDVAR_ULONG(
  batch_size,                        /* name */
  PLUGIN_VAR_RQCMDARG,
  "Batch size in bytes.",
  NULL,                              /* check func. */
  NULL,                              /* update func. */
  32768,                             /* default */
  0,                                 /* min */
  ONE_YEAR_IN_SECONDS,               /* max */
  0                                  /* block */
);


static MYSQL_THDVAR_ULONG(
  optimization_delay,                /* name */
  PLUGIN_VAR_RQCMDARG,
  "For optimize table, specifies the delay in milliseconds "
  "for each batch of rows sent.",
  NULL,                              /* check func. */
  NULL,                              /* update func. */
  10,                                /* default */
  0,                                 /* min */
  100000,                            /* max */
  0                                  /* block */
);

#if NDB_VERSION_D < NDB_MAKE_VERSION(7,2,0)
#define DEFAULT_NDB_INDEX_STAT_ENABLE FALSE
#else
#define DEFAULT_NDB_INDEX_STAT_ENABLE TRUE
#endif

static MYSQL_THDVAR_BOOL(
  index_stat_enable,                 /* name */
  PLUGIN_VAR_OPCMDARG,
  "Use ndb index statistics in query optimization.",
  NULL,                              /* check func. */
  NULL,                              /* update func. */
  DEFAULT_NDB_INDEX_STAT_ENABLE      /* default */
);
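
/*
  Note: consumers check both the global and the session value, i.e.
  index statistics are only used when index_stat_enable is set both
  globally and in the current session (see set_rec_per_key() below,
  which tests THDVAR(NULL, ...) && THDVAR(thd, ...)).
*/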


static MYSQL_THDVAR_ULONG(
  index_stat_cache_entries,          /* name */
  PLUGIN_VAR_NOCMDARG,
  "Obsolete (ignored and will be removed later).",
  NULL,                              /* check func. */
  NULL,                              /* update func. */
  32,                                /* default */
  0,                                 /* min */
  ULONG_MAX,                         /* max */
  0                                  /* block */
);


static MYSQL_THDVAR_ULONG(
  index_stat_update_freq,            /* name */
  PLUGIN_VAR_NOCMDARG,
  "Obsolete (ignored and will be removed later).",
  NULL,                              /* check func. */
  NULL,                              /* update func. */
  20,                                /* default */
  0,                                 /* min */
  ULONG_MAX,                         /* max */
  0                                  /* block */
);


static MYSQL_THDVAR_BOOL(
  table_no_logging,                  /* name */
  PLUGIN_VAR_NOCMDARG,
  "",
  NULL,                              /* check func. */
  NULL,                              /* update func. */
  FALSE                              /* default */
);


static MYSQL_THDVAR_BOOL(
  table_temporary,                   /* name */
  PLUGIN_VAR_NOCMDARG,
  "",
  NULL,                              /* check func. */
  NULL,                              /* update func. */
  FALSE                              /* default */
);

static MYSQL_THDVAR_UINT(
  blob_read_batch_bytes,             /* name */
  PLUGIN_VAR_RQCMDARG,
  "Specifies the byte size large Blob reads "
  "should be batched into.  0 == No limit.",
  NULL,                              /* check func */
  NULL,                              /* update func */
  65536,                             /* default */
  0,                                 /* min */
  UINT_MAX,                          /* max */
  0                                  /* block */
);

static MYSQL_THDVAR_UINT(
  blob_write_batch_bytes,            /* name */
  PLUGIN_VAR_RQCMDARG,
  "Specifies the byte size large Blob writes "
  "should be batched into.  0 == No limit.",
  NULL,                              /* check func */
  NULL,                              /* update func */
  65536,                             /* default */
  0,                                 /* min */
  UINT_MAX,                          /* max */
  0                                  /* block */
);

static MYSQL_THDVAR_UINT(
  deferred_constraints,              /* name */
  PLUGIN_VAR_RQCMDARG,
  "Specifies that constraints should be checked deferred (when supported).",
  NULL,                              /* check func */
  NULL,                              /* update func */
  0,                                 /* default */
  0,                                 /* min */
  1,                                 /* max */
  0                                  /* block */
);

#if NDB_VERSION_D < NDB_MAKE_VERSION(7,2,0)
#define DEFAULT_NDB_JOIN_PUSHDOWN FALSE
#else
#define DEFAULT_NDB_JOIN_PUSHDOWN TRUE
#endif

static MYSQL_THDVAR_BOOL(
  join_pushdown,                     /* name */
  PLUGIN_VAR_OPCMDARG,
  "Enable pushing down of joins to the data nodes.",
  NULL,                              /* check func. */
  NULL,                              /* update func. */
  DEFAULT_NDB_JOIN_PUSHDOWN          /* default */
);

/*
  Required in index_stat.cc but available only from here,
  due to the use of top-level anonymous structs.
*/
bool ndb_index_stat_get_enable(THD *thd)
{
  const bool value = THDVAR(thd, index_stat_enable);
  return value;
}

static int ndbcluster_end(handlerton *hton, ha_panic_function flag);
static bool ndbcluster_show_status(handlerton *hton, THD*,
                                   stat_print_fn *,
                                   enum ha_stat_type);
static int ndbcluster_alter_tablespace(handlerton *hton,
                                       THD* thd,
                                       st_alter_tablespace *info);
static int ndbcluster_fill_files_table(handlerton *hton,
                                       THD *thd,
                                       TABLE_LIST *tables,
                                       Item *cond);

#if MYSQL_VERSION_ID >= 50501
/**
   Used to fill in INFORMATION_SCHEMA* tables.

   @param hton handle to the handlerton structure
   @param thd the thread/connection descriptor
   @param[in,out] tables the information schema table that is filled up
   @param cond used for conditional pushdown to storage engine
   @param schema_table_idx the table id that distinguishes the type of table

   @return Operation status
 */
static int
ndbcluster_fill_is_table(handlerton *hton, THD *thd, TABLE_LIST *tables,
                         Item *cond, enum enum_schema_tables schema_table_idx)
{
  if (schema_table_idx == SCH_FILES)
    return  ndbcluster_fill_files_table(hton, thd, tables, cond);
  return 0;
}
#endif


handlerton *ndbcluster_hton;

static handler *ndbcluster_create_handler(handlerton *hton,
                                          TABLE_SHARE *table,
                                          MEM_ROOT *mem_root)
{
  return new (mem_root) ha_ndbcluster(hton, table);
}

static uint
ndbcluster_partition_flags()
{
  return (HA_CAN_PARTITION | HA_CAN_UPDATE_PARTITION_KEY |
          HA_CAN_PARTITION_UNIQUE | HA_USE_AUTO_PARTITION);
}

#ifndef NDB_WITHOUT_ONLINE_ALTER
static uint
ndbcluster_alter_table_flags(uint flags)
{
  if (flags & ALTER_DROP_PARTITION)
    return 0;
  else
    return (HA_PARTITION_FUNCTION_SUPPORTED);
}
#else
static uint
ndbcluster_alter_table_flags(uint flags)
{
  const uint f=
    HA_PARTITION_FUNCTION_SUPPORTED |
    0;

  if (flags & Alter_info::ALTER_DROP_PARTITION)
    return 0;

  return f;
}
#endif

#define NDB_AUTO_INCREMENT_RETRIES 100
#define BATCH_FLUSH_SIZE (32768)
/*
  Room for 10 instruction words, two labels (@ 2 words/label)
  + 2 extra words for the case of resolve_size == 8
  (10 + 2*2 + 2 = 16).
*/
#define MAX_CONFLICT_INTERPRETED_PROG_SIZE 16

static int ndb_to_mysql_error(const NdbError *ndberr);

#define ERR_PRINT(err) \
  DBUG_PRINT("error", ("%d  message: %s", err.code, err.message))

#define ERR_RETURN(err)                  \
{                                        \
  const NdbError& tmp= err;              \
  DBUG_RETURN(ndb_to_mysql_error(&tmp)); \
}

#define ERR_BREAK(err, code)             \
{                                        \
  const NdbError& tmp= err;              \
  code= ndb_to_mysql_error(&tmp);        \
  break;                                 \
}

#define ERR_SET(err, code)               \
{                                        \
  const NdbError& tmp= err;              \
  code= ndb_to_mysql_error(&tmp);        \
}
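
/*
  Illustrative sketch of how the ERR_* helpers above are used in this
  file (the NdbError is mapped to a mysql error code before returning):

    const NdbError& err= trans->getNdbError();
    ERR_PRINT(err);    // trace code + message under the "error" DBUG tag
    ERR_RETURN(err);   // map to mysql error code and DBUG_RETURN it
*/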

static int ndbcluster_inited= 0;
int ndbcluster_terminating= 0;

/*
   Indicator and CONDVAR used to delay client and slave
   connections until Ndb has Binlog setup
   (bug#46955)
*/
int ndb_setup_complete= 0;
pthread_cond_t COND_ndb_setup_complete; // Signal with ndbcluster_mutex

extern Ndb* g_ndb;

uchar g_node_id_map[max_ndb_nodes];

/// Handler synchronization
pthread_mutex_t ndbcluster_mutex;

/// Table lock handling
HASH ndbcluster_open_tables;

static uchar *ndbcluster_get_key(NDB_SHARE *share, size_t *length,
                                my_bool not_used MY_ATTRIBUTE((unused)));

static void modify_shared_stats(NDB_SHARE *share,
                                Ndb_local_table_statistics *local_stat);

static int ndb_get_table_statistics(THD *thd, ha_ndbcluster*, bool, Ndb*,
                                    const NdbRecord *, struct Ndb_statistics *,
                                    bool have_lock= FALSE,
                                    uint part_id= ~(uint)0);

THD *injector_thd= 0;

// Util thread variables
pthread_t ndb_util_thread;
int ndb_util_thread_running= 0;
pthread_mutex_t LOCK_ndb_util_thread;
pthread_cond_t COND_ndb_util_thread;
pthread_cond_t COND_ndb_util_ready;
pthread_handler_t ndb_util_thread_func(void *arg);

// Index stats thread variables
pthread_t ndb_index_stat_thread;
int ndb_index_stat_thread_running= 0;
pthread_mutex_t LOCK_ndb_index_stat_thread;
pthread_cond_t COND_ndb_index_stat_thread;
pthread_cond_t COND_ndb_index_stat_ready;
pthread_mutex_t ndb_index_stat_list_mutex;
pthread_mutex_t ndb_index_stat_stat_mutex;
pthread_cond_t ndb_index_stat_stat_cond;
pthread_handler_t ndb_index_stat_thread_func(void *arg);

extern void ndb_index_stat_free(NDB_SHARE *share);
extern void ndb_index_stat_end();

/* Status variables shown with: SHOW STATUS LIKE 'Ndb%' */

struct st_ndb_status g_ndb_status;

long g_ndb_status_index_stat_cache_query = 0;
long g_ndb_status_index_stat_cache_clean = 0;

long long g_event_data_count = 0;
long long g_event_nondata_count = 0;
long long g_event_bytes_count = 0;

static long long g_slave_api_client_stats[Ndb::NumClientStatistics];

static long long g_server_api_client_stats[Ndb::NumClientStatistics];

void
update_slave_api_stats(Ndb* ndb)
{
  for (Uint32 i=0; i < Ndb::NumClientStatistics; i++)
    g_slave_api_client_stats[i] = ndb->getClientStat(i);
}

st_ndb_slave_state g_ndb_slave_state;

st_ndb_slave_state::st_ndb_slave_state()
  : current_conflict_defined_op_count(0),
    current_master_server_epoch(0),
    current_max_rep_epoch(0),
    max_rep_epoch(0),
    sql_run_id(~Uint32(0))
{
  memset(current_violation_count, 0, sizeof(current_violation_count));
  memset(total_violation_count, 0, sizeof(total_violation_count));
}

void
st_ndb_slave_state::atTransactionAbort()
{
  /* Reset current-transaction counters + state */
  memset(current_violation_count, 0, sizeof(current_violation_count));
  current_conflict_defined_op_count = 0;
  current_max_rep_epoch = 0;
}

void
st_ndb_slave_state::atTransactionCommit()
{
  /* Merge committed transaction counters into total state
   * Then reset current transaction counters
   */
  for (int i=0; i < CFT_NUMBER_OF_CFTS; i++)
  {
    total_violation_count[i]+= current_violation_count[i];
    current_violation_count[i] = 0;
  }
  current_conflict_defined_op_count = 0;
  if (current_max_rep_epoch > max_rep_epoch)
  {
    DBUG_PRINT("info", ("Max replicated epoch increases from %llu to %llu",
                        max_rep_epoch,
                        current_max_rep_epoch));

    max_rep_epoch = current_max_rep_epoch;
  }
  current_max_rep_epoch = 0;
}

void
st_ndb_slave_state::atApplyStatusWrite(Uint32 master_server_id,
                                       Uint32 row_server_id,
                                       Uint64 row_epoch,
                                       bool is_row_server_id_local)
{
  if (row_server_id == master_server_id)
  {
    /*
       WRITE_ROW to ndb_apply_status injected by MySQLD
       immediately upstream of us.
       Record epoch
    */
    current_master_server_epoch = row_epoch;
    assert(! is_row_server_id_local);
  }
  else if (is_row_server_id_local)
  {
    DBUG_PRINT("info", ("Recording application of local server %u epoch %llu "
                        " which is %s.",
                        row_server_id, row_epoch,
                        (row_epoch > g_ndb_slave_state.current_max_rep_epoch)?
                        " new highest." : " older than previously applied"));
    if (row_epoch > current_max_rep_epoch)
    {
      /*
        Store new highest epoch in thdvar.  If we commit successfully
        then this can become the new global max
      */
      current_max_rep_epoch = row_epoch;
    }
  }
}

void
st_ndb_slave_state::atResetSlave()
{
  /* Reset the Maximum replicated epoch vars
   * on slave reset
   * No need to touch the sql_run_id as that
   * will increment if the slave is started
   * again.
   */
  current_max_rep_epoch = 0;
  max_rep_epoch = 0;
}

static int check_slave_state(THD* thd)
{
  DBUG_ENTER("check_slave_state");

#ifdef HAVE_NDB_BINLOG
  if (!thd->slave_thread)
    DBUG_RETURN(0);

  const Uint32 runId = ndb_mi_get_slave_run_id();
  DBUG_PRINT("info", ("Slave SQL thread run id is %u",
                      runId));
  if (unlikely(runId != g_ndb_slave_state.sql_run_id))
  {
    DBUG_PRINT("info", ("Slave run id changed from %u, "
                        "treating as Slave restart",
                        g_ndb_slave_state.sql_run_id));
    g_ndb_slave_state.sql_run_id = runId;

    /* Always try to load the Max Replicated Epoch info
     * first.
     * Could be made optional if it's a problem
     */
    {
      /*
         Load highest replicated epoch from a local
         MySQLD from the cluster.
      */
      DBUG_PRINT("info", ("Loading applied epoch information from %s",
                          NDB_APPLY_TABLE));
      NdbError ndb_error;
      Uint64 highestAppliedEpoch = 0;
      do
      {
        Ndb* ndb= check_ndb_in_thd(thd);
        NDBDICT* dict= ndb->getDictionary();
        NdbTransaction* trans= NULL;
        ndb->setDatabaseName(NDB_REP_DB);
        Ndb_table_guard ndbtab_g(dict, NDB_APPLY_TABLE);

        const NDBTAB* ndbtab= ndbtab_g.get_table();
        if (unlikely(ndbtab == NULL))
        {
          ndb_error = dict->getNdbError();
          break;
        }

        trans= ndb->startTransaction();
        if (unlikely(trans == NULL))
        {
          ndb_error = ndb->getNdbError();
          break;
        }

        do
        {
          NdbScanOperation* sop = trans->getNdbScanOperation(ndbtab);
          if (unlikely(sop == NULL))
          {
            ndb_error = trans->getNdbError();
            break;
          }

          const Uint32 server_id_col_num = 0;
          const Uint32 epoch_col_num = 1;
          NdbRecAttr* server_id_ra = 0;
          NdbRecAttr* epoch_ra = 0;

          if (unlikely((sop->readTuples(NdbOperation::LM_CommittedRead) != 0)   ||
                       ((server_id_ra = sop->getValue(server_id_col_num)) == NULL)  ||
                       ((epoch_ra = sop->getValue(epoch_col_num)) == NULL)))
          {
            ndb_error = sop->getNdbError();
            break;
          }

          if (trans->execute(NdbTransaction::Commit))
          {
            ndb_error = trans->getNdbError();
            break;
          }

          int rc = 0;
          while (0 == (rc= sop->nextResult(true)))
          {
            Uint32 serverid = server_id_ra->u_32_value();
            Uint64 epoch = epoch_ra->u_64_value();

            if ((serverid == ::server_id) ||
                (ndb_mi_get_ignore_server_id(serverid)))
            {
              highestAppliedEpoch = MAX(epoch, highestAppliedEpoch);
            }
          }

          if (rc != 1)
          {
            ndb_error = sop->getNdbError();
            break;
          }
        } while (0);

        trans->close();
      } while(0);

      if (ndb_error.code != 0)
      {
        sql_print_warning("NDB Slave : Could not determine maximum replicated epoch from %s.%s "
                          "at Slave start, error %u %s",
                          NDB_REP_DB,
                          NDB_APPLY_TABLE,
                          ndb_error.code, ndb_error.message);
      }

      /*
        Set Global status variable to the Highest Applied Epoch from
        the Cluster DB.
        If none was found, this will be zero.
      */
      g_ndb_slave_state.max_rep_epoch = highestAppliedEpoch;
      sql_print_information("NDB Slave : MaxReplicatedEpoch set to %llu (%u/%u) at Slave start",
                            g_ndb_slave_state.max_rep_epoch,
                            (Uint32)(g_ndb_slave_state.max_rep_epoch >> 32),
                            (Uint32)(g_ndb_slave_state.max_rep_epoch & 0xffffffff));
    } // Load highest replicated epoch
  } // New Slave SQL thread run id
#endif

  DBUG_RETURN(0);
}


static int update_status_variables(Thd_ndb *thd_ndb,
                                   st_ndb_status *ns,
                                   Ndb_cluster_connection *c)
{
  ns->connected_port= c->get_connected_port();
  ns->connected_host= c->get_connected_host();
  if (ns->cluster_node_id != (int) c->node_id())
  {
    ns->cluster_node_id= c->node_id();
    if (&g_ndb_status == ns && g_ndb_cluster_connection == c)
      sql_print_information("NDB: NodeID is %lu, management server '%s:%lu'",
                            ns->cluster_node_id, ns->connected_host,
                            ns->connected_port);
  }
  ns->number_of_replicas= 0;
  {
    int n= c->get_no_ready();
    ns->number_of_ready_data_nodes= n > 0 ?  n : 0;
  }
  ns->number_of_data_nodes= c->no_db_nodes();
  ns->connect_count= c->get_connect_count();
  if (thd_ndb)
  {
    ns->execute_count= thd_ndb->m_execute_count;
    ns->scan_count= thd_ndb->m_scan_count;
    ns->pruned_scan_count= thd_ndb->m_pruned_scan_count;
    ns->sorted_scan_count= thd_ndb->m_sorted_scan_count;
    ns->pushed_queries_defined= thd_ndb->m_pushed_queries_defined;
    ns->pushed_queries_dropped= thd_ndb->m_pushed_queries_dropped;
    ns->pushed_queries_executed= thd_ndb->m_pushed_queries_executed;
    ns->pushed_reads= thd_ndb->m_pushed_reads;
    for (int i= 0; i < MAX_NDB_NODES; i++)
    {
      ns->transaction_no_hint_count[i]= thd_ndb->m_transaction_no_hint_count[i];
      ns->transaction_hint_count[i]= thd_ndb->m_transaction_hint_count[i];
    }
    for (int i=0; i < Ndb::NumClientStatistics; i++)
    {
      ns->api_client_stats[i] = thd_ndb->ndb->getClientStat(i);
    }
    ns->schema_locks_count= thd_ndb->schema_locks_count;
  }
  return 0;
}

/* Helper macro for definitions of NdbApi status variables */

#define NDBAPI_COUNTERS(NAME_SUFFIX, ARRAY_LOCATION)                    \
  {"api_wait_exec_complete_count" NAME_SUFFIX,                          \
   (char*) ARRAY_LOCATION[ Ndb::WaitExecCompleteCount ],                \
   SHOW_LONGLONG},                                                      \
  {"api_wait_scan_result_count" NAME_SUFFIX,                            \
   (char*) ARRAY_LOCATION[ Ndb::WaitScanResultCount ],                  \
   SHOW_LONGLONG},                                                      \
  {"api_wait_meta_request_count" NAME_SUFFIX,                           \
   (char*) ARRAY_LOCATION[ Ndb::WaitMetaRequestCount ],                 \
   SHOW_LONGLONG},                                                      \
  {"api_wait_nanos_count" NAME_SUFFIX,                                  \
   (char*) ARRAY_LOCATION[ Ndb::WaitNanosCount ],                       \
   SHOW_LONGLONG},                                                      \
  {"api_bytes_sent_count" NAME_SUFFIX,                                  \
   (char*) ARRAY_LOCATION[ Ndb::BytesSentCount ],                       \
   SHOW_LONGLONG},                                                      \
  {"api_bytes_received_count" NAME_SUFFIX,                              \
   (char*) ARRAY_LOCATION[ Ndb::BytesRecvdCount ],                      \
   SHOW_LONGLONG},                                                      \
  {"api_trans_start_count" NAME_SUFFIX,                                 \
   (char*) ARRAY_LOCATION[ Ndb::TransStartCount ],                      \
   SHOW_LONGLONG},                                                      \
  {"api_trans_commit_count" NAME_SUFFIX,                                \
   (char*) ARRAY_LOCATION[ Ndb::TransCommitCount ],                     \
   SHOW_LONGLONG},                                                      \
  {"api_trans_abort_count" NAME_SUFFIX,                                 \
   (char*) ARRAY_LOCATION[ Ndb::TransAbortCount ],                      \
   SHOW_LONGLONG},                                                      \
  {"api_trans_close_count" NAME_SUFFIX,                                 \
   (char*) ARRAY_LOCATION[ Ndb::TransCloseCount ],                      \
   SHOW_LONGLONG},                                                      \
  {"api_pk_op_count" NAME_SUFFIX,                                       \
   (char*) ARRAY_LOCATION[ Ndb::PkOpCount ],                            \
   SHOW_LONGLONG},                                                      \
  {"api_uk_op_count" NAME_SUFFIX,                                       \
   (char*) ARRAY_LOCATION[ Ndb::UkOpCount ],                            \
   SHOW_LONGLONG},                                                      \
  {"api_table_scan_count" NAME_SUFFIX,                                  \
   (char*) ARRAY_LOCATION[ Ndb::TableScanCount ],                       \
   SHOW_LONGLONG},                                                      \
  {"api_range_scan_count" NAME_SUFFIX,                                  \
   (char*) ARRAY_LOCATION[ Ndb::RangeScanCount ],                       \
   SHOW_LONGLONG},                                                      \
  {"api_pruned_scan_count" NAME_SUFFIX,                                 \
   (char*) ARRAY_LOCATION[ Ndb::PrunedScanCount ],                      \
   SHOW_LONGLONG},                                                      \
  {"api_scan_batch_count" NAME_SUFFIX,                                  \
   (char*) ARRAY_LOCATION[ Ndb::ScanBatchCount ],                       \
   SHOW_LONGLONG},                                                      \
  {"api_read_row_count" NAME_SUFFIX,                                    \
   (char*) ARRAY_LOCATION[ Ndb::ReadRowCount ],                         \
   SHOW_LONGLONG},                                                      \
  {"api_trans_local_read_row_count" NAME_SUFFIX,                        \
   (char*) ARRAY_LOCATION[ Ndb::TransLocalReadRowCount ],               \
   SHOW_LONGLONG}
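
/*
  NDBAPI_COUNTERS relies on C string-literal concatenation: each
  instantiation below appends its NAME_SUFFIX to every counter name,
  so the "_session" instantiation, for example, produces variables
  such as "api_wait_exec_complete_count_session".
*/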

SHOW_VAR ndb_status_variables_dynamic[]= {
  {"cluster_node_id",     (char*) &g_ndb_status.cluster_node_id,      SHOW_LONG},
  {"config_from_host",    (char*) &g_ndb_status.connected_host,       SHOW_CHAR_PTR},
  {"config_from_port",    (char*) &g_ndb_status.connected_port,       SHOW_LONG},
//{"number_of_replicas",  (char*) &g_ndb_status.number_of_replicas,   SHOW_LONG},
  {"number_of_data_nodes",(char*) &g_ndb_status.number_of_data_nodes, SHOW_LONG},
  {"number_of_ready_data_nodes",
   (char*) &g_ndb_status.number_of_ready_data_nodes,                  SHOW_LONG},
  {"connect_count",      (char*) &g_ndb_status.connect_count,         SHOW_LONG},
  {"execute_count",      (char*) &g_ndb_status.execute_count,         SHOW_LONG},
  {"scan_count",         (char*) &g_ndb_status.scan_count,            SHOW_LONG},
  {"pruned_scan_count",  (char*) &g_ndb_status.pruned_scan_count,     SHOW_LONG},
  {"schema_locks_count", (char*) &g_ndb_status.schema_locks_count,    SHOW_LONG},
  NDBAPI_COUNTERS("_session", &g_ndb_status.api_client_stats),
  {"sorted_scan_count",  (char*) &g_ndb_status.sorted_scan_count,     SHOW_LONG},
  {"pushed_queries_defined", (char*) &g_ndb_status.pushed_queries_defined,
   SHOW_LONG},
  {"pushed_queries_dropped", (char*) &g_ndb_status.pushed_queries_dropped,
   SHOW_LONG},
  {"pushed_queries_executed", (char*) &g_ndb_status.pushed_queries_executed,
   SHOW_LONG},
  {"pushed_reads",       (char*) &g_ndb_status.pushed_reads,          SHOW_LONG},
  {NullS, NullS, SHOW_LONG}
};

SHOW_VAR ndb_status_conflict_variables[]= {
  {"fn_max",       (char*) &g_ndb_slave_state.total_violation_count[CFT_NDB_MAX], SHOW_LONGLONG},
  {"fn_old",       (char*) &g_ndb_slave_state.total_violation_count[CFT_NDB_OLD], SHOW_LONGLONG},
  {"fn_max_del_win", (char*) &g_ndb_slave_state.total_violation_count[CFT_NDB_MAX_DEL_WIN], SHOW_LONGLONG},
  {"fn_epoch",     (char*) &g_ndb_slave_state.total_violation_count[CFT_NDB_EPOCH], SHOW_LONGLONG},
  {NullS, NullS, SHOW_LONG}
};

SHOW_VAR ndb_status_injector_variables[]= {
  {"api_event_data_count_injector",     (char*) &g_event_data_count, SHOW_LONGLONG},
  {"api_event_nondata_count_injector",  (char*) &g_event_nondata_count, SHOW_LONGLONG},
  {"api_event_bytes_count_injector",    (char*) &g_event_bytes_count, SHOW_LONGLONG},
  {NullS, NullS, SHOW_LONG}
};

SHOW_VAR ndb_status_slave_variables[]= {
  NDBAPI_COUNTERS("_slave", &g_slave_api_client_stats),
  {"slave_max_replicated_epoch", (char*) &g_ndb_slave_state.max_rep_epoch, SHOW_LONGLONG},
  {NullS, NullS, SHOW_LONG}
};

SHOW_VAR ndb_status_server_client_stat_variables[]= {
  NDBAPI_COUNTERS("", &g_server_api_client_stats),
  {"api_event_data_count",
   (char*) &g_server_api_client_stats[ Ndb::DataEventsRecvdCount ],
   SHOW_LONGLONG},
  {"api_event_nondata_count",
   (char*) &g_server_api_client_stats[ Ndb::NonDataEventsRecvdCount ],
   SHOW_LONGLONG},
  {"api_event_bytes_count",
   (char*) &g_server_api_client_stats[ Ndb::EventBytesRecvdCount ],
   SHOW_LONGLONG},
  {NullS, NullS, SHOW_LONG}
};

static int show_ndb_server_api_stats(THD *thd, SHOW_VAR *var, char *buff)
{
  /* This function is called when SHOW STATUS / INFO_SCHEMA wants
   * to see one of our status vars.
   * We use this opportunity to:
   *  1) Update the globals with current values
   *  2) Return an array of var definitions, pointing to
   *     the updated globals
   */
  ndb_get_connection_stats((Uint64*) &g_server_api_client_stats[0]);

  var->type= SHOW_ARRAY;
  var->value= (char*) ndb_status_server_client_stat_variables;

  return 0;
}

SHOW_VAR ndb_status_index_stat_variables[]= {
  {"cache_query",     (char*) &g_ndb_status_index_stat_cache_query, SHOW_LONG},
  {"cache_clean",     (char*) &g_ndb_status_index_stat_cache_clean, SHOW_LONG},
  {NullS, NullS, SHOW_LONG}
};

#ifndef NDB_WITHOUT_JOIN_PUSHDOWN
static int ndbcluster_make_pushed_join(handlerton *, THD*, AQP::Join_plan*);
#endif

/*
  Error handling functions
*/

/* Note for merge: old mapping table, moved to storage/ndb/ndberror.c */

static int ndb_to_mysql_error(const NdbError *ndberr)
{
  /* read the mysql mapped error code */
  int error= ndberr->mysql_code;

  switch (error)
  {
    /* errors for which we do not add warnings, just return mapped error code
    */
  case HA_ERR_NO_SUCH_TABLE:
  case HA_ERR_KEY_NOT_FOUND:
    return error;

    /* Mapping missing, go with the ndb error code */
  case -1:
    error= ndberr->code;
    break;
    /* Mapping exists, go with the mapped code */
  default:
    break;
  }

  /*
    If we don't abort directly on warnings, push a warning
    with the internal error information
   */
  if (!current_thd->abort_on_warning)
  {
    /*
      Push the NDB error message as warning
      - Used to be able to use SHOW WARNINGS to get more info on what the error is
      - Used by replication to see if the error was temporary
    */
    if (ndberr->status == NdbError::TemporaryError)
      push_warning_printf(current_thd, Sql_condition::WARN_LEVEL_WARN,
                          ER_GET_TEMPORARY_ERRMSG, ER(ER_GET_TEMPORARY_ERRMSG),
                          ndberr->code, ndberr->message, "NDB");
    else
      push_warning_printf(current_thd, Sql_condition::WARN_LEVEL_WARN,
                          ER_GET_ERRMSG, ER(ER_GET_ERRMSG),
                          ndberr->code, ndberr->message, "NDB");
  }
  return error;
}

#ifdef HAVE_NDB_BINLOG

/* Write conflicting row to exceptions table. */
static int write_conflict_row(NDB_SHARE *share,
                              NdbTransaction *trans,
                              const uchar *row,
                              NdbError& err)
{
  DBUG_ENTER("write_conflict_row");

  /* get exceptions table */
  NDB_CONFLICT_FN_SHARE *cfn_share= share->m_cfn_share;
  const NDBTAB *ex_tab= cfn_share->m_ex_tab;
  DBUG_ASSERT(ex_tab != NULL);

  /* get insert op */
  NdbOperation *ex_op= trans->getNdbOperation(ex_tab);
  if (ex_op == NULL)
  {
    err= trans->getNdbError();
    DBUG_RETURN(-1);
  }
  if (ex_op->insertTuple() == -1)
  {
    err= ex_op->getNdbError();
    DBUG_RETURN(-1);
  }
  {
    uint32 server_id= (uint32)::server_id;
    uint32 master_server_id= (uint32) ndb_mi_get_master_server_id();
    uint64 master_epoch= (uint64) g_ndb_slave_state.current_master_server_epoch;
    uint32 count= (uint32)++(cfn_share->m_count);
    if (ex_op->setValue((Uint32)0, (const char *)&(server_id)) ||
        ex_op->setValue((Uint32)1, (const char *)&(master_server_id)) ||
        ex_op->setValue((Uint32)2, (const char *)&(master_epoch)) ||
        ex_op->setValue((Uint32)3, (const char *)&(count)))
    {
      err= ex_op->getNdbError();
      DBUG_RETURN(-1);
    }
  }
  /* copy primary keys */
  {
    const int fixed_cols= 4;
    int nkey= cfn_share->m_pk_cols;
    int k;
    for (k= 0; k < nkey; k++)
    {
      DBUG_ASSERT(row != NULL);
      const uchar* data= row + cfn_share->m_offset[k];
      if (ex_op->setValue((Uint32)(fixed_cols + k), (const char*)data) == -1)
      {
        err= ex_op->getNdbError();
        DBUG_RETURN(-1);
      }
    }
  }
  DBUG_RETURN(0);
}
#endif

#ifdef HAVE_NDB_BINLOG
int
handle_conflict_op_error(Thd_ndb* thd_ndb,
                         NdbTransaction* trans,
                         const NdbError& err,
                         const NdbOperation* op);

int
handle_row_conflict(NDB_CONFLICT_FN_SHARE* cfn_share,
                    const char* tab_name,
                    const NdbRecord* key_rec,
                    const uchar* pk_row,
                    enum_conflicting_op_type op_type,
                    enum_conflict_cause conflict_cause,
                    const NdbError& conflict_error,
                    NdbTransaction* conflict_trans,
                    NdbError& err);
#endif

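/* NDB error code 920 is returned for a row operation that arrives after
   a refreshTuple() on the same row; the conflict handling code below
   deliberately ignores it (see check_completed_operations_pre_commit()). */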
static const Uint32 error_op_after_refresh_op = 920;

inline int
check_completed_operations_pre_commit(Thd_ndb *thd_ndb, NdbTransaction *trans,
                                      const NdbOperation *first,
                                      const NdbOperation *last,
                                      uint *ignore_count)
{
  uint ignores= 0;
  DBUG_ENTER("check_completed_operations_pre_commit");

  if (unlikely(first == 0))
  {
    assert(last == 0);
    DBUG_RETURN(0);
  }

  /*
    Check that all errors are "accepted" errors
    or exceptions to report
  */
#ifdef HAVE_NDB_BINLOG
  const NdbOperation* lastUserOp = trans->getLastDefinedOperation();
#endif
  while (true)
  {
    const NdbError &err= first->getNdbError();
    const bool op_has_conflict_detection = (first->getCustomData() != NULL);
    if (!op_has_conflict_detection)
    {
      /* 'Normal path' - ignore key (not) present, others are errors */
      if (err.classification != NdbError::NoError &&
          err.classification != NdbError::ConstraintViolation &&
          err.classification != NdbError::NoDataFound)
      {
        /* Non ignored error, report it */
        DBUG_PRINT("info", ("err.code == %u", err.code));
        DBUG_RETURN(err.code);
      }
    }
#ifdef HAVE_NDB_BINLOG
    else
    {
      /*
         Op with conflict detection, use special error handling method
       */

      if (err.classification != NdbError::NoError)
      {
        int res = handle_conflict_op_error(thd_ndb,
                                           trans,
                                           err,
                                           first);
        if (res != 0)
          DBUG_RETURN(res);
      }
    } // if (!op_has_conflict_detection)
#endif
    if (err.classification != NdbError::NoError)
      ignores++;

    if (first == last)
      break;

    first= trans->getNextCompletedOperation(first);
  }
  if (ignore_count)
    *ignore_count= ignores;
#ifdef HAVE_NDB_BINLOG
  /*
     Conflict detection related error handling above may have defined
     new operations on the transaction.  If so, execute them now
  */
  if (trans->getLastDefinedOperation() != lastUserOp)
  {
    const NdbOperation* last_conflict_op = trans->getLastDefinedOperation();

    if (trans->execute(NdbTransaction::NoCommit,
                       NdbOperation::AO_IgnoreError,
                       thd_ndb->m_force_send))
    {
      abort();
      //err= trans->getNdbError();
    }

    if (trans->getNdbError().code)
    {
      /* Check the result codes of the operations we added */
      const NdbOperation* conflict_op = NULL;
      do
      {
        conflict_op = trans->getNextCompletedOperation(conflict_op);
        assert(conflict_op != NULL);
        /* We will ignore 920 which represents a refreshOp or other op
         * arriving after a refreshOp
         */
        const NdbError& err = conflict_op->getNdbError();
        if ((err.code != 0) &&
            (err.code != (int) error_op_after_refresh_op))
        {
          if (err.status == NdbError::TemporaryError)
          {
            /* Slave will roll back and retry entire transaction. */
            ERR_RETURN(err);
          }
          else
          {
            char msg[FN_REFLEN];
            my_snprintf(msg, sizeof(msg), "Executing extra operations for "
                        "conflict handling hit Ndb error %d '%s'",
                        err.code, err.message);
            push_warning_printf(current_thd, Sql_condition::WARN_LEVEL_ERROR,
                                ER_EXCEPTIONS_WRITE_ERROR,
                                ER(ER_EXCEPTIONS_WRITE_ERROR), msg);
            /* Slave will stop replication. */
            DBUG_RETURN(ER_EXCEPTIONS_WRITE_ERROR);
          }
        }
      } while (conflict_op != last_conflict_op);
    }
  }
#endif
  DBUG_RETURN(0);
}

inline int
check_completed_operations(Thd_ndb *thd_ndb, NdbTransaction *trans,
                           const NdbOperation *first,
                           const NdbOperation *last,
                           uint *ignore_count)
{
  uint ignores= 0;
  DBUG_ENTER("check_completed_operations");

  if (unlikely(first == 0))
  {
    assert(last == 0);
    DBUG_RETURN(0);
  }

  /*
    Check that all errors are "accepted" errors
  */
  while (true)
  {
    const NdbError &err= first->getNdbError();
    if (err.classification != NdbError::NoError &&
        err.classification != NdbError::ConstraintViolation &&
        err.classification != NdbError::NoDataFound)
    {
#ifdef HAVE_NDB_BINLOG
      /* All conflict detection etc should be done before commit */
      DBUG_ASSERT((err.code != (int) error_conflict_fn_violation) &&
                  (err.code != (int) error_op_after_refresh_op));
#endif
      DBUG_RETURN(err.code);
    }
    if (err.classification != NdbError::NoError)
      ignores++;

    if (first == last)
      break;

    first= trans->getNextCompletedOperation(first);
  }
  if (ignore_count)
    *ignore_count= ignores;
  DBUG_RETURN(0);
}

void
ha_ndbcluster::release_completed_operations(NdbTransaction *trans)
{
  /**
   * mysqld reads/writes blobs fully,
   *   which means that it does not keep blobs
   *   open/active over execute, which means
   *   that it should be safe to release anything completed here
   *
   *   i.e. don't check for blobs, but just go ahead and release
   */
  trans->releaseCompletedOperations();
  trans->releaseCompletedQueries();
}

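/*
  Note: the forward declarations of execute_no_commit() and
  execute_commit() below carry the default arguments; the inline
  definitions that follow must not (and do not) repeat them.
*/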
int execute_no_commit(Thd_ndb *thd_ndb, NdbTransaction *trans,
                      bool ignore_no_key,
                      uint *ignore_count= 0);
inline
int execute_no_commit(Thd_ndb *thd_ndb, NdbTransaction *trans,
                      bool ignore_no_key,
                      uint *ignore_count)
{
  DBUG_ENTER("execute_no_commit");
  ha_ndbcluster::release_completed_operations(trans);
  const NdbOperation *first= trans->getFirstDefinedOperation();
  const NdbOperation *last= trans->getLastDefinedOperation();
  thd_ndb->m_execute_count++;
  thd_ndb->m_unsent_bytes= 0;
  DBUG_PRINT("info", ("execute_count: %u", thd_ndb->m_execute_count));
  if (trans->execute(NdbTransaction::NoCommit,
                     NdbOperation::AO_IgnoreError,
                     thd_ndb->m_force_send))
  {
    DBUG_RETURN(-1);
  }
  if (!ignore_no_key || trans->getNdbError().code == 0)
    DBUG_RETURN(trans->getNdbError().code);

  DBUG_RETURN(check_completed_operations_pre_commit(thd_ndb, trans,
                                                    first, last,
                                                    ignore_count));
}

int execute_commit(THD* thd, Thd_ndb *thd_ndb, NdbTransaction *trans,
                   int force_send, int ignore_error, uint *ignore_count= 0);
inline
int execute_commit(THD* thd, Thd_ndb *thd_ndb, NdbTransaction *trans,
                   int force_send, int ignore_error, uint *ignore_count)
{
  DBUG_ENTER("execute_commit");
  NdbOperation::AbortOption ao= NdbOperation::AO_IgnoreError;
  if (thd_ndb->m_unsent_bytes && !ignore_error)
  {
    /*
      We have unsent bytes and cannot ignore error.  Calling execute
      with NdbOperation::AO_IgnoreError could result in a commit
      of the transaction even though there is an error.
    */
    ao= NdbOperation::AbortOnError;
  }
  const NdbOperation *first= trans->getFirstDefinedOperation();
  const NdbOperation *last= trans->getLastDefinedOperation();
  thd_ndb->m_execute_count++;
  thd_ndb->m_unsent_bytes= 0;
  DBUG_PRINT("info", ("execute_count: %u", thd_ndb->m_execute_count));
  if (trans->execute(NdbTransaction::Commit, ao, force_send))
  {
    if (thd->slave_thread)
      g_ndb_slave_state.atTransactionAbort();
    DBUG_RETURN(-1);
  }
  /* Success of some sort */
  if (thd->slave_thread)
  {
    g_ndb_slave_state.atTransactionCommit();
  }
  if (!ignore_error || trans->getNdbError().code == 0)
    DBUG_RETURN(trans->getNdbError().code);
  DBUG_RETURN(check_completed_operations(thd_ndb, trans, first, last,
                                         ignore_count));
}

inline
int execute_no_commit_ie(Thd_ndb *thd_ndb, NdbTransaction *trans)
{
  DBUG_ENTER("execute_no_commit_ie");
  ha_ndbcluster::release_completed_operations(trans);
  int res= trans->execute(NdbTransaction::NoCommit,
                          NdbOperation::AO_IgnoreError,
                          thd_ndb->m_force_send);
  thd_ndb->m_unsent_bytes= 0;
  thd_ndb->m_execute_count++;
  DBUG_PRINT("info", ("execute_count: %u", thd_ndb->m_execute_count));
  DBUG_RETURN(res);
}

/*
  Placeholder for ha_ndbcluster thread-specific data
*/
typedef struct st_thd_ndb_share {
  const void *key;
  struct Ndb_local_table_statistics stat;
} THD_NDB_SHARE;
static
uchar *thd_ndb_share_get_key(THD_NDB_SHARE *thd_ndb_share, size_t *length,
                            my_bool not_used MY_ATTRIBUTE((unused)))
{
  *length= sizeof(thd_ndb_share->key);
  return (uchar*) &thd_ndb_share->key;
}

Thd_ndb::Thd_ndb(THD* thd) :
  m_thd(thd),
  schema_locks_count(0)
{
  connection= ndb_get_cluster_connection();
  m_connect_count= connection->get_connect_count();
  ndb= new Ndb(connection, "");
  lock_count= 0;
  start_stmt_count= 0;
  save_point_count= 0;
  count= 0;
  trans= NULL;
  m_handler= NULL;
  m_error= FALSE;
  options= 0;
  (void) my_hash_init(&open_tables, table_alias_charset, 5, 0, 0,
                      (my_hash_get_key)thd_ndb_share_get_key, 0, 0);
  m_unsent_bytes= 0;
  m_execute_count= 0;
  m_scan_count= 0;
  m_pruned_scan_count= 0;
  m_sorted_scan_count= 0;
  m_pushed_queries_defined= 0;
  m_pushed_queries_dropped= 0;
  m_pushed_queries_executed= 0;
  m_pushed_reads= 0;
  memset(m_transaction_no_hint_count, 0, sizeof(m_transaction_no_hint_count));
  memset(m_transaction_hint_count, 0, sizeof(m_transaction_hint_count));
  global_schema_lock_trans= NULL;
  global_schema_lock_count= 0;
  global_schema_lock_error= 0;
  init_alloc_root(&m_batch_mem_root, BATCH_FLUSH_SIZE/4, 0);
}

Thd_ndb::~Thd_ndb()
{
  if (opt_ndb_extra_logging > 1)
  {
    /*
      Print some stats about the connection at disconnect
    */
    for (int i= 0; i < MAX_NDB_NODES; i++)
    {
      if (m_transaction_hint_count[i] > 0 ||
          m_transaction_no_hint_count[i] > 0)
      {
        sql_print_information("tid %u: node[%u] "
                              "transaction_hint=%u, transaction_no_hint=%u",
                              (unsigned)current_thd->thread_id, i,
                              m_transaction_hint_count[i],
                              m_transaction_no_hint_count[i]);
      }
    }
  }
  if (ndb)
  {
    delete ndb;
    ndb= NULL;
  }
  changed_tables.empty();
  my_hash_free(&open_tables);
  free_root(&m_batch_mem_root, MYF(0));
}


inline
Ndb *ha_ndbcluster::get_ndb(THD *thd)
{
  return get_thd_ndb(thd)->ndb;
}

/*
 * Manage uncommitted inserts/deletes during a transaction to keep
 * the record count correct
 */

void ha_ndbcluster::set_rec_per_key()
{
  DBUG_ENTER("ha_ndbcluster::set_rec_per_key");
  /*
    Set up the 'rec_per_key[]' for keys for which we have good knowledge
    about the distribution. 'rec_per_key[]' is init'ed to '0' by
    open_binary_frm(), which is interpreted as 'unknown' by the optimizer.
    -> Not setting 'rec_per_key[]' will force the optimizer to use
    its own heuristic to estimate 'records per key'.
  */
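  /*
    Illustrative example: for a unique index on (a,b), setting
    rec_per_key[1]= 1 below tells the optimizer that a lookup on the
    full key (both parts) matches exactly one row; key prefixes and
    ordered indexes are left at 0 ('unknown') unless index statistics
    provide an estimate.
  */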
1394   for (uint i=0 ; i < table_share->keys ; i++)
1395   {
1396     bool is_unique_index= false;
1397     KEY* key_info= table->key_info + i;
1398     switch (get_index_type(i))
1399     {
1400     case UNIQUE_INDEX:
1401     case PRIMARY_KEY_INDEX:
1402     {
1403       // Index is unique when all 'key_parts' are specified,
1404       // else distribution is unknown and not specified here.
1405       is_unique_index= true;
1406       break;
1407     }
1408     case UNIQUE_ORDERED_INDEX:
1409     case PRIMARY_KEY_ORDERED_INDEX:
1410       is_unique_index= true;
1411       // intentional fall thru to logic for ordered index
1412     case ORDERED_INDEX:
1413       // 'Records pr. key' are unknown for non-unique indexes.
1414       // (May change when we get better index statistics.)
1415     {
1416       THD *thd= current_thd;
1417       const bool index_stat_enable= THDVAR(NULL, index_stat_enable) &&
1418                                     THDVAR(thd, index_stat_enable);
1419       if (index_stat_enable)
1420       {
1421         int err= ndb_index_stat_set_rpk(i);
1422         if (err != 0 &&
1423             /* no stats is not unexpected error */
1424             err != NdbIndexStat::NoIndexStats &&
1425             /* warning was printed at first error */
1426             err != Ndb_index_stat_error_HAS_ERROR)
1427         {
1428           push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN,
1429                               ER_CANT_GET_STAT, /* pun? */
1430                               "index stats (RPK) for key %s:"
1431                               " unexpected error %d",
1432                               key_info->name, err);
1433         }
1434       }
1435       // no fallback method...
1436       break;
1437     }
1438     default:
1439       DBUG_ASSERT(false);
1440     }
1441     // set rows per key to 1 for complete key given for unique/primary index
1442     if (is_unique_index)
1443     {
1444       key_info->rec_per_key[key_info->user_defined_key_parts-1]= 1;
1445     }
1446   }
1447   DBUG_VOID_RETURN;
1448 }
1449 
records()1450 ha_rows ha_ndbcluster::records()
1451 {
1452   DBUG_ENTER("ha_ndbcluster::records");
1453   DBUG_PRINT("info", ("id=%d, no_uncommitted_rows_count=%d",
1454                       ((const NDBTAB *)m_table)->getTableId(),
1455                       m_table_info->no_uncommitted_rows_count));
1456 
1457   if (update_stats(table->in_use, 1) == 0)
1458   {
1459     DBUG_RETURN(stats.records);
1460   }
1461   else
1462   {
1463     DBUG_RETURN(HA_POS_ERROR);
1464   }
1465 }
1466 
1467 void ha_ndbcluster::no_uncommitted_rows_execute_failure()
1468 {
1469   DBUG_ENTER("ha_ndbcluster::no_uncommitted_rows_execute_failure");
1470   get_thd_ndb(current_thd)->m_error= TRUE;
1471   DBUG_VOID_RETURN;
1472 }
1473 
1474 void ha_ndbcluster::no_uncommitted_rows_update(int c)
1475 {
1476   DBUG_ENTER("ha_ndbcluster::no_uncommitted_rows_update");
1477   struct Ndb_local_table_statistics *local_info= m_table_info;
1478   local_info->no_uncommitted_rows_count+= c;
1479   DBUG_PRINT("info", ("id=%d, no_uncommitted_rows_count=%d",
1480                       ((const NDBTAB *)m_table)->getTableId(),
1481                       local_info->no_uncommitted_rows_count));
1482   DBUG_VOID_RETURN;
1483 }
1484 
1485 void ha_ndbcluster::no_uncommitted_rows_reset(THD *thd)
1486 {
1487   DBUG_ENTER("ha_ndbcluster::no_uncommitted_rows_reset");
1488   Thd_ndb *thd_ndb= get_thd_ndb(thd);
1489   thd_ndb->count++;
1490   thd_ndb->m_error= FALSE;
1491   thd_ndb->m_unsent_bytes= 0;
1492   DBUG_VOID_RETURN;
1493 }
1494 
1495 
1496 int ha_ndbcluster::ndb_err(NdbTransaction *trans,
1497                            bool have_lock)
1498 {
1499   THD *thd= current_thd;
1500   int res;
1501   NdbError err= trans->getNdbError();
1502   DBUG_ENTER("ndb_err");
1503 
1504   switch (err.classification) {
1505   case NdbError::SchemaError:
1506   {
1507     // TODO: perhaps we need to do more here, e.g. also invalidate the table in the cache
1508     m_table->setStatusInvalid();
1509     /* Close other open handlers not used by any thread */
1510     TABLE_LIST table_list;
1511     memset(&table_list, 0, sizeof(table_list));
1512     table_list.db= m_dbname;
1513     table_list.alias= table_list.table_name= m_tabname;
1514     close_cached_tables(thd, &table_list, have_lock, FALSE, FALSE);
1515     break;
1516   }
1517   default:
1518     break;
1519   }
1520   res= ndb_to_mysql_error(&err);
1521   DBUG_PRINT("info", ("transformed ndbcluster error %d to mysql error %d",
1522                       err.code, res));
1523   if (res == HA_ERR_FOUND_DUPP_KEY)
1524   {
1525     char *error_data= err.details;
1526     uint dupkey= MAX_KEY;
1527 
1528     for (uint i= 0; i < MAX_KEY; i++)
1529     {
1530       if (m_index[i].type == UNIQUE_INDEX ||
1531           m_index[i].type == UNIQUE_ORDERED_INDEX)
1532       {
1533         const NDBINDEX *unique_index=
1534           (const NDBINDEX *) m_index[i].unique_index;
1535         if (unique_index &&
1536             (char *) unique_index->getObjectId() == error_data)
1537         {
1538           dupkey= i;
1539           break;
1540         }
1541       }
1542     }
1543     if (m_rows_to_insert == 1)
1544     {
1545       /*
1546         We can only distinguish between primary and non-primary
1547         violations here, so we need to return MAX_KEY for non-primary
1548         to signal that the key is unknown
1549       */
1550       m_dupkey= err.code == 630 ? table_share->primary_key : dupkey;
1551     }
1552     else
1553     {
1554       /* We are batching inserts, offending key is not available */
1555       m_dupkey= (uint) -1;
1556     }
1557   }
1558   DBUG_RETURN(res);
1559 }
1560 
1561 
1562 /**
1563   Override the default get_error_message in order to add the
1564   error message from NDB.
1565 */
1566 
1567 bool ha_ndbcluster::get_error_message(int error,
1568                                       String *buf)
1569 {
1570   DBUG_ENTER("ha_ndbcluster::get_error_message");
1571   DBUG_PRINT("enter", ("error: %d", error));
1572 
1573   Ndb *ndb= check_ndb_in_thd(current_thd);
1574   if (!ndb)
1575     DBUG_RETURN(FALSE);
1576 
1577   const NdbError err= ndb->getNdbError(error);
1578   bool temporary= err.status==NdbError::TemporaryError;
1579   buf->set(err.message, strlen(err.message), &my_charset_bin);
1580   DBUG_PRINT("exit", ("message: %s, temporary: %d", buf->ptr(), temporary));
1581   DBUG_RETURN(temporary);
1582 }
1583 
1584 
1585 /*
1586   field_used_length() returns the number of bytes actually used to
1587   store the data of the field. So for a varstring it includes both
1588   length byte(s) and string data, and anything after data_length()
1589   bytes is unused.
1590 */
1591 static
1592 uint32 field_used_length(const Field* field)
1593 {
1594   if (field->type() == MYSQL_TYPE_VARCHAR)
1595   {
1596     const Field_varstring* f = static_cast<const Field_varstring*>(field);
1597     return f->length_bytes + const_cast<Field_varstring*>(f)->data_length();
1598                              // ^ no 'data_length() const'
1599   }
1600   return field->pack_length();
1601 }
1602 
1603 
1604 /**
1605   Check if MySQL field type forces var part in ndb storage
1606 */
1607 static bool field_type_forces_var_part(enum_field_types type)
1608 {
1609   switch (type) {
1610   case MYSQL_TYPE_VAR_STRING:
1611   case MYSQL_TYPE_VARCHAR:
1612     return TRUE;
1613   case MYSQL_TYPE_TINY_BLOB:
1614   case MYSQL_TYPE_BLOB:
1615   case MYSQL_TYPE_MEDIUM_BLOB:
1616   case MYSQL_TYPE_LONG_BLOB:
1617   case MYSQL_TYPE_GEOMETRY:
1618     return FALSE;
1619   default:
1620     return FALSE;
1621   }
1622 }
1623 
1624 /*
1625  * This is used for every additional row operation, to update the guesstimate
1626  * of pending bytes to send, and to check if it is now time to flush a batch.
1627  */
1628 bool
1629 ha_ndbcluster::add_row_check_if_batch_full_size(Thd_ndb *thd_ndb, uint size)
1630 {
1631   if (thd_ndb->m_unsent_bytes == 0)
1632     free_root(&(thd_ndb->m_batch_mem_root), MY_MARK_BLOCKS_FREE);
1633 
1634   uint unsent= thd_ndb->m_unsent_bytes;
1635   unsent+= size;
1636   thd_ndb->m_unsent_bytes= unsent;
1637   return unsent >= thd_ndb->m_batch_size;
1638 }
1639 
1640 /*
1641   Return a generic buffer that will remain valid until after next execute.
1642 
1643   The memory is freed by the first call to add_row_check_if_batch_full_size()
1644   following any execute() call. The intention is that the memory is associated
1645   with one batch of operations during batched slave updates.
1646 
1647   Note in particular that using get_buffer() / copy_row_to_buffer() separately
1648   from add_row_check_if_batch_full_size() could make memory usage grow without
1649   limit, and that this sequence:
1650 
1651     execute()
1652     get_buffer() / copy_row_to_buffer()
1653     add_row_check_if_batch_full_size()
1654     ...
1655     execute()
1656 
1657   will free the memory already at add_row_check_if_batch_full_size() time; it
1658   will not remain valid until the second execute().
1659 */
1660 uchar *
1661 ha_ndbcluster::get_buffer(Thd_ndb *thd_ndb, uint size)
1662 {
1663   return (uchar*)alloc_root(&(thd_ndb->m_batch_mem_root), size);
1664 }
1665 
1666 uchar *
1667 ha_ndbcluster::copy_row_to_buffer(Thd_ndb *thd_ndb, const uchar *record)
1668 {
1669   uchar *row= get_buffer(thd_ndb, table->s->reclength);
1670   if (unlikely(!row))
1671     return NULL;
1672   memcpy(row, record, table->s->reclength);
1673   return row;
1674 }
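/*
  Illustrative sketch (not part of the original source): the intended
  calling sequence for the batch buffer helpers above during a batched
  write. The function name example_batched_write and the row_size
  parameter are hypothetical; only copy_row_to_buffer() and
  add_row_check_if_batch_full_size() are real.

    int example_batched_write(Thd_ndb *thd_ndb, const uchar *record,
                              uint row_size)
    {
      // Copy the row; NdbApi may refer to it until the next execute()
      uchar *row= copy_row_to_buffer(thd_ndb, record);
      if (row == NULL)
        return HA_ERR_OUT_OF_MEM;

      // ... define the NDB operation using 'row' ...

      // Account for the pending bytes; flush when the batch is full.
      // The first add_row_check_if_batch_full_size() call after the
      // execute() is what recycles m_batch_mem_root.
      if (add_row_check_if_batch_full_size(thd_ndb, row_size))
        ; // execute() the transaction here to send the batch

      return 0;
    }
*/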
1675 
1676 /**
1677  * findBlobError
1678  * This method attempts to find an error in the hierarchy of runtime
1679  * NDBAPI objects from Blob up to transaction.
1680  * It returns 0 if an error is found (copied into 'error'), -1 otherwise.
1681  */
1682 int findBlobError(NdbError& error, NdbBlob* pBlob)
1683 {
1684   error= pBlob->getNdbError();
1685   if (error.code != 0)
1686     return 0;
1687 
1688   const NdbOperation* pOp= pBlob->getNdbOperation();
1689   error= pOp->getNdbError();
1690   if (error.code != 0)
1691     return 0;
1692 
1693   NdbTransaction* pTrans= pOp->getNdbTransaction();
1694   error= pTrans->getNdbError();
1695   if (error.code != 0)
1696     return 0;
1697 
1698   /* No error on any of the objects */
1699   return -1;
1700 }
1701 
1702 
1703 int g_get_ndb_blobs_value(NdbBlob *ndb_blob, void *arg)
1704 {
1705   ha_ndbcluster *ha= (ha_ndbcluster *)arg;
1706   DBUG_ENTER("g_get_ndb_blobs_value");
1707   DBUG_PRINT("info", ("destination row: %p", ha->m_blob_destination_record));
1708 
1709   if (ha->m_blob_counter == 0)   /* Reset total size at start of row */
1710     ha->m_blobs_row_total_size= 0;
1711 
1712   /* Count the total length needed for blob data. */
1713   int isNull;
1714   if (ndb_blob->getNull(isNull) != 0)
1715     ERR_RETURN(ndb_blob->getNdbError());
1716   if (isNull == 0) {
1717     Uint64 len64= 0;
1718     if (ndb_blob->getLength(len64) != 0)
1719       ERR_RETURN(ndb_blob->getNdbError());
1720     /* Align to Uint64. */
1721     ha->m_blobs_row_total_size+= (len64 + 7) & ~((Uint64)7);
1722     if (ha->m_blobs_row_total_size > 0xffffffff)
1723     {
1724       DBUG_ASSERT(FALSE);
1725       DBUG_RETURN(-1);
1726     }
1727     DBUG_PRINT("info", ("Blob number %d needs size %llu, total buffer reqt. now %llu",
1728                         ha->m_blob_counter,
1729                         len64,
1730                         ha->m_blobs_row_total_size));
1731   }
1732   ha->m_blob_counter++;
1733 
1734   /*
1735     Wait until all blobs in this row are active, so we can allocate
1736     and use a common buffer containing all.
1737   */
1738   if (ha->m_blob_counter < ha->m_blob_expected_count_per_row)
1739     DBUG_RETURN(0);
1740 
1741   /* Reset blob counter for next row (scan scenario) */
1742   ha->m_blob_counter= 0;
1743 
1744   /* Re-allocate bigger blob buffer for this row if necessary. */
1745   if (ha->m_blobs_row_total_size > ha->m_blobs_buffer_size)
1746   {
1747     my_free(ha->m_blobs_buffer, MYF(MY_ALLOW_ZERO_PTR));
1748     DBUG_PRINT("info", ("allocate blobs buffer size %u",
1749                         (uint32)(ha->m_blobs_row_total_size)));
1750     /* The Windows compiler complains when my_malloc() is given a non-size_t
1751      * argument, so validate the mapping from Uint64 to size_t
1752      */
1753     if(((size_t)ha->m_blobs_row_total_size) != ha->m_blobs_row_total_size)
1754     {
1755       ha->m_blobs_buffer= NULL;
1756       ha->m_blobs_buffer_size= 0;
1757       DBUG_RETURN(-1);
1758     }
1759 
1760     ha->m_blobs_buffer=
1761       (uchar*) my_malloc((size_t) ha->m_blobs_row_total_size, MYF(MY_WME));
1762     if (ha->m_blobs_buffer == NULL)
1763     {
1764       ha->m_blobs_buffer_size= 0;
1765       DBUG_RETURN(-1);
1766     }
1767     ha->m_blobs_buffer_size= ha->m_blobs_row_total_size;
1768   }
1769 
1770   /*
1771     Now read all blob data.
1772     If we know the destination mysqld row, we also set the blob null bit and
1773     pointer/length (if not, it will be done instead in unpack_record()).
1774   */
1775   uint32 offset= 0;
1776   for (uint i= 0; i < ha->table->s->fields; i++)
1777   {
1778     Field *field= ha->table->field[i];
1779     if (! (field->flags & BLOB_FLAG))
1780       continue;
1781     NdbValue value= ha->m_value[i];
1782     if (value.blob == NULL)
1783     {
1784       DBUG_PRINT("info",("[%u] skipped", i));
1785       continue;
1786     }
1787     Field_blob *field_blob= (Field_blob *)field;
1788     NdbBlob *ndb_blob= value.blob;
1789     int isNull;
1790     if (ndb_blob->getNull(isNull) != 0)
1791       ERR_RETURN(ndb_blob->getNdbError());
1792     if (isNull == 0) {
1793       Uint64 len64= 0;
1794       if (ndb_blob->getLength(len64) != 0)
1795         ERR_RETURN(ndb_blob->getNdbError());
1796       DBUG_ASSERT(len64 < 0xffffffff);
1797       uchar *buf= ha->m_blobs_buffer + offset;
1798       uint32 len= (uint32)(ha->m_blobs_buffer_size - offset);
1799       if (ndb_blob->readData(buf, len) != 0)
1800       {
1801         NdbError err;
1802         if (findBlobError(err, ndb_blob) == 0)
1803         {
1804           ERR_RETURN(err);
1805         }
1806         else
1807         {
1808           /* Should always have some error code set */
1809           assert(err.code != 0);
1810           ERR_RETURN(err);
1811         }
1812       }
1813       DBUG_PRINT("info", ("[%u] offset: %u  buf: 0x%lx  len=%u",
1814                           i, offset, (long) buf, len));
1815       DBUG_ASSERT(len == len64);
1816       if (ha->m_blob_destination_record)
1817       {
1818         my_ptrdiff_t ptrdiff=
1819           ha->m_blob_destination_record - ha->table->record[0];
1820         field_blob->move_field_offset(ptrdiff);
1821         field_blob->set_ptr(len, buf);
1822         field_blob->set_notnull();
1823         field_blob->move_field_offset(-ptrdiff);
1824       }
1825       offset+= Uint32((len64 + 7) & ~((Uint64)7));
1826     }
1827     else if (ha->m_blob_destination_record)
1828     {
1829       /* Have to set length even in this case. */
1830       my_ptrdiff_t ptrdiff=
1831         ha->m_blob_destination_record - ha->table->record[0];
1832       uchar *buf= ha->m_blobs_buffer + offset;
1833       field_blob->move_field_offset(ptrdiff);
1834       field_blob->set_ptr((uint32)0, buf);
1835       field_blob->set_null();
1836       field_blob->move_field_offset(-ptrdiff);
1837       DBUG_PRINT("info", ("[%u] isNull=%d", i, isNull));
1838     }
1839   }
1840 
1841   if (!ha->m_active_cursor)
1842   {
1843     /* Non-scan access: Blob reads have been issued;
1844      * execute them and then close the Blob
1845      * handles.
1846      */
1847     for (uint i= 0; i < ha->table->s->fields; i++)
1848     {
1849       Field *field= ha->table->field[i];
1850       if (! (field->flags & BLOB_FLAG))
1851         continue;
1852       NdbValue value= ha->m_value[i];
1853       if (value.blob == NULL)
1854       {
1855         DBUG_PRINT("info",("[%u] skipped", i));
1856         continue;
1857       }
1858       NdbBlob *ndb_blob= value.blob;
1859 
1860       assert(ndb_blob->getState() == NdbBlob::Active);
1861 
1862       /* Call close() with execPendingBlobOps == true
1863        * For LM_CommittedRead access, this will enqueue
1864        * an unlock operation, which the Blob framework
1865        * code invoking this callback will execute before
1866        * returning control to the caller of execute()
1867        */
1868       if (ndb_blob->close(true) != 0)
1869       {
1870         ERR_RETURN(ndb_blob->getNdbError());
1871       }
1872     }
1873   }
1874 
1875   DBUG_RETURN(0);
1876 }
1877 
1878 /*
1879   Request reading of blob values.
1880 
1881   If dst_record is specified, the blob null bit, pointer, and length will be
1882   set in that record. Otherwise they must be set later by calling
1883   unpack_record().
1884 */
1885 int
1886 ha_ndbcluster::get_blob_values(const NdbOperation *ndb_op, uchar *dst_record,
1887                                const MY_BITMAP *bitmap)
1888 {
1889   uint i;
1890   DBUG_ENTER("ha_ndbcluster::get_blob_values");
1891 
1892   m_blob_counter= 0;
1893   m_blob_expected_count_per_row= 0;
1894   m_blob_destination_record= dst_record;
1895   m_blobs_row_total_size= 0;
1896   ndb_op->getNdbTransaction()->
1897     setMaxPendingBlobReadBytes(THDVAR(current_thd, blob_read_batch_bytes));
1898 
1899   for (i= 0; i < table_share->fields; i++)
1900   {
1901     Field *field= table->field[i];
1902     if (!(field->flags & BLOB_FLAG))
1903       continue;
1904 
1905     DBUG_PRINT("info", ("fieldnr=%d", i));
1906     NdbBlob *ndb_blob;
1907     if (bitmap_is_set(bitmap, i))
1908     {
1909       if ((ndb_blob= ndb_op->getBlobHandle(i)) == NULL ||
1910           ndb_blob->setActiveHook(g_get_ndb_blobs_value, this) != 0)
1911         DBUG_RETURN(1);
1912       m_blob_expected_count_per_row++;
1913     }
1914     else
1915       ndb_blob= NULL;
1916 
1917     m_value[i].blob= ndb_blob;
1918   }
1919 
1920   DBUG_RETURN(0);
1921 }
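/*
  Illustrative sketch (not part of the original source): how the blob
  read machinery above fits together for one row.

    1) While defining the read, get_blob_values() registers
       g_get_ndb_blobs_value() as the active hook on each blob handle
       selected in the bitmap.

    2) During execute(), NdbApi invokes the hook once per blob as it
       becomes active. The hook only accumulates lengths until the last
       expected blob of the row is seen, then (re)allocates
       m_blobs_buffer, reads all blob data into it, and, if a
       destination record was given, sets each field's null bit and
       pointer/length (otherwise unpack_record() does that later).
*/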
1922 
1923 int
1924 ha_ndbcluster::set_blob_values(const NdbOperation *ndb_op,
1925                                my_ptrdiff_t row_offset, const MY_BITMAP *bitmap,
1926                                uint *set_count, bool batch)
1927 {
1928   uint field_no;
1929   uint *blob_index, *blob_index_end;
1930   int res= 0;
1931   DBUG_ENTER("ha_ndbcluster::set_blob_values");
1932 
1933   *set_count= 0;
1934 
1935   if (table_share->blob_fields == 0)
1936     DBUG_RETURN(0);
1937 
1938   ndb_op->getNdbTransaction()->
1939     setMaxPendingBlobWriteBytes(THDVAR(current_thd, blob_write_batch_bytes));
1940   blob_index= table_share->blob_field;
1941   blob_index_end= blob_index + table_share->blob_fields;
1942   do
1943   {
1944     field_no= *blob_index;
1945     /* A NULL bitmap sets all blobs. */
1946     if (bitmap && !bitmap_is_set(bitmap, field_no))
1947       continue;
1948     Field *field= table->field[field_no];
1949 
1950     NdbBlob *ndb_blob= ndb_op->getBlobHandle(field_no);
1951     if (ndb_blob == NULL)
1952       ERR_RETURN(ndb_op->getNdbError());
1953     if (field->is_real_null(row_offset))
1954     {
1955       DBUG_PRINT("info", ("Setting Blob %d to NULL", field_no));
1956       if (ndb_blob->setNull() != 0)
1957         ERR_RETURN(ndb_op->getNdbError());
1958     }
1959     else
1960     {
1961       Field_blob *field_blob= (Field_blob *)field;
1962 
1963       // Get length and pointer to data
1964       const uchar *field_ptr= field->ptr + row_offset;
1965       uint32 blob_len= field_blob->get_length(field_ptr);
1966       uchar* blob_ptr= NULL;
1967       field_blob->get_ptr(&blob_ptr);
1968 
1969       // Looks like NULL ptr signals length 0 blob
1970       if (blob_ptr == NULL) {
1971         DBUG_ASSERT(blob_len == 0);
1972         blob_ptr= (uchar*)"";
1973       }
1974 
1975       DBUG_PRINT("value", ("set blob ptr: 0x%lx  len: %u",
1976                            (long) blob_ptr, blob_len));
1977       DBUG_DUMP("value", blob_ptr, MIN(blob_len, 26));
1978 
1979       /*
1980         NdbBlob requires the data pointer to remain valid until execute() time.
1981         So when batching, we need to copy the value to a temporary buffer.
1982       */
1983       if (batch && blob_len > 0)
1984       {
1985         uchar *tmp_buf= get_buffer(m_thd_ndb, blob_len);
1986         if (!tmp_buf)
1987           DBUG_RETURN(HA_ERR_OUT_OF_MEM);
1988         memcpy(tmp_buf, blob_ptr, blob_len);
1989         blob_ptr= tmp_buf;
1990       }
1991       res= ndb_blob->setValue((char*)blob_ptr, blob_len);
1992       if (res != 0)
1993         ERR_RETURN(ndb_op->getNdbError());
1994     }
1995 
1996     ++(*set_count);
1997   } while (++blob_index != blob_index_end);
1998 
1999   DBUG_RETURN(res);
2000 }
2001 
2002 /*
2003   This routine is shared with the binlog injector.  There is no common blobs
2004   buffer, so the buffer and length are passed by reference.  The injector also
2005   passes a record pointer diff.
2006  */
2007 int get_ndb_blobs_value(TABLE* table, NdbValue* value_array,
2008                         uchar*& buffer, uint& buffer_size,
2009                         my_ptrdiff_t ptrdiff)
2010 {
2011   DBUG_ENTER("get_ndb_blobs_value");
2012 
2013   // Field has no field number so cannot use TABLE blob_field
2014   // Loop twice, first only counting total buffer size
2015   for (int loop= 0; loop <= 1; loop++)
2016   {
2017     uint32 offset= 0;
2018     for (uint i= 0; i < table->s->fields; i++)
2019     {
2020       Field *field= table->field[i];
2021       NdbValue value= value_array[i];
2022       if (! (field->flags & BLOB_FLAG))
2023         continue;
2024       if (value.blob == NULL)
2025       {
2026         DBUG_PRINT("info",("[%u] skipped", i));
2027         continue;
2028       }
2029       Field_blob *field_blob= (Field_blob *)field;
2030       NdbBlob *ndb_blob= value.blob;
2031       int isNull;
2032       if (ndb_blob->getNull(isNull) != 0)
2033         ERR_RETURN(ndb_blob->getNdbError());
2034       if (isNull == 0) {
2035         Uint64 len64= 0;
2036         if (ndb_blob->getLength(len64) != 0)
2037           ERR_RETURN(ndb_blob->getNdbError());
2038         // Align to Uint64
2039         uint32 size= Uint32(len64);
2040         if (size % 8 != 0)
2041           size+= 8 - size % 8;
2042         if (loop == 1)
2043         {
2044           uchar *buf= buffer + offset;
2045           uint32 len= 0xffffffff;  // Max uint32
2046           if (ndb_blob->readData(buf, len) != 0)
2047             ERR_RETURN(ndb_blob->getNdbError());
2048           DBUG_PRINT("info", ("[%u] offset: %u  buf: 0x%lx  len=%u  [ptrdiff=%d]",
2049                               i, offset, (long) buf, len, (int)ptrdiff));
2050           DBUG_ASSERT(len == len64);
2051           // Ugly hack assumes only ptr needs to be changed
2052           field_blob->set_ptr_offset(ptrdiff, len, buf);
2053         }
2054         offset+= size;
2055       }
2056       else if (loop == 1) // undefined or null
2057       {
2058         // have to set length even in this case
2059         uchar *buf= buffer + offset; // or maybe NULL
2060         uint32 len= 0;
2061         field_blob->set_ptr_offset(ptrdiff, len, buf);
2062         DBUG_PRINT("info", ("[%u] isNull=%d", i, isNull));
2063       }
2064     }
2065     if (loop == 0 && offset > buffer_size)
2066     {
2067       my_free(buffer, MYF(MY_ALLOW_ZERO_PTR));
2068       buffer_size= 0;
2069       DBUG_PRINT("info", ("allocate blobs buffer size %u", offset));
2070       buffer= (uchar*) my_malloc(offset, MYF(MY_WME));
2071       if (buffer == NULL)
2072       {
2073         sql_print_error("ha_ndbcluster::get_ndb_blobs_value: "
2074                         "my_malloc(%u) failed", offset);
2075         DBUG_RETURN(-1);
2076       }
2077       buffer_size= offset;
2078     }
2079   }
2080   DBUG_RETURN(0);
2081 }
2082 
2083 
2084 /**
2085   Check if any set or get of blob value in current query.
2086 */
2087 
2088 bool ha_ndbcluster::uses_blob_value(const MY_BITMAP *bitmap) const
2089 {
2090   uint *blob_index, *blob_index_end;
2091   if (table_share->blob_fields == 0)
2092     return FALSE;
2093 
2094   blob_index=     table_share->blob_field;
2095   blob_index_end= blob_index + table_share->blob_fields;
2096   do
2097   {
2098     if (bitmap_is_set(bitmap, table->field[*blob_index]->field_index))
2099       return TRUE;
2100   } while (++blob_index != blob_index_end);
2101   return FALSE;
2102 }
2103 
2104 void ha_ndbcluster::release_blobs_buffer()
2105 {
2106   DBUG_ENTER("ha_ndbcluster::release_blobs_buffer");
2107   if (m_blobs_buffer_size > 0)
2108   {
2109     DBUG_PRINT("info", ("Deleting blobs buffer, size %llu", m_blobs_buffer_size));
2110     my_free(m_blobs_buffer, MYF(MY_ALLOW_ZERO_PTR));
2111     m_blobs_buffer= 0;
2112     m_blobs_row_total_size= 0;
2113     m_blobs_buffer_size= 0;
2114   }
2115   DBUG_VOID_RETURN;
2116 }
2117 
2118 /**
2119   Compare the frm data for this table with the frm data in NDB.
2120 
2121   Used to check that the frm-file on disk is equal to the frm-file
2122   of the table accessed in NDB.
2123 
2124   @retval
2125     0    frm data is equal
2126   @retval
2127     1    frm data differs (meta data has changed; re-read and try again)
2128 */
2129 
2130 int cmp_frm(const NDBTAB *ndbtab, const void *pack_data,
2131             uint pack_length)
2132 {
2133   DBUG_ENTER("cmp_frm");
2134   /*
2135     Compare FrmData in NDB with frm file from disk.
2136   */
2137   if ((pack_length != ndbtab->getFrmLength()) ||
2138       (memcmp(pack_data, ndbtab->getFrmData(), pack_length)))
2139     DBUG_RETURN(1);
2140   DBUG_RETURN(0);
2141 }
2142 
2143 /*
2144   Does type support a default value?
2145 */
2146 static bool
2147 type_supports_default_value(enum_field_types mysql_type)
2148 {
2149   bool ret = (mysql_type != MYSQL_TYPE_BLOB &&
2150               mysql_type != MYSQL_TYPE_TINY_BLOB &&
2151               mysql_type != MYSQL_TYPE_MEDIUM_BLOB &&
2152               mysql_type != MYSQL_TYPE_LONG_BLOB &&
2153               mysql_type != MYSQL_TYPE_GEOMETRY);
2154 
2155   return ret;
2156 }
2157 
2158 /**
2159    Check that Ndb data dictionary has the same default values
2160    as MySQLD for the current table.
2161    Called from a DBUG_ASSERT check during table open.
2162 
2163    Returns
2164      0  - Defaults are ok
2165      -1 - Some default(s) are bad
2166 */
2167 int ha_ndbcluster::check_default_values(const NDBTAB* ndbtab)
2168 {
2169   /* Debug-only method for checking that table defaults are aligned
2170      between MySQLD and Ndb
2171   */
2172   bool defaults_aligned= true;
2173 
2174   if (ndbtab->hasDefaultValues())
2175   {
2176     /* Ndb supports native defaults for non-pk columns */
2177     my_bitmap_map *old_map= tmp_use_all_columns(table, table->read_set);
2178 
2179     for (uint f=0; f < table_share->fields; f++)
2180     {
2181       Field* field= table->field[f]; // Use Field struct from MySQLD table rep
2182       const NdbDictionary::Column* ndbCol= ndbtab->getColumn(field->field_index);
2183 
2184       if ((! (field->flags & (PRI_KEY_FLAG |
2185                               NO_DEFAULT_VALUE_FLAG))) &&
2186           type_supports_default_value(field->real_type()))
2187       {
2188         /* We expect Ndb to have a native default for this
2189          * column
2190          */
2191         my_ptrdiff_t src_offset= table_share->default_values -
2192           field->table->record[0];
2193 
2194         /* Move field by offset to refer to default value */
2195         field->move_field_offset(src_offset);
2196 
2197         const uchar* ndb_default= (const uchar*) ndbCol->getDefaultValue();
2198 
2199         if (ndb_default == NULL)
2200           /* MySQLD default must also be NULL */
2201           defaults_aligned= field->is_null();
2202         else
2203         {
2204           if (field->type() != MYSQL_TYPE_BIT)
2205           {
2206             defaults_aligned= (0 == field->cmp(ndb_default));
2207           }
2208           else
2209           {
2210             longlong value= (static_cast<Field_bit*>(field))->val_int();
2211             /* Map to NdbApi format - two Uint32s */
2212             Uint32 out[2];
2213             out[0] = 0;
2214             out[1] = 0;
2215             for (int b=0; b < 64; b++)
2216             {
2217               out[b >> 5] |= (value & 1) << (b & 31);
2218 
2219               value= value >> 1;
2220             }
2221             Uint32 defaultLen = field_used_length(field);
2222             defaultLen = ((defaultLen + 3) & ~(Uint32)0x7);
2223             defaults_aligned= (0 == memcmp(ndb_default,
2224                                            out,
2225                                            defaultLen));
2226           }
2227         }
2228 
2229         field->move_field_offset(-src_offset);
2230 
2231         if (unlikely(!defaults_aligned))
2232         {
2233           DBUG_PRINT("info", ("Default values differ for column %u",
2234                               field->field_index));
2235           break;
2236         }
2237       }
2238       else
2239       {
2240         /* We don't expect Ndb to have a native default for this column */
2241         if (unlikely(ndbCol->getDefaultValue() != NULL))
2242         {
2243           /* Didn't expect that */
2244           DBUG_PRINT("info", ("Column %u has native default, but shouldn't."
2245                               " Flags=%u, type=%u",
2246                               field->field_index, field->flags, field->real_type()));
2247           defaults_aligned= false;
2248           break;
2249         }
2250       }
2251     }
2252     tmp_restore_column_map(table->read_set, old_map);
2253   }
2254 
2255   return (defaults_aligned? 0: -1);
2256 }
2257 
2258 int ha_ndbcluster::get_metadata(THD *thd, const char *path)
2259 {
2260   Ndb *ndb= get_thd_ndb(thd)->ndb;
2261   NDBDICT *dict= ndb->getDictionary();
2262   const NDBTAB *tab;
2263   int error;
2264   DBUG_ENTER("get_metadata");
2265   DBUG_PRINT("enter", ("m_tabname: %s, path: %s", m_tabname, path));
2266 
2267   DBUG_ASSERT(m_table == NULL);
2268   DBUG_ASSERT(m_table_info == NULL);
2269 
2270   uchar *data= NULL, *pack_data= NULL;
2271   size_t length, pack_length;
2272 
2273   /*
2274     Compare FrmData in NDB with frm file from disk.
2275   */
2276   error= 0;
2277   if (readfrm(path, &data, &length) ||
2278       packfrm(data, length, &pack_data, &pack_length))
2279   {
2280     my_free(data, MYF(MY_ALLOW_ZERO_PTR));
2281     my_free(pack_data, MYF(MY_ALLOW_ZERO_PTR));
2282     DBUG_RETURN(1);
2283   }
2284 
2285   ndb->setDatabaseName(m_dbname);
2286   Ndb_table_guard ndbtab_g(dict, m_tabname);
2287   if (!(tab= ndbtab_g.get_table()))
2288     ERR_RETURN(dict->getNdbError());
2289 
2290   if (get_ndb_share_state(m_share) != NSS_ALTERED
2291       && cmp_frm(tab, pack_data, pack_length))
2292   {
2293     DBUG_PRINT("error",
2294                ("metadata, pack_length: %lu  getFrmLength: %d  memcmp: %d",
2295                 (ulong) pack_length, tab->getFrmLength(),
2296                 memcmp(pack_data, tab->getFrmData(), pack_length)));
2297     DBUG_DUMP("pack_data", (uchar*) pack_data, pack_length);
2298     DBUG_DUMP("frm", (uchar*) tab->getFrmData(), tab->getFrmLength());
2299     error= HA_ERR_TABLE_DEF_CHANGED;
2300   }
2301   my_free((char*)data, MYF(0));
2302   my_free((char*)pack_data, MYF(0));
2303 
2304   /* Now check that any Ndb native defaults are aligned
2305      with MySQLD defaults
2306   */
2307   DBUG_ASSERT(check_default_values(tab) == 0);
2308 
2309   if (error)
2310     goto err;
2311 
2312   DBUG_PRINT("info", ("fetched table %s", tab->getName()));
2313   m_table= tab;
2314 
2315   if (bitmap_init(&m_bitmap, m_bitmap_buf, table_share->fields, 0))
2316   {
2317     error= HA_ERR_OUT_OF_MEM;
2318     goto err;
2319   }
2320   if (table_share->primary_key == MAX_KEY)
2321   {
2322     /* Hidden primary key. */
2323     if ((error= add_hidden_pk_ndb_record(dict)) != 0)
2324       goto err;
2325   }
2326 
2327   if ((error= add_table_ndb_record(dict)) != 0)
2328     goto err;
2329 
2330   /*
2331     Approx. write size in bytes over transporter
2332   */
2333   m_bytes_per_write= 12 + tab->getRowSizeInBytes() + 4 * tab->getNoOfColumns();
2334 
2335   /* Open indexes */
2336   if ((error= open_indexes(thd, ndb, table, FALSE)) != 0)
2337     goto err;
2338 
2339   /*
2340     Backward compatibility for tables created without tablespace
2341     in .frm => read tablespace setting from engine
2342   */
2343   if (table_share->mysql_version < 50120 &&
2344       !table_share->tablespace /* safety */)
2345   {
2346     Uint32 id;
2347     if (tab->getTablespace(&id))
2348     {
2349       NdbDictionary::Tablespace ts= dict->getTablespace(id);
2350       NdbError ndberr= dict->getNdbError();
2351       if (ndberr.classification == NdbError::NoError)
2352       {
2353         const char *tablespace= ts.getName();
2354         const size_t tablespace_len= strlen(tablespace);
2355         if (tablespace_len != 0)
2356         {
2357           DBUG_PRINT("info", ("Found tablespace '%s'", tablespace));
2358           table_share->tablespace= strmake_root(&table_share->mem_root,
2359                                                 tablespace,
2360                                                 tablespace_len);
2361         }
2362       }
2363     }
2364   }
2365 
2366   ndbtab_g.release();
2367 
2368 #ifdef HAVE_NDB_BINLOG
2369   ndbcluster_read_binlog_replication(thd, ndb, m_share, m_table,
2370                                      ::server_id, table, FALSE);
2371 #endif
2372 
2373   DBUG_RETURN(0);
2374 
2375 err:
2376   ndbtab_g.invalidate();
2377   m_table= NULL;
2378   DBUG_RETURN(error);
2379 }
2380 
2381 static int fix_unique_index_attr_order(NDB_INDEX_DATA &data,
2382                                        const NDBINDEX *index,
2383                                        KEY *key_info)
2384 {
2385   DBUG_ENTER("fix_unique_index_attr_order");
2386   unsigned sz= index->getNoOfIndexColumns();
2387 
2388   if (data.unique_index_attrid_map)
2389     my_free((char*)data.unique_index_attrid_map, MYF(0));
2390   data.unique_index_attrid_map= (uchar*)my_malloc(sz,MYF(MY_WME));
2391   if (data.unique_index_attrid_map == 0)
2392   {
2393     sql_print_error("fix_unique_index_attr_order: my_malloc(%u) failure",
2394                     (unsigned int)sz);
2395     DBUG_RETURN(HA_ERR_OUT_OF_MEM);
2396   }
2397 
2398   KEY_PART_INFO* key_part= key_info->key_part;
2399   KEY_PART_INFO* end= key_part+key_info->user_defined_key_parts;
2400   DBUG_ASSERT(key_info->user_defined_key_parts == sz);
2401   for (unsigned i= 0; key_part != end; key_part++, i++)
2402   {
2403     const char *field_name= key_part->field->field_name;
2404 #ifndef DBUG_OFF
2405     data.unique_index_attrid_map[i]= 255;
2406 #endif
2407     for (unsigned j= 0; j < sz; j++)
2408     {
2409       const NDBCOL *c= index->getColumn(j);
2410       if (strcmp(field_name, c->getName()) == 0)
2411       {
2412         data.unique_index_attrid_map[i]= j;
2413         break;
2414       }
2415     }
2416     DBUG_ASSERT(data.unique_index_attrid_map[i] != 255);
2417   }
2418   DBUG_RETURN(0);
2419 }
2420 
2421 /*
2422   Create all the indexes for a table.
2423   If creation of any index fails,
2424   the error is returned immediately.
2425 */
2426 int ha_ndbcluster::create_indexes(THD *thd, Ndb *ndb, TABLE *tab)
2427 {
2428   uint i;
2429   int error= 0;
2430   const char *index_name;
2431   KEY* key_info= tab->key_info;
2432   const char **key_name= tab->s->keynames.type_names;
2433   DBUG_ENTER("ha_ndbcluster::create_indexes");
2434 
2435   for (i= 0; i < tab->s->keys; i++, key_info++, key_name++)
2436   {
2437     index_name= *key_name;
2438     NDB_INDEX_TYPE idx_type= get_index_type_from_table(i);
2439     error= create_index(thd, index_name, key_info, idx_type, i);
2440     if (error)
2441     {
2442       DBUG_PRINT("error", ("Failed to create index %u", i));
2443       break;
2444     }
2445   }
2446 
2447   DBUG_RETURN(error);
2448 }
2449 
2450 static void ndb_init_index(NDB_INDEX_DATA &data)
2451 {
2452   data.type= UNDEFINED_INDEX;
2453   data.status= UNDEFINED;
2454   data.unique_index= NULL;
2455   data.index= NULL;
2456   data.unique_index_attrid_map= NULL;
2457   data.ndb_record_key= NULL;
2458   data.ndb_unique_record_key= NULL;
2459   data.ndb_unique_record_row= NULL;
2460 }
2461 
2462 static void ndb_clear_index(NDBDICT *dict, NDB_INDEX_DATA &data)
2463 {
2464   if (data.unique_index_attrid_map)
2465   {
2466     my_free((char*)data.unique_index_attrid_map, MYF(0));
2467   }
2468   if (data.ndb_unique_record_key)
2469     dict->releaseRecord(data.ndb_unique_record_key);
2470   if (data.ndb_unique_record_row)
2471     dict->releaseRecord(data.ndb_unique_record_row);
2472   if (data.ndb_record_key)
2473     dict->releaseRecord(data.ndb_record_key);
2474   ndb_init_index(data);
2475 }
2476 
2477 static
2478 void ndb_protect_char(const char* from, char* to, uint to_length, char protect)
2479 {
2480   uint fpos= 0, tpos= 0;
2481 
2482   while(from[fpos] != '\0' && tpos < to_length - 1)
2483   {
2484     if (from[fpos] == protect)
2485     {
2486       int len= 0;
2487       to[tpos++]= '@';
2488       if(tpos < to_length - 5)
2489       {
2490         len= sprintf(to+tpos, "00%u", (uint) protect);
2491         tpos+= len;
2492       }
2493     }
2494     else
2495     {
2496       to[tpos++]= from[fpos];
2497     }
2498     fpos++;
2499   }
2500   to[tpos]= '\0';
2501 }
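/*
  Example of the escaping above: with protect == '/', each occurrence of
  '/' is replaced by '@' followed by "00" and the decimal character code,
  so (assuming ASCII) "t1/x" becomes "t1@0047x". This keeps the protected
  character out of the index names passed to NDB below.
*/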
2502 
2503 /*
2504   Associate a direct reference to an index handle
2505   with an index (for faster access)
2506  */
2507 int ha_ndbcluster::add_index_handle(THD *thd, NDBDICT *dict, KEY *key_info,
2508                                     const char *key_name, uint index_no)
2509 {
2510   char index_name[FN_LEN + 1];
2511   int error= 0;
2512 
2513   NDB_INDEX_TYPE idx_type= get_index_type_from_table(index_no);
2514   m_index[index_no].type= idx_type;
2515   DBUG_ENTER("ha_ndbcluster::add_index_handle");
2516   DBUG_PRINT("enter", ("table %s", m_tabname));
2517 
2518   ndb_protect_char(key_name, index_name, sizeof(index_name) - 1, '/');
2519   if (idx_type != PRIMARY_KEY_INDEX && idx_type != UNIQUE_INDEX)
2520   {
2521     DBUG_PRINT("info", ("Get handle to index %s", index_name));
2522     const NDBINDEX *index;
2523     do
2524     {
2525       index= dict->getIndexGlobal(index_name, *m_table);
2526       if (!index)
2527         ERR_RETURN(dict->getNdbError());
2528       DBUG_PRINT("info", ("index: 0x%lx  id: %d  version: %d.%d  status: %d",
2529                           (long) index,
2530                           index->getObjectId(),
2531                           index->getObjectVersion() & 0xFFFFFF,
2532                           index->getObjectVersion() >> 24,
2533                           index->getObjectStatus()));
2534       DBUG_ASSERT(index->getObjectStatus() ==
2535                   NdbDictionary::Object::Retrieved);
2536       break;
2537     } while (1);
2538     m_index[index_no].index= index;
2539   }
2540   if (idx_type == UNIQUE_ORDERED_INDEX || idx_type == UNIQUE_INDEX)
2541   {
2542     char unique_index_name[FN_LEN + 1];
2543     static const char* unique_suffix= "$unique";
2544     m_has_unique_index= TRUE;
2545     strxnmov(unique_index_name, FN_LEN, index_name, unique_suffix, NullS);
2546     DBUG_PRINT("info", ("Get handle to unique_index %s", unique_index_name));
2547     const NDBINDEX *index;
2548     do
2549     {
2550       index= dict->getIndexGlobal(unique_index_name, *m_table);
2551       if (!index)
2552         ERR_RETURN(dict->getNdbError());
2553       DBUG_PRINT("info", ("index: 0x%lx  id: %d  version: %d.%d  status: %d",
2554                           (long) index,
2555                           index->getObjectId(),
2556                           index->getObjectVersion() & 0xFFFFFF,
2557                           index->getObjectVersion() >> 24,
2558                           index->getObjectStatus()));
2559       DBUG_ASSERT(index->getObjectStatus() ==
2560                   NdbDictionary::Object::Retrieved);
2561       break;
2562     } while (1);
2563     m_index[index_no].unique_index= index;
2564     error= fix_unique_index_attr_order(m_index[index_no], index, key_info);
2565   }
2566 
2567   if (!error)
2568     error= add_index_ndb_record(dict, key_info, index_no);
2569 
2570   if (!error)
2571     m_index[index_no].status= ACTIVE;
2572 
2573   DBUG_RETURN(error);
2574 }
2575 
2576 /*
2577   We use this function to convert null bit masks, as found in class Field,
2578   to bit numbers, as used in NdbRecord.
2579 */
2580 static uint
2581 null_bit_mask_to_bit_number(uchar bit_mask)
2582 {
2583   switch (bit_mask)
2584   {
2585     case  0x1: return 0;
2586     case  0x2: return 1;
2587     case  0x4: return 2;
2588     case  0x8: return 3;
2589     case 0x10: return 4;
2590     case 0x20: return 5;
2591     case 0x40: return 6;
2592     case 0x80: return 7;
2593     default:
2594       DBUG_ASSERT(false);
2595       return 0;
2596   }
2597 }
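/*
  Example: a Field whose NULL flag is stored under bit mask 0x20 of its
  null byte gives null_bit_mask_to_bit_number(0x20) == 5, which is the
  bit-number form that NdbRecord expects in nullbit_bit_in_byte.
*/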
2598 
2599 static void
2600 ndb_set_record_specification(uint field_no,
2601                              NdbDictionary::RecordSpecification *spec,
2602                              const TABLE *table,
2603                              const NdbDictionary::Table *ndb_table)
2604 {
2605   spec->column= ndb_table->getColumn(field_no);
2606   spec->offset= Uint32(table->field[field_no]->ptr - table->record[0]);
2607   if (table->field[field_no]->null_ptr)
2608   {
2609     spec->nullbit_byte_offset=
2610       Uint32(table->field[field_no]->null_ptr - table->record[0]);
2611     spec->nullbit_bit_in_byte=
2612       null_bit_mask_to_bit_number(table->field[field_no]->null_bit);
2613   }
2614   else if (table->field[field_no]->type() == MYSQL_TYPE_BIT)
2615   {
2616     /* We need to store the position of the overflow bits. */
2617     const Field_bit* field_bit= static_cast<Field_bit*>(table->field[field_no]);
2618     spec->nullbit_byte_offset=
2619       Uint32(field_bit->bit_ptr - table->record[0]);
2620     spec->nullbit_bit_in_byte= field_bit->bit_ofs;
2621   }
2622   else
2623   {
2624     spec->nullbit_byte_offset= 0;
2625     spec->nullbit_bit_in_byte= 0;
2626   }
2627 }
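/*
  Illustrative sketch (not part of the original source): the
  RecordSpecification filled in above, written out for a hypothetical
  nullable column stored at offset 5 of record[0], with its NULL flag in
  bit 1 of the byte at offset 0:

    NdbDictionary::RecordSpecification spec;
    spec.column= ndb_table->getColumn(field_no);
    spec.offset= 5;               // field->ptr - table->record[0]
    spec.nullbit_byte_offset= 0;  // field->null_ptr - table->record[0]
    spec.nullbit_bit_in_byte= 1;  // from null_bit mask 0x2
*/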
2628 
2629 int
2630 ha_ndbcluster::add_table_ndb_record(NDBDICT *dict)
2631 {
2632   DBUG_ENTER("ha_ndbcluster::add_table_ndb_record()");
2633   NdbDictionary::RecordSpecification spec[NDB_MAX_ATTRIBUTES_IN_TABLE + 2];
2634   NdbRecord *rec;
2635   uint i;
2636 
2637   for (i= 0; i < table_share->fields; i++)
2638   {
2639     ndb_set_record_specification(i, &spec[i], table, m_table);
2640   }
2641 
2642   rec= dict->createRecord(m_table, spec, i, sizeof(spec[0]),
2643                           NdbDictionary::RecMysqldBitfield);
2644   if (! rec)
2645     ERR_RETURN(dict->getNdbError());
2646   m_ndb_record= rec;
2647 
2648   DBUG_RETURN(0);
2649 }
2650 
2651 /* Create NdbRecord for setting hidden primary key from Uint64. */
2652 int
2653 ha_ndbcluster::add_hidden_pk_ndb_record(NDBDICT *dict)
2654 {
2655   DBUG_ENTER("ha_ndbcluster::add_hidden_pk_ndb_record");
2656   NdbDictionary::RecordSpecification spec[1];
2657   NdbRecord *rec;
2658 
2659   spec[0].column= m_table->getColumn(table_share->fields);
2660   spec[0].offset= 0;
2661   spec[0].nullbit_byte_offset= 0;
2662   spec[0].nullbit_bit_in_byte= 0;
2663 
2664   rec= dict->createRecord(m_table, spec, 1, sizeof(spec[0]));
2665   if (! rec)
2666     ERR_RETURN(dict->getNdbError());
2667   m_ndb_hidden_key_record= rec;
2668 
2669   DBUG_RETURN(0);
2670 }
2671 
2672 int
2673 ha_ndbcluster::add_index_ndb_record(NDBDICT *dict, KEY *key_info, uint index_no)
2674 {
2675   DBUG_ENTER("ha_ndbcluster::add_index_ndb_record");
2676   NdbDictionary::RecordSpecification spec[NDB_MAX_ATTRIBUTES_IN_TABLE + 2];
2677   NdbRecord *rec;
2678 
2679   Uint32 offset= 0;
2680   for (uint i= 0; i < key_info->user_defined_key_parts; i++)
2681   {
2682     KEY_PART_INFO *kp= &key_info->key_part[i];
2683 
2684     spec[i].column= m_table->getColumn(kp->fieldnr - 1);
2685     if (! spec[i].column)
2686       ERR_RETURN(dict->getNdbError());
2687     if (kp->null_bit)
2688     {
2689       /* Nullable column. */
2690       spec[i].offset= offset + 1;           // First byte is NULL flag
2691       spec[i].nullbit_byte_offset= offset;
2692       spec[i].nullbit_bit_in_byte= 0;
2693     }
2694     else
2695     {
2696       /* Not nullable column. */
2697       spec[i].offset= offset;
2698       spec[i].nullbit_byte_offset= 0;
2699       spec[i].nullbit_bit_in_byte= 0;
2700     }
2701     offset+= kp->store_length;
2702   }
2703 
2704   if (m_index[index_no].index)
2705   {
2706     /*
2707       Enable MysqldShrinkVarchar flag so that the two-byte length used by
2708       mysqld for short varchar keys is correctly converted into a one-byte
2709       length used by Ndb kernel.
2710     */
2711     rec= dict->createRecord(m_index[index_no].index, m_table,
2712                             spec, key_info->user_defined_key_parts, sizeof(spec[0]),
2713                             ( NdbDictionary::RecMysqldShrinkVarchar |
2714                               NdbDictionary::RecMysqldBitfield ));
2715     if (! rec)
2716       ERR_RETURN(dict->getNdbError());
2717     m_index[index_no].ndb_record_key= rec;
2718   }
2719   else
2720     m_index[index_no].ndb_record_key= NULL;
2721 
2722   if (m_index[index_no].unique_index)
2723   {
2724     rec= dict->createRecord(m_index[index_no].unique_index, m_table,
2725                             spec, key_info->user_defined_key_parts, sizeof(spec[0]),
2726                             ( NdbDictionary::RecMysqldShrinkVarchar |
2727                               NdbDictionary::RecMysqldBitfield ));
2728     if (! rec)
2729       ERR_RETURN(dict->getNdbError());
2730     m_index[index_no].ndb_unique_record_key= rec;
2731   }
2732   else if (index_no == table_share->primary_key)
2733   {
2734     /* The primary key is special, there is no explicit NDB index associated. */
2735     rec= dict->createRecord(m_table,
2736                             spec, key_info->user_defined_key_parts, sizeof(spec[0]),
2737                             ( NdbDictionary::RecMysqldShrinkVarchar |
2738                               NdbDictionary::RecMysqldBitfield ));
2739     if (! rec)
2740       ERR_RETURN(dict->getNdbError());
2741     m_index[index_no].ndb_unique_record_key= rec;
2742   }
2743   else
2744     m_index[index_no].ndb_unique_record_key= NULL;
2745 
2746   /* Now do the same, but this time with offsets from Field, for row access. */
2747   for (uint i= 0; i < key_info->user_defined_key_parts; i++)
2748   {
2749     const KEY_PART_INFO *kp= &key_info->key_part[i];
2750 
2751     spec[i].offset= kp->offset;
2752     if (kp->null_bit)
2753     {
2754       /* Nullable column. */
2755       spec[i].nullbit_byte_offset= kp->null_offset;
2756       spec[i].nullbit_bit_in_byte= null_bit_mask_to_bit_number(kp->null_bit);
2757     }
2758     else
2759     {
2760       /* Not nullable column. */
2761       spec[i].nullbit_byte_offset= 0;
2762       spec[i].nullbit_bit_in_byte= 0;
2763     }
2764   }
2765 
2766   if (m_index[index_no].unique_index)
2767   {
2768     rec= dict->createRecord(m_index[index_no].unique_index, m_table,
2769                             spec, key_info->user_defined_key_parts, sizeof(spec[0]),
2770                             NdbDictionary::RecMysqldBitfield);
2771     if (! rec)
2772       ERR_RETURN(dict->getNdbError());
2773     m_index[index_no].ndb_unique_record_row= rec;
2774   }
2775   else if (index_no == table_share->primary_key)
2776   {
2777     rec= dict->createRecord(m_table,
2778                             spec, key_info->user_defined_key_parts, sizeof(spec[0]),
2779                             NdbDictionary::RecMysqldBitfield);
2780     if (! rec)
2781       ERR_RETURN(dict->getNdbError());
2782     m_index[index_no].ndb_unique_record_row= rec;
2783   }
2784   else
2785     m_index[index_no].ndb_unique_record_row= NULL;
2786 
2787   DBUG_RETURN(0);
2788 }
2789 
2790 /*
2791   Associate index handles for each index of a table
2792 */
2793 int ha_ndbcluster::open_indexes(THD *thd, Ndb *ndb, TABLE *tab,
2794                                 bool ignore_error)
2795 {
2796   uint i;
2797   int error= 0;
2798   NDBDICT *dict= ndb->getDictionary();
2799   KEY* key_info= tab->key_info;
2800   const char **key_name= tab->s->keynames.type_names;
2801   DBUG_ENTER("ha_ndbcluster::open_indexes");
2802   m_has_unique_index= FALSE;
2803   btree_keys.clear_all();
2804   for (i= 0; i < tab->s->keys; i++, key_info++, key_name++)
2805   {
2806     if ((error= add_index_handle(thd, dict, key_info, *key_name, i)))
2807     {
2808       if (ignore_error)
2809         m_index[i].index= m_index[i].unique_index= NULL;
2810       else
2811         break;
2812     }
2813     m_index[i].null_in_unique_index= FALSE;
2814     if (check_index_fields_not_null(key_info))
2815       m_index[i].null_in_unique_index= TRUE;
2816 
2817     if (error == 0 && MY_TEST(index_flags(i, 0, 0) & HA_READ_RANGE))
2818       btree_keys.set_bit(i);
2819   }
2820 
2821   if (error && !ignore_error)
2822   {
2823     while (i > 0)
2824     {
2825       i--;
2826       if (m_index[i].index)
2827       {
2828          dict->removeIndexGlobal(*m_index[i].index, 1);
2829          m_index[i].index= NULL;
2830       }
2831       if (m_index[i].unique_index)
2832       {
2833          dict->removeIndexGlobal(*m_index[i].unique_index, 1);
2834          m_index[i].unique_index= NULL;
2835       }
2836     }
2837   }
2838 
2839   DBUG_ASSERT(error == 0 || error == 4243);
2840 
2841   DBUG_RETURN(error);
2842 }
2843 
2844 /*
2845   Renumber indexes in index list by shifting out
2846   indexes that are to be dropped
2847  */
2848 void ha_ndbcluster::renumber_indexes(Ndb *ndb, TABLE *tab)
2849 {
2850   uint i;
2851   const char *index_name;
2852   KEY* key_info= tab->key_info;
2853   const char **key_name= tab->s->keynames.type_names;
2854   DBUG_ENTER("ha_ndbcluster::renumber_indexes");
2855 
2856   for (i= 0; i < tab->s->keys; i++, key_info++, key_name++)
2857   {
2858     index_name= *key_name;
2859     NDB_INDEX_TYPE idx_type= get_index_type_from_table(i);
2860     m_index[i].type= idx_type;
2861     if (m_index[i].status == TO_BE_DROPPED)
2862     {
2863       DBUG_PRINT("info", ("Shifting index %s(%i) out of the list",
2864                           index_name, i));
2865       NDB_INDEX_DATA tmp;
2866       uint j= i + 1;
2867       // Shift index out of list
2868       while(j != MAX_KEY && m_index[j].status != UNDEFINED)
2869       {
2870         tmp=  m_index[j - 1];
2871         m_index[j - 1]= m_index[j];
2872         m_index[j]= tmp;
2873         j++;
2874       }
2875     }
2876   }
2877 
2878   DBUG_VOID_RETURN;
2879 }
2880 
2881 /*
2882   Drop all indexes that are marked for deletion
2883 */
2884 int ha_ndbcluster::drop_indexes(Ndb *ndb, TABLE *tab)
2885 {
2886   uint i;
2887   int error= 0;
2888   const char *index_name;
2889   KEY* key_info= tab->key_info;
2890   NDBDICT *dict= ndb->getDictionary();
2891   DBUG_ENTER("ha_ndbcluster::drop_indexes");
2892 
2893   for (i= 0; i < tab->s->keys; i++, key_info++)
2894   {
2895     NDB_INDEX_TYPE idx_type= get_index_type_from_table(i);
2896     m_index[i].type= idx_type;
2897     if (m_index[i].status == TO_BE_DROPPED)
2898     {
2899       const NdbDictionary::Index *index= m_index[i].index;
2900       const NdbDictionary::Index *unique_index= m_index[i].unique_index;
2901 
2902       if (index)
2903       {
2904         index_name= index->getName();
2905         DBUG_PRINT("info", ("Dropping index %u: %s", i, index_name));
2906         // Drop ordered index from ndb
2907         error= dict->dropIndexGlobal(*index);
2908         if (!error)
2909         {
2910           dict->removeIndexGlobal(*index, 1);
2911           m_index[i].index= NULL;
2912         }
2913       }
2914       if (!error && unique_index)
2915       {
2916         index_name= unique_index->getName();
2917         DBUG_PRINT("info", ("Dropping unique index %u: %s", i, index_name));
2918         // Drop unique index from ndb
2919         error= dict->dropIndexGlobal(*unique_index);
2920         if (!error)
2921         {
2922           dict->removeIndexGlobal(*unique_index, 1);
2923           m_index[i].unique_index= NULL;
2924         }
2925       }
2926       if (error)
2927         DBUG_RETURN(error);
2928       ndb_clear_index(dict, m_index[i]);
2929       continue;
2930     }
2931   }
2932 
2933   DBUG_RETURN(error);
2934 }
2935 
2936 /**
2937   Decode the type of an index from information
2938   provided in table object.
2939 */
2940 NDB_INDEX_TYPE ha_ndbcluster::get_index_type_from_table(uint inx) const
2941 {
2942   return get_index_type_from_key(inx, table_share->key_info,
2943                                  inx == table_share->primary_key);
2944 }
2945 
2946 NDB_INDEX_TYPE ha_ndbcluster::get_index_type_from_key(uint inx,
2947                                                       KEY *key_info,
2948                                                       bool primary) const
2949 {
2950   bool is_hash_index=  (key_info[inx].algorithm ==
2951                         HA_KEY_ALG_HASH);
2952   if (primary)
2953     return is_hash_index ? PRIMARY_KEY_INDEX : PRIMARY_KEY_ORDERED_INDEX;
2954 
2955   return ((key_info[inx].flags & HA_NOSAME) ?
2956           (is_hash_index ? UNIQUE_INDEX : UNIQUE_ORDERED_INDEX) :
2957           ORDERED_INDEX);
2958 }
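/*
  Examples of the mapping above (USING HASH gives HA_KEY_ALG_HASH):

    PRIMARY KEY (a)             -> PRIMARY_KEY_ORDERED_INDEX
    PRIMARY KEY (a) USING HASH  -> PRIMARY_KEY_INDEX
    UNIQUE KEY (a)              -> UNIQUE_ORDERED_INDEX
    UNIQUE KEY (a) USING HASH   -> UNIQUE_INDEX
    KEY (a)                     -> ORDERED_INDEX
*/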
2959 
2960 bool ha_ndbcluster::check_index_fields_not_null(KEY* key_info)
2961 {
2962   KEY_PART_INFO* key_part= key_info->key_part;
2963   KEY_PART_INFO* end= key_part+key_info->user_defined_key_parts;
2964   DBUG_ENTER("ha_ndbcluster::check_index_fields_not_null");
2965 
2966   for (; key_part != end; key_part++)
2967   {
2968     Field* field= key_part->field;
2969     if (field->maybe_null())
2970       DBUG_RETURN(TRUE);
2971   }
2972 
2973   DBUG_RETURN(FALSE);
2974 }
2975 
2976 void ha_ndbcluster::release_metadata(THD *thd, Ndb *ndb)
2977 {
2978   uint i;
2979 
2980   DBUG_ENTER("release_metadata");
2981   DBUG_PRINT("enter", ("m_tabname: %s", m_tabname));
2982 
2983   NDBDICT *dict= ndb->getDictionary();
2984   int invalidate_indexes= 0;
2985   if (thd && thd->lex && thd->lex->sql_command == SQLCOM_FLUSH)
2986   {
2987     invalidate_indexes = 1;
2988   }
2989   if (m_table != NULL)
2990   {
2991     if (m_ndb_record != NULL)
2992     {
2993       dict->releaseRecord(m_ndb_record);
2994       m_ndb_record= NULL;
2995     }
2996     if (m_ndb_hidden_key_record != NULL)
2997     {
2998       dict->releaseRecord(m_ndb_hidden_key_record);
2999       m_ndb_hidden_key_record= NULL;
3000     }
3001     if (m_table->getObjectStatus() == NdbDictionary::Object::Invalid)
3002       invalidate_indexes= 1;
3003     dict->removeTableGlobal(*m_table, invalidate_indexes);
3004   }
3005   // TODO investigate
3006   DBUG_ASSERT(m_table_info == NULL);
3007   m_table_info= NULL;
3008 
3009   // Release index list
3010   for (i= 0; i < MAX_KEY; i++)
3011   {
3012     if (m_index[i].unique_index)
3013     {
3014       DBUG_ASSERT(m_table != NULL);
3015       dict->removeIndexGlobal(*m_index[i].unique_index, invalidate_indexes);
3016     }
3017     if (m_index[i].index)
3018     {
3019       DBUG_ASSERT(m_table != NULL);
3020       dict->removeIndexGlobal(*m_index[i].index, invalidate_indexes);
3021     }
3022     ndb_clear_index(dict, m_index[i]);
3023   }
3024 
3025   m_table= NULL;
3026   DBUG_VOID_RETURN;
3027 }
3028 
3029 
3030 /*
3031   Map from thr_lock_type to NdbOperation::LockMode
3032 */
3033 static inline
3034 NdbOperation::LockMode get_ndb_lock_mode(enum thr_lock_type type)
3035 {
3036   if (type >= TL_WRITE_ALLOW_WRITE)
3037     return NdbOperation::LM_Exclusive;
3038   if (type ==  TL_READ_WITH_SHARED_LOCKS)
3039     return NdbOperation::LM_Read;
3040   return NdbOperation::LM_CommittedRead;
3041 }
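/*
  Examples of the mapping above:

    TL_WRITE_ALLOW_WRITE and stronger -> LM_Exclusive
    TL_READ_WITH_SHARED_LOCKS         -> LM_Read (shared row lock)
    all other (weaker) types          -> LM_CommittedRead (no lock;
                                         read last committed value)
*/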
3042 
3043 
3044 static const ulong index_type_flags[]=
3045 {
3046   /* UNDEFINED_INDEX */
3047   0,
3048 
3049   /* PRIMARY_KEY_INDEX */
3050   HA_ONLY_WHOLE_INDEX,
3051 
3052   /* PRIMARY_KEY_ORDERED_INDEX */
3053   /*
3054      Enable HA_KEYREAD_ONLY when "sorted" indexes are supported,
3055      so that ORDER BY clauses can be optimized by reading directly
3056      through the index.
3057   */
3058   // HA_KEYREAD_ONLY |
3059   HA_READ_NEXT |
3060   HA_READ_PREV |
3061   HA_READ_RANGE |
3062   HA_READ_ORDER,
3063 
3064   /* UNIQUE_INDEX */
3065   HA_ONLY_WHOLE_INDEX,
3066 
3067   /* UNIQUE_ORDERED_INDEX */
3068   HA_READ_NEXT |
3069   HA_READ_PREV |
3070   HA_READ_RANGE |
3071   HA_READ_ORDER,
3072 
3073   /* ORDERED_INDEX */
3074   HA_READ_NEXT |
3075   HA_READ_PREV |
3076   HA_READ_RANGE |
3077   HA_READ_ORDER
3078 };
3079 
3080 static const int index_flags_size= sizeof(index_type_flags)/sizeof(ulong);
3081 
3082 inline NDB_INDEX_TYPE ha_ndbcluster::get_index_type(uint idx_no) const
3083 {
3084   DBUG_ASSERT(idx_no < MAX_KEY);
3085   return m_index[idx_no].type;
3086 }
3087 
has_null_in_unique_index(uint idx_no) const3088 inline bool ha_ndbcluster::has_null_in_unique_index(uint idx_no) const
3089 {
3090   DBUG_ASSERT(idx_no < MAX_KEY);
3091   return m_index[idx_no].null_in_unique_index;
3092 }
3093 
3094 
3095 /**
3096   Get the flags for an index.
3097 
3098   @return
3099     flags depending on the type of the index.
3100 */
3101 
index_flags(uint idx_no,uint part,bool all_parts) const3102 inline ulong ha_ndbcluster::index_flags(uint idx_no, uint part,
3103                                         bool all_parts) const
3104 {
3105   DBUG_ENTER("ha_ndbcluster::index_flags");
3106   DBUG_PRINT("enter", ("idx_no: %u", idx_no));
3107   DBUG_ASSERT(get_index_type_from_table(idx_no) < index_flags_size);
3108   DBUG_RETURN(index_type_flags[get_index_type_from_table(idx_no)] |
3109               HA_KEY_SCAN_NOT_ROR);
3110 }
3111 
3112 bool
primary_key_is_clustered()3113 ha_ndbcluster::primary_key_is_clustered()
3114 {
3115 
3116   if (table->s->primary_key == MAX_KEY)
3117     return false;
3118 
3119   /*
3120     NOTE 1: our ordered indexes are not really clustered
3121     but since accesing data when scanning index is free
3122     it's a good approximation
3123 
3124     NOTE 2: We really should consider DD attributes here too
3125     (for which there is IO to read data when scanning index)
3126     but that will need to be handled later...
3127   */
3128   const ndb_index_type idx_type =
3129     get_index_type_from_table(table->s->primary_key);
3130   return (idx_type == PRIMARY_KEY_ORDERED_INDEX ||
3131           idx_type == UNIQUE_ORDERED_INDEX ||
3132           idx_type == ORDERED_INDEX);
3133 }

bool ha_ndbcluster::check_index_fields_in_write_set(uint keyno)
{
  KEY* key_info= table->key_info + keyno;
  KEY_PART_INFO* key_part= key_info->key_part;
  KEY_PART_INFO* end= key_part+key_info->user_defined_key_parts;
  uint i;
  DBUG_ENTER("check_index_fields_in_write_set");

  for (i= 0; key_part != end; key_part++, i++)
  {
    Field* field= key_part->field;
    if (!bitmap_is_set(table->write_set, field->field_index))
    {
      DBUG_RETURN(false);
    }
  }

  DBUG_RETURN(true);
}


/**
  Read one record from NDB using primary key.
*/

int ha_ndbcluster::pk_read(const uchar *key, uint key_len, uchar *buf,
                           uint32 *part_id)
{
  NdbConnection *trans= m_thd_ndb->trans;
  int res;
  DBUG_ENTER("pk_read");
  DBUG_PRINT("enter", ("key_len: %u read_set=%x",
                       key_len, table->read_set->bitmap[0]));
  DBUG_DUMP("key", key, key_len);
  DBUG_ASSERT(trans);

  NdbOperation::LockMode lm= get_ndb_lock_mode(m_lock.type);

#ifndef NDB_WITHOUT_JOIN_PUSHDOWN
  if (check_if_pushable(NdbQueryOperationDef::PrimaryKeyAccess, table->s->primary_key))
  {
    // Is parent of pushed join
    DBUG_ASSERT(lm == NdbOperation::LM_CommittedRead);
    const int error= pk_unique_index_read_key_pushed(table->s->primary_key, key,
                                                     (m_user_defined_partitioning ?
                                                      part_id : NULL));
    if (unlikely(error))
      DBUG_RETURN(error);

    DBUG_ASSERT(m_active_query!=NULL);
    if ((res = execute_no_commit_ie(m_thd_ndb, trans)) != 0 ||
        m_active_query->getNdbError().code)
    {
      table->status= STATUS_NOT_FOUND;
      DBUG_RETURN(ndb_err(trans));
    }

    int result= fetch_next_pushed();
    if (result == NdbQuery::NextResult_gotRow)
    {
      DBUG_RETURN(0);
    }
    else if (result == NdbQuery::NextResult_scanComplete)
    {
      DBUG_RETURN(HA_ERR_KEY_NOT_FOUND);
    }
    else
    {
      DBUG_RETURN(ndb_err(trans));
    }
  }
  else
#endif
  {
    if (m_pushed_join_operation == PUSHED_ROOT)
    {
      m_thd_ndb->m_pushed_queries_dropped++;
    }

    const NdbOperation *op;
    if (!(op= pk_unique_index_read_key(table->s->primary_key, key, buf, lm,
                                       (m_user_defined_partitioning ?
                                        part_id :
                                        NULL))))
      ERR_RETURN(trans->getNdbError());

    if ((res = execute_no_commit_ie(m_thd_ndb, trans)) != 0 ||
        op->getNdbError().code)
    {
      table->status= STATUS_NOT_FOUND;
      DBUG_RETURN(ndb_err(trans));
    }
    table->status= 0;
    DBUG_RETURN(0);
  }
}

/**
  Update primary key or partition id by doing delete+insert.
*/

int ha_ndbcluster::ndb_pk_update_row(THD *thd,
                                     const uchar *old_data, uchar *new_data,
                                     uint32 old_part_id)
{
  NdbTransaction *trans= m_thd_ndb->trans;
  int error;
  const NdbOperation *op;
  DBUG_ENTER("ndb_pk_update_row");
  DBUG_ASSERT(trans);

  NdbOperation::OperationOptions *poptions = NULL;
  NdbOperation::OperationOptions options;
  options.optionsPresent=0;

  DBUG_PRINT("info", ("primary key update or partition change, "
                      "doing read+delete+insert"));
  // Get all old fields, since we optimize away fields not in query

  const NdbRecord *key_rec;
  const uchar *key_row;

  if (m_user_defined_partitioning)
  {
    options.optionsPresent |= NdbOperation::OperationOptions::OO_PARTITION_ID;
    options.partitionId=old_part_id;
    poptions=&options;
  }

  setup_key_ref_for_ndb_record(&key_rec, &key_row, old_data, FALSE);

  if (!bitmap_is_set_all(table->read_set))
  {
    /*
      Need to read the rest of the columns for the later re-insert.

      Use a mask containing only the columns that are not in write_set,
      not in read_set, and not part of the primary key.
    */

    bitmap_copy(&m_bitmap, table->read_set);
    bitmap_union(&m_bitmap, table->write_set);
    bitmap_invert(&m_bitmap);
    if (!(op= trans->readTuple(key_rec, (const char *)key_row,
                               m_ndb_record, (char *)new_data,
                               get_ndb_lock_mode(m_lock.type),
                               (const unsigned char *)(m_bitmap.bitmap),
                               poptions,
                               sizeof(NdbOperation::OperationOptions))))
      ERR_RETURN(trans->getNdbError());

    if (table_share->blob_fields > 0)
    {
      my_bitmap_map *old_map= dbug_tmp_use_all_columns(table, table->read_set);
      error= get_blob_values(op, new_data, &m_bitmap);
      dbug_tmp_restore_column_map(table->read_set, old_map);
      if (error != 0)
        ERR_RETURN(op->getNdbError());
    }
    if (execute_no_commit(m_thd_ndb, trans, m_ignore_no_key) != 0)
    {
      table->status= STATUS_NOT_FOUND;
      DBUG_RETURN(ndb_err(trans));
    }
  }

  // Delete old row
  error= ndb_delete_row(old_data, TRUE);
  if (error)
  {
    DBUG_PRINT("info", ("delete failed"));
    DBUG_RETURN(error);
  }

  // Insert new row
  DBUG_PRINT("info", ("delete succeeded"));
  bool batched_update= (m_active_cursor != 0);
  /*
    If we are updating a primary key with auto_increment
    then we need to update the auto_increment counter
  */
  if (table->found_next_number_field &&
      bitmap_is_set(table->write_set,
                    table->found_next_number_field->field_index) &&
      (error= set_auto_inc(thd, table->found_next_number_field)))
  {
    DBUG_RETURN(error);
  }

  /*
    We are mapping a MySQLD PK changing update to an NdbApi delete
    and insert.
    The original PK changing update may not have written new values
    to all columns, so the write set may be partial.
    We set the write set to be all columns so that all values are
    copied from the old row to the new row.
  */
  my_bitmap_map *old_map=
    tmp_use_all_columns(table, table->write_set);
  error= ndb_write_row(new_data, TRUE, batched_update);
  tmp_restore_column_map(table->write_set, old_map);

  if (error)
  {
    DBUG_PRINT("info", ("insert failed"));
    if (trans->commitStatus() == NdbConnection::Started)
    {
      if (thd->slave_thread)
        g_ndb_slave_state.atTransactionAbort();
      m_thd_ndb->m_unsent_bytes= 0;
      m_thd_ndb->m_execute_count++;
      DBUG_PRINT("info", ("execute_count: %u", m_thd_ndb->m_execute_count));
      trans->execute(NdbTransaction::Rollback);
#ifdef FIXED_OLD_DATA_TO_ACTUALLY_CONTAIN_GOOD_DATA
      int undo_res;
      // Undo delete_row(old_data)
      undo_res= ndb_write_row((uchar *)old_data, TRUE, batched_update);
      if (undo_res)
        push_warning(table->in_use,
                     Sql_condition::WARN_LEVEL_WARN,
                     undo_res,
                     "NDB failed undoing delete at primary key update");
#endif
    }
    DBUG_RETURN(error);
  }
  DBUG_PRINT("info", ("delete+insert succeeded"));

  DBUG_RETURN(0);
}
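
/*
  For illustration only (a sketch with a hypothetical table t): a statement
  such as

    UPDATE t SET pk_col = pk_col + 1 WHERE pk_col = 10;

  changes the primary key and so cannot be applied as an in-place update in
  NDB; it takes the read+delete+insert path implemented above.
*/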

/**
  Check that all operations between first and last have received
  the error code errcode.
  If checking for HA_ERR_KEY_NOT_FOUND, update m_dupkey for any
  operation that succeeded (i.e. found a conflicting row).
*/
bool ha_ndbcluster::check_all_operations_for_error(NdbTransaction *trans,
                                                   const NdbOperation *first,
                                                   const NdbOperation *last,
                                                   uint errcode)
{
  const NdbOperation *op= first;
  DBUG_ENTER("ha_ndbcluster::check_all_operations_for_error");

  while(op)
  {
    NdbError err= op->getNdbError();
    if (err.status != NdbError::Success)
    {
      if (ndb_to_mysql_error(&err) != (int) errcode)
        DBUG_RETURN(FALSE);
      if (op == last) break;
      op= trans->getNextCompletedOperation(op);
    }
    else
    {
      // We found a duplicate
      if (op->getType() == NdbOperation::UniqueIndexAccess)
      {
        if (errcode == HA_ERR_KEY_NOT_FOUND)
        {
          NdbIndexOperation *iop= (NdbIndexOperation *) op;
          const NDBINDEX *index= iop->getIndex();
          // Find the key_no of the index
          for(uint i= 0; i<table->s->keys; i++)
          {
            if (m_index[i].unique_index == index)
            {
              m_dupkey= i;
              break;
            }
          }
        }
      }
      else
      {
        // Must have been primary key access
        DBUG_ASSERT(op->getType() == NdbOperation::PrimaryKeyAccess);
        if (errcode == HA_ERR_KEY_NOT_FOUND)
          m_dupkey= table->s->primary_key;
      }
      DBUG_RETURN(FALSE);
    }
  }
  DBUG_RETURN(TRUE);
}


/**
 * Check if the record contains any null valued columns that are part of a key
 */
static
int
check_null_in_record(const KEY* key_info, const uchar *record)
{
  KEY_PART_INFO *curr_part, *end_part;
  curr_part= key_info->key_part;
  end_part= curr_part + key_info->user_defined_key_parts;

  while (curr_part != end_part)
  {
    if (curr_part->null_bit &&
        (record[curr_part->null_offset] & curr_part->null_bit))
      return 1;
    curr_part++;
  }
  return 0;
  /*
    We could instead pre-compute a bitmask in table_share with one bit for
    every null-bit in the key, and then check this just by AND'ing the bitmask
    with the null bitmap in the record.
    But it is not clear it would be worth it.
  */
}
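
/*
  A minimal sketch of the pre-computed mask idea mentioned above, assuming a
  hypothetical per-key mask (key_null_mask) built once at open() time with
  the same length as the record's null bitmap. Illustrative only, not part
  of the handler:

    static inline int
    check_null_in_record_masked(const uchar *key_null_mask,
                                const uchar *record, uint null_bytes)
    {
      // A bit set in both the key mask and the record's null bitmap
      // means a key column is NULL in this record.
      for (uint i= 0; i < null_bytes; i++)
        if (key_null_mask[i] & record[i])
          return 1;
      return 0;
    }
*/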

/* Empty mask and dummy row, for reading no attributes using NdbRecord. */
/* Mask will be initialized to all zeros by the linker. */
static unsigned char empty_mask[(NDB_MAX_ATTRIBUTES_IN_TABLE+7)/8];
static char dummy_row[1];

/**
  Peek to check if any rows already exist with conflicting
  primary key or unique index values
*/

int ha_ndbcluster::peek_indexed_rows(const uchar *record,
                                     NDB_WRITE_OP write_op)
{
  NdbTransaction *trans;
  const NdbOperation *op;
  const NdbOperation *first, *last;
  NdbOperation::OperationOptions options;
  NdbOperation::OperationOptions *poptions=NULL;
  options.optionsPresent = 0;
  uint i;
  int res, error;
  DBUG_ENTER("peek_indexed_rows");
  if (unlikely(!(trans= get_transaction(error))))
  {
    DBUG_RETURN(error);
  }
  const NdbOperation::LockMode lm = get_ndb_lock_mode(m_lock.type);
  first= NULL;
  if (write_op != NDB_UPDATE && table->s->primary_key != MAX_KEY)
  {
    /*
     * Fetch any row with colliding primary key
     */
    const NdbRecord *key_rec=
      m_index[table->s->primary_key].ndb_unique_record_row;

    if (m_user_defined_partitioning)
    {
      uint32 part_id;
      int error;
      longlong func_value;
      my_bitmap_map *old_map= dbug_tmp_use_all_columns(table, table->read_set);
      error= m_part_info->get_partition_id(m_part_info, &part_id, &func_value);
      dbug_tmp_restore_column_map(table->read_set, old_map);
      if (error)
      {
        m_part_info->err_value= func_value;
        DBUG_RETURN(error);
      }
      options.optionsPresent |= NdbOperation::OperationOptions::OO_PARTITION_ID;
      options.partitionId=part_id;
      poptions=&options;
    }

    if (!(op= trans->readTuple(key_rec, (const char *)record,
                               m_ndb_record, dummy_row, lm, empty_mask,
                               poptions,
                               sizeof(NdbOperation::OperationOptions))))
      ERR_RETURN(trans->getNdbError());

    first= op;
  }
  /*
   * Fetch any rows with colliding unique indexes
   */
  KEY* key_info;
  for (i= 0, key_info= table->key_info; i < table->s->keys; i++, key_info++)
  {
    if (i != table_share->primary_key &&
        key_info->flags & HA_NOSAME &&
        bitmap_is_overlapping(table->write_set, m_key_fields[i]))
    {
      /*
        A unique index is defined on the table and it is being updated.
        We cannot look up a NULL field value in a unique index. But since
        keys with NULLs are not indexed, such rows cannot conflict anyway, so
        we just skip the index in this case.
      */
      if (check_null_in_record(key_info, record))
      {
        DBUG_PRINT("info", ("skipping check for key with NULL"));
        continue;
      }
      if (write_op != NDB_INSERT && !check_index_fields_in_write_set(i))
      {
        DBUG_PRINT("info", ("skipping check for key %u not in write_set", i));
        continue;
      }

      const NdbOperation *iop;
      const NdbRecord *key_rec= m_index[i].ndb_unique_record_row;
      if (!(iop= trans->readTuple(key_rec, (const char *)record,
                                  m_ndb_record, dummy_row,
                                  lm, empty_mask)))
        ERR_RETURN(trans->getNdbError());

      if (!first)
        first= iop;
    }
  }
  last= trans->getLastDefinedOperation();
  if (first)
    res= execute_no_commit_ie(m_thd_ndb, trans);
  else
  {
    // Table has no keys
    table->status= STATUS_NOT_FOUND;
    DBUG_RETURN(HA_ERR_KEY_NOT_FOUND);
  }
  if (check_all_operations_for_error(trans, first, last,
                                     HA_ERR_KEY_NOT_FOUND))
  {
    table->status= STATUS_NOT_FOUND;
    DBUG_RETURN(ndb_err(trans));
  }
  else
  {
    DBUG_PRINT("info", ("m_dupkey %d", m_dupkey));
  }
  DBUG_RETURN(0);
}


/**
  Read one record from NDB using unique secondary index.
*/

int ha_ndbcluster::unique_index_read(const uchar *key,
                                     uint key_len, uchar *buf)
{
  NdbTransaction *trans= m_thd_ndb->trans;
  DBUG_ENTER("ha_ndbcluster::unique_index_read");
  DBUG_PRINT("enter", ("key_len: %u, index: %u", key_len, active_index));
  DBUG_DUMP("key", key, key_len);
  DBUG_ASSERT(trans);

  NdbOperation::LockMode lm= get_ndb_lock_mode(m_lock.type);

#ifndef NDB_WITHOUT_JOIN_PUSHDOWN
  if (check_if_pushable(NdbQueryOperationDef::UniqueIndexAccess, active_index))
  {
    DBUG_ASSERT(lm == NdbOperation::LM_CommittedRead);
    const int error= pk_unique_index_read_key_pushed(active_index, key, NULL);
    if (unlikely(error))
      DBUG_RETURN(error);

    DBUG_ASSERT(m_active_query!=NULL);
    if (execute_no_commit_ie(m_thd_ndb, trans) != 0 ||
        m_active_query->getNdbError().code)
    {
      table->status= STATUS_GARBAGE;
      DBUG_RETURN(ndb_err(trans));
    }

    int result= fetch_next_pushed();
    if (result == NdbQuery::NextResult_gotRow)
    {
      DBUG_RETURN(0);
    }
    else if (result == NdbQuery::NextResult_scanComplete)
    {
      DBUG_RETURN(HA_ERR_KEY_NOT_FOUND);
    }
    else
    {
      DBUG_RETURN(ndb_err(trans));
    }
  }
  else
#endif
  {
    if (m_pushed_join_operation == PUSHED_ROOT)
    {
      m_thd_ndb->m_pushed_queries_dropped++;
    }

    const NdbOperation *op;

    if (!(op= pk_unique_index_read_key(active_index, key, buf, lm, NULL)))
      ERR_RETURN(trans->getNdbError());

    if (execute_no_commit_ie(m_thd_ndb, trans) != 0 ||
        op->getNdbError().code)
    {
      int err= ndb_err(trans);
      if(err==HA_ERR_KEY_NOT_FOUND)
        table->status= STATUS_NOT_FOUND;
      else
        table->status= STATUS_GARBAGE;

      DBUG_RETURN(err);
    }

    table->status= 0;
    DBUG_RETURN(0);
  }
}

int
ha_ndbcluster::scan_handle_lock_tuple(NdbScanOperation *scanOp,
                                      NdbTransaction *trans)
{
  DBUG_ENTER("ha_ndbcluster::scan_handle_lock_tuple");
  if (m_lock_tuple)
  {
    /*
      Lock level m_lock.type is either TL_WRITE_ALLOW_WRITE
      (SELECT FOR UPDATE) or TL_READ_WITH_SHARED_LOCKS (SELECT
      LOCK IN SHARE MODE) and the row was not explicitly unlocked
      with an unlock_row() call
    */
    const NdbOperation *op;
    // Lock row
    DBUG_PRINT("info", ("Keeping lock on scanned row"));

    if (!(op= scanOp->lockCurrentTuple(trans, m_ndb_record,
                                       dummy_row, empty_mask)))
    {
      /* purecov: begin inspected */
      m_lock_tuple= FALSE;
      ERR_RETURN(trans->getNdbError());
      /* purecov: end */
    }
    m_thd_ndb->m_unsent_bytes+=12;
  }
  m_lock_tuple= FALSE;
  DBUG_RETURN(0);
}

inline int ha_ndbcluster::fetch_next(NdbScanOperation* cursor)
{
  DBUG_ENTER("fetch_next");
  int local_check;
  int error;
  NdbTransaction *trans= m_thd_ndb->trans;

  DBUG_ASSERT(trans);
  if ((error= scan_handle_lock_tuple(cursor, trans)) != 0)
    DBUG_RETURN(error);

  bool contact_ndb= m_lock.type < TL_WRITE_ALLOW_WRITE &&
                    m_lock.type != TL_READ_WITH_SHARED_LOCKS;
  do {
    DBUG_PRINT("info", ("Call nextResult, contact_ndb: %d", contact_ndb));
    /*
      We can only handle one tuple with blobs at a time.
    */
    if (m_thd_ndb->m_unsent_bytes && m_blobs_pending)
    {
      if (execute_no_commit(m_thd_ndb, trans, m_ignore_no_key) != 0)
        DBUG_RETURN(ndb_err(trans));
    }

    /*
      There should be no unexamined completed operations.
      nextResult() on Blobs generates Blob part read ops,
      so we will free them here.
    */
    release_completed_operations(trans);

    if ((local_check= cursor->nextResult(&_m_next_row,
                                         contact_ndb,
                                         m_thd_ndb->m_force_send)) == 0)
    {
      /*
        Explicitly lock the tuple if "select for update" or
        "select lock in share mode"
      */
      m_lock_tuple= (m_lock.type == TL_WRITE_ALLOW_WRITE
                     ||
                     m_lock.type == TL_READ_WITH_SHARED_LOCKS);
      DBUG_RETURN(0);
    }
    else if (local_check == 1 || local_check == 2)
    {
      // 1: No more records
      // 2: No more cached records

      /*
        Before fetching more rows and releasing lock(s),
        all pending update or delete operations should
        be sent to NDB
      */
      DBUG_PRINT("info", ("thd_ndb->m_unsent_bytes: %ld",
                          (long) m_thd_ndb->m_unsent_bytes));
      if (m_thd_ndb->m_unsent_bytes)
      {
        if ((error = flush_bulk_insert()) != 0)
          DBUG_RETURN(error);
      }
      contact_ndb= (local_check == 2);
    }
    else
    {
      DBUG_RETURN(ndb_err(trans));
    }
  } while (local_check == 2);

  DBUG_RETURN(1);
}

int ha_ndbcluster::fetch_next_pushed()
{
  DBUG_ENTER("fetch_next_pushed (from pushed operation)");

  DBUG_ASSERT(m_pushed_operation);
  NdbQuery::NextResultOutcome result=
    m_pushed_operation->nextResult(true, m_thd_ndb->m_force_send);

  /**
   * Only prepare result & status from this operation in the pushed join.
   * Consecutive rows are prepared through ::index_read_pushed() and
   * ::index_next_pushed() which unpack and set correct status for each row.
   */
  if (result == NdbQuery::NextResult_gotRow)
  {
    DBUG_ASSERT(m_next_row!=NULL);
    DBUG_PRINT("info", ("One more record found"));
    table->status= 0;
    unpack_record(table->record[0], m_next_row);
//  m_thd_ndb->m_pushed_reads++;
//  DBUG_RETURN(0)
  }
  else if (result == NdbQuery::NextResult_scanComplete)
  {
    DBUG_ASSERT(m_next_row==NULL);
    DBUG_PRINT("info", ("No more records"));
    table->status= STATUS_NOT_FOUND;
//  m_thd_ndb->m_pushed_reads++;
//  DBUG_RETURN(HA_ERR_END_OF_FILE);
  }
  else
  {
    DBUG_PRINT("info", ("Error from 'nextResult()'"));
    table->status= STATUS_GARBAGE;
//  DBUG_ASSERT(false);
//  DBUG_RETURN(ndb_err(m_thd_ndb->trans));
  }
  DBUG_RETURN(result);
}

/**
  Get the first record from an indexed table access being a child
  operation in a pushed join. Fetch will be from prefetched
  cached records which are materialized into the bound buffer
  areas as a result of this call.
*/

int
ha_ndbcluster::index_read_pushed(uchar *buf, const uchar *key,
                                 key_part_map keypart_map)
{
  DBUG_ENTER("index_read_pushed");

  // The handler might have decided not to execute the pushed joins which
  // have been prepared. In this case we do an unpushed index_read based
  // on 'Plain old' NdbOperations.
  if (unlikely(!check_is_pushed()))
  {
    DBUG_RETURN(index_read_map(buf, key, keypart_map, HA_READ_KEY_EXACT));
  }

  // Might need to re-establish the first result row (wrt. its parents which
  // may have been navigated)
  NdbQuery::NextResultOutcome result= m_pushed_operation->firstResult();

  // Result from the pushed operation will be referred to by 'm_next_row' if non-NULL
  if (result == NdbQuery::NextResult_gotRow)
  {
    DBUG_ASSERT(m_next_row!=NULL);
    unpack_record(buf, m_next_row);
    table->status= 0;
    m_thd_ndb->m_pushed_reads++;
  }
  else
  {
    DBUG_ASSERT(result!=NdbQuery::NextResult_gotRow);
    table->status= STATUS_NOT_FOUND;
    DBUG_PRINT("info", ("No record found"));
//  m_thd_ndb->m_pushed_reads++;
//  DBUG_RETURN(HA_ERR_END_OF_FILE);
  }
  DBUG_RETURN(0);
}


/**
  Get the next record from an indexed table access being a child
  operation in a pushed join. Fetch will be from prefetched
  cached records which are materialized into the bound buffer
  areas as a result of this call.
*/
int ha_ndbcluster::index_next_pushed(uchar *buf)
{
  DBUG_ENTER("index_next_pushed");

  // The handler might have decided not to execute the pushed joins which
  // have been prepared. In this case we do an unpushed index_read based
  // on 'Plain old' NdbOperations.
  if (unlikely(!check_is_pushed()))
  {
    DBUG_RETURN(index_next(buf));
  }

  DBUG_ASSERT(m_pushed_join_operation>PUSHED_ROOT);  // Child of a pushed join
  DBUG_ASSERT(m_active_query==NULL);

  int res = fetch_next_pushed();
  if (res == NdbQuery::NextResult_gotRow)
  {
    DBUG_RETURN(0);
  }
  else if (res == NdbQuery::NextResult_scanComplete)
  {
    DBUG_RETURN(HA_ERR_END_OF_FILE);
  }
  else
  {
    DBUG_RETURN(ndb_err(m_thd_ndb->trans));
  }
}


/**
  Get the next record of a started scan. Try to fetch
  it locally from NdbApi cached records if possible,
  otherwise ask NDB for more.

  @note
    If this is an update/delete, make sure not to contact
    NDB before any pending ops have been sent to NDB.
*/

inline int ha_ndbcluster::next_result(uchar *buf)
{
  int res;
  DBUG_ENTER("next_result");

  if (m_active_cursor)
  {
    if ((res= fetch_next(m_active_cursor)) == 0)
    {
      DBUG_PRINT("info", ("One more record found"));

      unpack_record(buf, m_next_row);
      table->status= 0;
      DBUG_RETURN(0);
    }
    else if (res == 1)
    {
      // No more records
      table->status= STATUS_NOT_FOUND;

      DBUG_PRINT("info", ("No more records"));
      DBUG_RETURN(HA_ERR_END_OF_FILE);
    }
    else
    {
      DBUG_RETURN(ndb_err(m_thd_ndb->trans));
    }
  }
  else if (m_active_query)
  {
    res= fetch_next_pushed();
    if (res == NdbQuery::NextResult_gotRow)
    {
      DBUG_RETURN(0);
    }
    else if (res == NdbQuery::NextResult_scanComplete)
    {
      DBUG_RETURN(HA_ERR_END_OF_FILE);
    }
    else
    {
      DBUG_RETURN(ndb_err(m_thd_ndb->trans));
    }
  }
  else
    DBUG_RETURN(HA_ERR_END_OF_FILE);
}

/**
  Do a primary key or unique key index read operation.
  The key value is taken from a buffer in mysqld key format.
*/
const NdbOperation *
ha_ndbcluster::pk_unique_index_read_key(uint idx, const uchar *key, uchar *buf,
                                        NdbOperation::LockMode lm,
                                        Uint32 *ppartition_id)
{
  const NdbOperation *op;
  const NdbRecord *key_rec;
  NdbOperation::OperationOptions options;
  NdbOperation::OperationOptions *poptions = NULL;
  options.optionsPresent= 0;
  NdbOperation::GetValueSpec gets[2];

  DBUG_ASSERT(m_thd_ndb->trans);

  if (idx != MAX_KEY)
    key_rec= m_index[idx].ndb_unique_record_key;
  else
    key_rec= m_ndb_hidden_key_record;

  /* Initialize the null bitmap, setting unused null bits to 1. */
  memset(buf, 0xff, table->s->null_bytes);

  if (table_share->primary_key == MAX_KEY)
  {
    get_hidden_fields_keyop(&options, gets);
    poptions= &options;
  }

  if (ppartition_id != NULL)
  {
    assert(m_user_defined_partitioning);
    options.optionsPresent|= NdbOperation::OperationOptions::OO_PARTITION_ID;
    options.partitionId= *ppartition_id;
    poptions= &options;
  }

  op= m_thd_ndb->trans->readTuple(key_rec, (const char *)key, m_ndb_record,
                                  (char *)buf, lm,
                                  (uchar *)(table->read_set->bitmap), poptions,
                                  sizeof(NdbOperation::OperationOptions));

  if (uses_blob_value(table->read_set) &&
      get_blob_values(op, buf, table->read_set) != 0)
    return NULL;

  return op;
}

extern void sql_print_information(const char *format, ...);

#ifndef NDB_WITHOUT_JOIN_PUSHDOWN
static
bool
is_shrinked_varchar(const Field *field)
{
  if (field->real_type() == MYSQL_TYPE_VARCHAR)
  {
    if (((Field_varstring*)field)->length_bytes == 1)
      return true;
  }

  return false;
}

int
ha_ndbcluster::pk_unique_index_read_key_pushed(uint idx,
                                               const uchar *key,
                                               Uint32 *ppartition_id)
{
  DBUG_ENTER("pk_unique_index_read_key_pushed");
  NdbOperation::OperationOptions options;
  NdbOperation::OperationOptions *poptions = NULL;
  options.optionsPresent= 0;
  NdbOperation::GetValueSpec gets[2];

  DBUG_ASSERT(m_thd_ndb->trans);
  DBUG_ASSERT(idx < MAX_KEY);

  if (m_active_query)
  {
    m_active_query->close(FALSE);
    m_active_query= NULL;
  }

  if (table_share->primary_key == MAX_KEY)
  {
    get_hidden_fields_keyop(&options, gets);
    poptions= &options;
  }

  if (ppartition_id != NULL)
  {
    assert(m_user_defined_partitioning);
    options.optionsPresent|= NdbOperation::OperationOptions::OO_PARTITION_ID;
    options.partitionId= *ppartition_id;
    poptions= &options;
  }

  KEY *key_def= &table->key_info[idx];
  KEY_PART_INFO *key_part;

  uint i;
  Uint32 offset= 0;
  NdbQueryParamValue paramValues[ndb_pushed_join::MAX_KEY_PART];
  DBUG_ASSERT(key_def->user_defined_key_parts <= ndb_pushed_join::MAX_KEY_PART);

  uint map[ndb_pushed_join::MAX_KEY_PART];
  ndbcluster_build_key_map(m_table, m_index[idx], &table->key_info[idx], map);

  // Bind key values defining the root of the pushed join
  for (i = 0, key_part= key_def->key_part; i < key_def->user_defined_key_parts; i++, key_part++)
  {
    bool shrinkVarChar= is_shrinked_varchar(key_part->field);

    if (key_part->null_bit)                         // Column is nullable
    {
      DBUG_ASSERT(idx != table_share->primary_key); // PK can't be nullable
      DBUG_ASSERT(*(key+offset)==0);                // NULL values not allowed in key
                                                    // Value is imm. after NULL indicator
      paramValues[map[i]]= NdbQueryParamValue(key+offset+1,shrinkVarChar);
    }
    else                                            // Non-nullable column
    {
      paramValues[map[i]]= NdbQueryParamValue(key+offset,shrinkVarChar);
    }
    offset+= key_part->store_length;
  }

  const int ret= create_pushed_join(paramValues, key_def->user_defined_key_parts);
  DBUG_RETURN(ret);
}

#endif

/** Count the number of key columns covered by a key prefix. */
static uint
count_key_columns(const KEY *key_info, const key_range *key)
{
  KEY_PART_INFO *first_key_part= key_info->key_part;
  KEY_PART_INFO *key_part_end= first_key_part + key_info->user_defined_key_parts;
  KEY_PART_INFO *key_part;
  uint length= 0;
  for(key_part= first_key_part; key_part < key_part_end; key_part++)
  {
    if (length >= key->length)
      break;
    length+= key_part->store_length;
  }
  return key_part - first_key_part;
}
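
/*
  Worked example (a sketch, assuming a hypothetical index over (a INT, b INT)):
  for 'WHERE a = 1' the key buffer covers only column a, so key->length equals
  a's store_length and count_key_columns() returns 1; for 'WHERE a = 1 AND
  b = 2' it covers both key parts and the function returns 2.
*/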

/*
  Helper method to compute NDB index bounds. Note: does not set range_no.
  Stats queries may pass slightly different key flags, so "from" indicates
  the caller: 0=normal, 1=records-in-range (RIR), 2=records-per-key (RPK).
*/
void
compute_index_bounds(NdbIndexScanOperation::IndexBound & bound,
                     const KEY *key_info,
                     const key_range *start_key, const key_range *end_key,
                     int from)
{
  DBUG_ENTER("ha_ndbcluster::compute_index_bounds");
  DBUG_PRINT("info", ("from: %d", from));

#ifndef DBUG_OFF
  DBUG_PRINT("info", ("key parts: %u length: %u",
                      key_info->user_defined_key_parts, key_info->key_length));
  {
    for (uint j= 0; j <= 1; j++)
    {
      const key_range* kr= (j == 0 ? start_key : end_key);
      if (kr)
      {
        DBUG_PRINT("info", ("key range %u: length: %u map: %lx flag: %d",
                            j, kr->length, kr->keypart_map, kr->flag));
        DBUG_DUMP("key", kr->key, kr->length);
      }
      else
      {
        DBUG_PRINT("info", ("key range %u: none", j));
      }
    }
  }
#endif

  if (start_key)
  {
    bound.low_key= (const char*)start_key->key;
    bound.low_key_count= count_key_columns(key_info, start_key);
    bound.low_inclusive=
      start_key->flag != HA_READ_AFTER_KEY &&
      start_key->flag != HA_READ_BEFORE_KEY;
  }
  else
  {
    bound.low_key= NULL;
    bound.low_key_count= 0;
  }

  /* An RIR query for x >= 1 inexplicably passes HA_READ_KEY_EXACT. */
  if (start_key &&
      (start_key->flag == HA_READ_KEY_EXACT ||
       start_key->flag == HA_READ_PREFIX_LAST) &&
      from != 1)
  {
    bound.high_key= bound.low_key;
    bound.high_key_count= bound.low_key_count;
    bound.high_inclusive= TRUE;
  }
  else if (end_key)
  {
    bound.high_key= (const char*)end_key->key;
    bound.high_key_count= count_key_columns(key_info, end_key);
    /*
      For some reason, 'where b >= 1 and b <= 3' uses HA_READ_AFTER_KEY for
      the end_key.
      So HA_READ_AFTER_KEY in end_key sets high_inclusive, even though in
      start_key it does not set low_inclusive.
    */
    bound.high_inclusive= end_key->flag != HA_READ_BEFORE_KEY;
    if (end_key->flag == HA_READ_KEY_EXACT ||
        end_key->flag == HA_READ_PREFIX_LAST)
    {
      bound.low_key= bound.high_key;
      bound.low_key_count= bound.high_key_count;
      bound.low_inclusive= TRUE;
    }
  }
  else
  {
    bound.high_key= NULL;
    bound.high_key_count= 0;
  }
  DBUG_PRINT("info", ("start_flag=%d end_flag=%d"
                      " lo_keys=%d lo_incl=%d hi_keys=%d hi_incl=%d",
                      start_key?start_key->flag:0, end_key?end_key->flag:0,
                      bound.low_key_count,
                      bound.low_key_count?bound.low_inclusive:0,
                      bound.high_key_count,
                      bound.high_key_count?bound.high_inclusive:0));
  DBUG_VOID_RETURN;
}
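
/*
  For illustration (a sketch based only on the logic above, with a
  hypothetical index over column b): for 'WHERE b >= 1 AND b <= 3' the
  start_key flag is neither HA_READ_AFTER_KEY nor HA_READ_BEFORE_KEY, so
  low_key=1 with low_inclusive=TRUE; the end_key arrives as
  HA_READ_AFTER_KEY (as noted in the comment above), so high_key=3 with
  high_inclusive=TRUE, giving the NDB bound [1, 3].
*/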

/**
  Start ordered index scan in NDB
*/

int ha_ndbcluster::ordered_index_scan(const key_range *start_key,
                                      const key_range *end_key,
                                      bool sorted, bool descending,
                                      uchar* buf, part_id_range *part_spec)
{
  NdbTransaction *trans;
  NdbIndexScanOperation *op;
  int error;

  DBUG_ENTER("ha_ndbcluster::ordered_index_scan");
  DBUG_PRINT("enter", ("index: %u, sorted: %d, descending: %d read_set=0x%x",
             active_index, sorted, descending, table->read_set->bitmap[0]));
  DBUG_PRINT("enter", ("Starting new ordered scan on %s", m_tabname));

  // Check that sorted seems to be initialised
  DBUG_ASSERT(sorted == 0 || sorted == 1);

  if (unlikely(!(trans= get_transaction(error))))
  {
    DBUG_RETURN(error);
  }

  if ((error= close_scan()))
    DBUG_RETURN(error);

  const NdbOperation::LockMode lm = get_ndb_lock_mode(m_lock.type);

  const NdbRecord *key_rec= m_index[active_index].ndb_record_key;
  const NdbRecord *row_rec= m_ndb_record;

  NdbIndexScanOperation::IndexBound bound;
  NdbIndexScanOperation::IndexBound *pbound = NULL;
  if (start_key != NULL || end_key != NULL)
  {
    /*
      Compute bounds info, reversing range boundaries
      if descending
    */
    compute_index_bounds(bound,
                         table->key_info + active_index,
                         (descending?
                          end_key : start_key),
                         (descending?
                          start_key : end_key),
                         0);
    bound.range_no = 0;
    pbound = &bound;
  }

#ifndef NDB_WITHOUT_JOIN_PUSHDOWN
  if (check_if_pushable(NdbQueryOperationDef::OrderedIndexScan, active_index,
                        sorted))
  {
    const int error= create_pushed_join();
    if (unlikely(error))
      DBUG_RETURN(error);

    NdbQuery* const query= m_active_query;
    if (sorted && query->getQueryOperation((uint)PUSHED_ROOT)
                       ->setOrdering(descending ? NdbQueryOptions::ScanOrdering_descending
                                                : NdbQueryOptions::ScanOrdering_ascending))
    {
      ERR_RETURN(query->getNdbError());
    }

    if (pbound && query->setBound(key_rec, pbound)!=0)
      ERR_RETURN(query->getNdbError());

    m_thd_ndb->m_scan_count++;

    bool prunable = false;
    if (unlikely(query->isPrunable(prunable) != 0))
      ERR_RETURN(query->getNdbError());
    if (prunable)
      m_thd_ndb->m_pruned_scan_count++;

    DBUG_ASSERT(!uses_blob_value(table->read_set));  // Can't have BLOB in pushed joins (yet)
  }
  else
#endif
  {
    if (m_pushed_join_operation == PUSHED_ROOT)
    {
      m_thd_ndb->m_pushed_queries_dropped++;
    }

    NdbScanOperation::ScanOptions options;
    options.optionsPresent=NdbScanOperation::ScanOptions::SO_SCANFLAGS;
    options.scan_flags=0;

    NdbOperation::GetValueSpec gets[2];
    if (table_share->primary_key == MAX_KEY)
      get_hidden_fields_scan(&options, gets);

    if (lm == NdbOperation::LM_Read)
      options.scan_flags|= NdbScanOperation::SF_KeyInfo;
    if (sorted)
      options.scan_flags|= NdbScanOperation::SF_OrderByFull;
    if (descending)
      options.scan_flags|= NdbScanOperation::SF_Descending;

    /* Partition pruning */
    if (m_use_partition_pruning &&
        m_user_defined_partitioning && part_spec != NULL &&
        part_spec->start_part == part_spec->end_part)
    {
      /* Explicitly set partition id when pruning User-defined partitioned scan */
      options.partitionId = part_spec->start_part;
      options.optionsPresent |= NdbScanOperation::ScanOptions::SO_PARTITION_ID;
    }

    NdbInterpretedCode code(m_table);
    if (m_cond && m_cond->generate_scan_filter(&code, &options))
      ERR_RETURN(code.getNdbError());

    if (!(op= trans->scanIndex(key_rec, row_rec, lm,
                               (uchar *)(table->read_set->bitmap),
                               pbound,
                               &options,
                               sizeof(NdbScanOperation::ScanOptions))))
      ERR_RETURN(trans->getNdbError());

    DBUG_PRINT("info", ("Is scan pruned to 1 partition? : %u", op->getPruned()));
    m_thd_ndb->m_scan_count++;
    m_thd_ndb->m_pruned_scan_count += (op->getPruned()? 1 : 0);

    if (uses_blob_value(table->read_set) &&
        get_blob_values(op, NULL, table->read_set) != 0)
      ERR_RETURN(op->getNdbError());

    m_active_cursor= op;
  }

  if (sorted)
  {
    m_thd_ndb->m_sorted_scan_count++;
  }

  if (execute_no_commit(m_thd_ndb, trans, m_ignore_no_key) != 0)
    DBUG_RETURN(ndb_err(trans));

  DBUG_RETURN(next_result(buf));
}

static
int
guess_scan_flags(NdbOperation::LockMode lm,
                 const NDBTAB* tab, const MY_BITMAP* readset)
{
  int flags= 0;
  flags|= (lm == NdbOperation::LM_Read) ? NdbScanOperation::SF_KeyInfo : 0;
  if (tab->checkColumns(0, 0) & 2)
  {
    int ret = tab->checkColumns(readset->bitmap, no_bytes_in_map(readset));

    if (ret & 2)
    { // If disk columns... use disk scan
      flags |= NdbScanOperation::SF_DiskScan;
    }
    else if ((ret & 4) == 0 && (lm == NdbOperation::LM_Exclusive))
    {
      // If no memory column is set and lock mode is exclusive... guess disk scan
      flags |= NdbScanOperation::SF_DiskScan;
    }
  }
  return flags;
}

/*
  Start a full table scan in NDB, or a unique index scan
 */

int ha_ndbcluster::full_table_scan(const KEY* key_info,
                                   const key_range *start_key,
                                   const key_range *end_key,
                                   uchar *buf)
{
  int error;
  NdbTransaction *trans= m_thd_ndb->trans;
  part_id_range part_spec;
  bool use_set_part_id= FALSE;
  NdbOperation::GetValueSpec gets[2];

  DBUG_ENTER("full_table_scan");
  DBUG_PRINT("enter", ("Starting new scan on %s", m_tabname));

  if (m_use_partition_pruning && m_user_defined_partitioning)
  {
    DBUG_ASSERT(m_pushed_join_operation != PUSHED_ROOT);
    part_spec.start_part= 0;
    part_spec.end_part= m_part_info->get_tot_partitions() - 1;
    prune_partition_set(table, &part_spec);
    DBUG_PRINT("info", ("part_spec.start_part: %u  part_spec.end_part: %u",
                        part_spec.start_part, part_spec.end_part));
    /*
      If partition pruning has found no partition in the set
      we can return HA_ERR_END_OF_FILE
    */
    if (part_spec.start_part > part_spec.end_part)
    {
      DBUG_RETURN(HA_ERR_END_OF_FILE);
    }

    if (part_spec.start_part == part_spec.end_part)
    {
      /*
       * Only one partition needs to be scanned. If sorted output was
       * requested we no longer need to enforce it, since the output from
       * a single ordered partition index is always sorted.
       *
       * Note : This table scan pruning currently only occurs for
       * UserDefined partitioned tables.
       * It could be extended to occur for natively partitioned tables if
       * the Partitioning layer can make a key (e.g. start or end key)
       * available so that we can determine the correct pruning in the
       * NDBAPI layer.
       */
      use_set_part_id= TRUE;
      if (!trans)
        if (unlikely(!(trans= get_transaction_part_id(part_spec.start_part,
                                                      error))))
          DBUG_RETURN(error);
    }
  }
  if (!trans)
    if (unlikely(!(trans= start_transaction(error))))
      DBUG_RETURN(error);

  const NdbOperation::LockMode lm = get_ndb_lock_mode(m_lock.type);
  NdbScanOperation::ScanOptions options;
  options.optionsPresent = (NdbScanOperation::ScanOptions::SO_SCANFLAGS |
                            NdbScanOperation::ScanOptions::SO_PARALLEL);
  options.scan_flags = guess_scan_flags(lm, m_table, table->read_set);
  options.parallel= DEFAULT_PARALLELISM;

  if (use_set_part_id) {
    assert(m_user_defined_partitioning);
    options.optionsPresent|= NdbScanOperation::ScanOptions::SO_PARTITION_ID;
    options.partitionId = part_spec.start_part;
  }

  if (table_share->primary_key == MAX_KEY)
    get_hidden_fields_scan(&options, gets);

#ifndef NDB_WITHOUT_JOIN_PUSHDOWN
  if (check_if_pushable(NdbQueryOperationDef::TableScan))
  {
    const int error= create_pushed_join();
    if (unlikely(error))
      DBUG_RETURN(error);

    m_thd_ndb->m_scan_count++;
    DBUG_ASSERT(!uses_blob_value(table->read_set));  // Can't have BLOB in pushed joins (yet)
  }
  else
#endif
  {
    if (m_pushed_join_operation == PUSHED_ROOT)
    {
      m_thd_ndb->m_pushed_queries_dropped++;
    }

    NdbScanOperation *op;
    NdbInterpretedCode code(m_table);

    if (!key_info)
    {
      if (m_cond && m_cond->generate_scan_filter(&code, &options))
        ERR_RETURN(code.getNdbError());
    }
    else
    {
      /* Unique index scan in NDB (full table scan with scan filter) */
      DBUG_PRINT("info", ("Starting unique index scan"));
      if (!m_cond)
        m_cond= new ha_ndbcluster_cond;

      if (!m_cond)
      {
        my_errno= HA_ERR_OUT_OF_MEM;
        DBUG_RETURN(my_errno);
      }
      if (m_cond->generate_scan_filter_from_key(&code, &options, key_info, start_key, end_key, buf))
        ERR_RETURN(code.getNdbError());
    }

    if (!(op= trans->scanTable(m_ndb_record, lm,
                               (uchar *)(table->read_set->bitmap),
                               &options, sizeof(NdbScanOperation::ScanOptions))))
      ERR_RETURN(trans->getNdbError());

    m_thd_ndb->m_scan_count++;
    m_thd_ndb->m_pruned_scan_count += (op->getPruned()? 1 : 0);

    DBUG_ASSERT(m_active_cursor==NULL);
    m_active_cursor= op;

    if (uses_blob_value(table->read_set) &&
        get_blob_values(op, NULL, table->read_set) != 0)
      ERR_RETURN(op->getNdbError());
  } // if (check_if_pushable(NdbQueryOperationDef::TableScan))

  if (execute_no_commit(m_thd_ndb, trans, m_ignore_no_key) != 0)
    DBUG_RETURN(ndb_err(trans));
  DBUG_PRINT("exit", ("Scan started successfully"));
  DBUG_RETURN(next_result(buf));
} // ha_ndbcluster::full_table_scan()

int
ha_ndbcluster::set_auto_inc(THD *thd, Field *field)
{
  DBUG_ENTER("ha_ndbcluster::set_auto_inc");
  bool read_bit= bitmap_is_set(table->read_set, field->field_index);
  bitmap_set_bit(table->read_set, field->field_index);
  Uint64 next_val= (Uint64) field->val_int() + 1;
  if (!read_bit)
    bitmap_clear_bit(table->read_set, field->field_index);
  DBUG_RETURN(set_auto_inc_val(thd, next_val));
}
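
/*
  For illustration only (a sketch with a hypothetical table t(id ... AUTO_INCREMENT)):
  after a write that explicitly supplies an auto_increment value, e.g.

    INSERT INTO t (id) VALUES (42);

  set_auto_inc() reads the field value back (42) and asks set_auto_inc_val()
  to advance the shared NDB auto-increment counter to 43, so that later
  prefetched ranges do not hand out already-used values.
*/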

inline
int
ha_ndbcluster::set_auto_inc_val(THD *thd, Uint64 value)
{
  Ndb *ndb= get_ndb(thd);
  DBUG_ENTER("ha_ndbcluster::set_auto_inc_val");
#ifndef DBUG_OFF
  char buff[22];
  DBUG_PRINT("info",
             ("Trying to set next auto increment value to %s",
              llstr(value, buff)));
#endif
  if (ndb->checkUpdateAutoIncrementValue(m_share->tuple_id_range, value))
  {
    Ndb_tuple_id_range_guard g(m_share);
    if (ndb->setAutoIncrementValue(m_table, g.range, value, TRUE)
        == -1)
      ERR_RETURN(ndb->getNdbError());
  }
  DBUG_RETURN(0);
}

Uint32
ha_ndbcluster::setup_get_hidden_fields(NdbOperation::GetValueSpec gets[2])
{
  Uint32 num_gets= 0;
  /*
    We need to read the hidden primary key, and possibly the FRAGMENT
    pseudo-column.
  */
  gets[num_gets].column= get_hidden_key_column();
  gets[num_gets].appStorage= &m_ref;
  num_gets++;
  if (m_user_defined_partitioning)
  {
    /* Need to read the partition id to support ORDER BY columns. */
    gets[num_gets].column= NdbDictionary::Column::FRAGMENT;
    gets[num_gets].appStorage= &m_part_id;
    num_gets++;
  }
  return num_gets;
}

void
ha_ndbcluster::get_hidden_fields_keyop(NdbOperation::OperationOptions *options,
                                       NdbOperation::GetValueSpec gets[2])
{
  Uint32 num_gets= setup_get_hidden_fields(gets);
  options->optionsPresent|= NdbOperation::OperationOptions::OO_GETVALUE;
  options->extraGetValues= gets;
  options->numExtraGetValues= num_gets;
}

void
ha_ndbcluster::get_hidden_fields_scan(NdbScanOperation::ScanOptions *options,
                                      NdbOperation::GetValueSpec gets[2])
{
  Uint32 num_gets= setup_get_hidden_fields(gets);
  options->optionsPresent|= NdbScanOperation::ScanOptions::SO_GETVALUE;
  options->extraGetValues= gets;
  options->numExtraGetValues= num_gets;
}

inline void
ha_ndbcluster::eventSetAnyValue(THD *thd,
                                NdbOperation::OperationOptions *options) const
{
  options->anyValue= 0;
  if (unlikely(m_slow_path))
  {
    /*
      Ignore TNTO_NO_LOGGING for the slave thd. It is used to indicate
      the log-slave-updates option. This is instead handled in the
      injector thread, by looking explicitly at the
      opt_log_slave_updates flag.
    */
    Thd_ndb *thd_ndb= get_thd_ndb(thd);
    if (thd->slave_thread)
    {
      /*
        Slave-thread, we are applying a replicated event.
        We set the server_id to the value received from the log, which
        may be a composite of server_id and other data according
        to the server_id_bits option.
        In the future it may be useful to support *not* mapping composite
        AnyValues to/from binlogged server-ids.
      */
      options->optionsPresent |= NdbOperation::OperationOptions::OO_ANYVALUE;
      options->anyValue = thd_unmasked_server_id(thd);
    }
    else if (thd_ndb->trans_options & TNTO_NO_LOGGING)
    {
      options->optionsPresent |= NdbOperation::OperationOptions::OO_ANYVALUE;
      ndbcluster_anyvalue_set_nologging(options->anyValue);
    }
  }
#ifndef DBUG_OFF
  /*
    MySQLD will set the user-portion of AnyValue (if any) to all 1s.
    This tests code filtering ServerIds on the value of server-id-bits.
  */
  const char* p = getenv("NDB_TEST_ANYVALUE_USERDATA");
  if (p != 0 && *p != 0 && *p != '0' && *p != 'n' && *p != 'N')
  {
    options->optionsPresent |= NdbOperation::OperationOptions::OO_ANYVALUE;
    dbug_ndbcluster_anyvalue_set_userbits(options->anyValue);
  }
#endif
}
4605 
4606 #ifdef HAVE_NDB_BINLOG
4607 /**
4608    prepare_conflict_detection
4609 
4610    This method is called during operation definition by the slave,
4611    when writing to a table with conflict detection defined.
4612 
4613    It is responsible for defining and adding any operation filtering
4614    required, and for saving any operation definition state required
4615    for post-execute analysis
4616 */
4617 int
prepare_conflict_detection(enum_conflicting_op_type op_type,const NdbRecord * key_rec,const uchar * old_data,const uchar * new_data,NdbInterpretedCode * code,NdbOperation::OperationOptions * options)4618 ha_ndbcluster::prepare_conflict_detection(enum_conflicting_op_type op_type,
4619                                           const NdbRecord* key_rec,
4620                                           const uchar* old_data,
4621                                           const uchar* new_data,
4622                                           NdbInterpretedCode* code,
4623                                           NdbOperation::OperationOptions* options)
4624 {
4625   DBUG_ENTER("prepare_conflict_detection");
4626 
4627   int res = 0;
4628   const st_conflict_fn_def* conflict_fn = m_share->m_cfn_share->m_conflict_fn;
4629   assert( conflict_fn != NULL );
4630 
4631 
4632   /*
4633      Prepare interpreted code for operation (update + delete only) according
4634      to algorithm used
4635   */
4636   if (op_type != WRITE_ROW)
4637   {
4638     res = conflict_fn->prep_func(m_share->m_cfn_share,
4639                                  op_type,
4640                                  old_data,
4641                                  new_data,
4642                                  table->write_set,
4643                                  code);
4644 
4645     if (!res)
4646     {
4647       /* Attach conflict detecting filter program to operation */
4648       options->optionsPresent|=NdbOperation::OperationOptions::OO_INTERPRETED;
4649       options->interpretedCode= code;
4650     }
4651   } // if (op_type != WRITE_ROW)
4652 
4653   g_ndb_slave_state.current_conflict_defined_op_count++;
4654 
4655   /* Now save data for potential insert to exceptions table... */
4656   const uchar* row_to_save = (op_type == DELETE_ROW)? old_data : new_data;
4657   Ndb_exceptions_data ex_data;
4658   ex_data.share= m_share;
4659   ex_data.key_rec= key_rec;
4660   ex_data.op_type= op_type;
4661   /*
4662     We need to save the row data for possible conflict resolution after
4663     execute().
4664   */
4665   ex_data.row= copy_row_to_buffer(m_thd_ndb, row_to_save);
4666   uchar* ex_data_buffer= get_buffer(m_thd_ndb, sizeof(ex_data));
4667   if (ex_data.row == NULL || ex_data_buffer == NULL)
4668   {
4669     DBUG_RETURN(HA_ERR_OUT_OF_MEM);
4670   }
4671   memcpy(ex_data_buffer, &ex_data, sizeof(ex_data));
4672 
4673   /* Store ptr to exceptions data in operation 'customdata' ptr */
4674   options->optionsPresent|= NdbOperation::OperationOptions::OO_CUSTOMDATA;
4675   options->customData= (void*)ex_data_buffer;
4676 
4677   DBUG_RETURN(0);
4678 }
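
/*
  Editor's sketch: the customData pointer attached above is opaque to
  the NDB API and is returned unchanged by NdbOperation::getCustomData()
  after execute(), which is how handle_conflict_op_error() below
  recovers the Ndb_exceptions_data.  Minimal illustration of the round
  trip (names here are illustrative, not part of this file):
*/
#if 0
struct ExamplePayload { int tag; const void* row; };

static void example_attach_payload(NdbOperation::OperationOptions* options,
                                   ExamplePayload* payload)
{
  options->optionsPresent |= NdbOperation::OperationOptions::OO_CUSTOMDATA;
  options->customData = payload;   // stored by NDB, never dereferenced
}

static ExamplePayload* example_fetch_payload(const NdbOperation* op)
{
  return (ExamplePayload*) op->getCustomData();  // same pointer back
}
#endif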
4679 
4680 /**
4681    handle_conflict_op_error
4682 
4683    This method is called when an error is detected after executing an
4684    operation with conflict detection active.
4685 
4686    If the operation error is related to conflict detection, handling
4687    starts.
4688 
4689    Handling involves incrementing the relevant counter, and optionally
4690    refreshing the row and inserting an entry into the exceptions table
4691 */
4692 
4693 int
4694 handle_conflict_op_error(Thd_ndb* thd_ndb,
4695                          NdbTransaction* trans,
4696                          const NdbError& err,
4697                          const NdbOperation* op)
4698 {
4699   DBUG_ENTER("handle_conflict_op_error");
4700   DBUG_PRINT("info", ("ndb error: %d", err.code));
4701 
4702   if ((err.code == (int) error_conflict_fn_violation) ||
4703       (err.code == (int) error_op_after_refresh_op) ||
4704       (err.classification == NdbError::ConstraintViolation) ||
4705       (err.classification == NdbError::NoDataFound))
4706   {
4707     DBUG_PRINT("info",
4708                ("err.code %s (int) error_conflict_fn_violation, "
4709                 "err.classification %s",
4710                 err.code == (int) error_conflict_fn_violation ? "==" : "!=",
4711                 err.classification
4712                 == NdbError::ConstraintViolation
4713                 ? "== NdbError::ConstraintViolation"
4714                 : (err.classification == NdbError::NoDataFound
4715                    ? "== NdbError::NoDataFound" : "!=")));
4716 
4717     enum_conflict_cause conflict_cause;
4718 
4719     if ((err.code == (int) error_conflict_fn_violation) ||
4720         (err.code == (int) error_op_after_refresh_op))
4721     {
4722       conflict_cause= ROW_IN_CONFLICT;
4723     }
4724     else if (err.classification == NdbError::ConstraintViolation)
4725     {
4726       conflict_cause= ROW_ALREADY_EXISTS;
4727     }
4728     else
4729     {
4730       assert(err.classification == NdbError::NoDataFound);
4731       conflict_cause= ROW_DOES_NOT_EXIST;
4732     }
4733 
4734     const void* buffer=op->getCustomData();
4735     assert(buffer);
4736     Ndb_exceptions_data ex_data;
4737     memcpy(&ex_data, buffer, sizeof(ex_data));
4738     NDB_SHARE *share= ex_data.share;
4739     const NdbRecord* key_rec= ex_data.key_rec;
4740     const uchar* row= ex_data.row;
4741     enum_conflicting_op_type op_type = ex_data.op_type;
4742     DBUG_ASSERT(share != NULL && row != NULL);
4743 
4744     NDB_CONFLICT_FN_SHARE* cfn_share= share->m_cfn_share;
4745     if (cfn_share)
4746     {
4747       enum_conflict_fn_type cft = cfn_share->m_conflict_fn->type;
4748       bool haveExTable = cfn_share->m_ex_tab != NULL;
4749 
4750       g_ndb_slave_state.current_violation_count[cft]++;
4751 
4752       {
4753         NdbError handle_error;
4754         if (handle_row_conflict(cfn_share,
4755                                 share->table_name,
4756                                 key_rec,
4757                                 row,
4758                                 op_type,
4759                                 conflict_cause,
4760                                 err,
4761                                 trans,
4762                                 handle_error))
4763         {
4764           /* Error with handling of row conflict */
4765           char msg[FN_REFLEN];
4766           my_snprintf(msg, sizeof(msg), "Row conflict handling "
4767                       "on table %s hit Ndb error %d '%s'",
4768                       share->table_name,
4769                       handle_error.code,
4770                       handle_error.message);
4771 
4772           if (handle_error.status == NdbError::TemporaryError)
4773           {
4774             /* Slave will roll back and retry entire transaction. */
4775             ERR_RETURN(handle_error);
4776           }
4777           else
4778           {
4779             push_warning_printf(current_thd, Sql_condition::WARN_LEVEL_WARN,
4780                                 ER_EXCEPTIONS_WRITE_ERROR,
4781                                 ER(ER_EXCEPTIONS_WRITE_ERROR), msg);
4782             /* Slave will stop replication. */
4783             DBUG_RETURN(ER_EXCEPTIONS_WRITE_ERROR);
4784           }
4785         }
4786       }
4787 
4788 
4789       if (haveExTable)
4790       {
4791         NdbError ex_err;
4792         if (write_conflict_row(share, trans, row, ex_err))
4793         {
4794           char msg[FN_REFLEN];
4795           my_snprintf(msg, sizeof(msg), "table %s NDB error %d '%s'",
4796                       cfn_share->m_ex_tab->getName(),
4797                       ex_err.code, ex_err.message);
4798 
4799           NdbDictionary::Dictionary* dict= thd_ndb->ndb->getDictionary();
4800 
4801           if (ex_err.classification == NdbError::SchemaError)
4802           {
4803             dict->removeTableGlobal(*(cfn_share->m_ex_tab), false);
4804             cfn_share->m_ex_tab= NULL;
4805           }
4806           else if (ex_err.status == NdbError::TemporaryError)
4807           {
4808             /* Slave will roll back and retry entire transaction. */
4809             ERR_RETURN(ex_err);
4810           }
4811           else
4812           {
4813             push_warning_printf(current_thd, Sql_condition::WARN_LEVEL_WARN,
4814                                 ER_EXCEPTIONS_WRITE_ERROR,
4815                                 ER(ER_EXCEPTIONS_WRITE_ERROR), msg);
4816             /* Slave will stop replication. */
4817             DBUG_RETURN(ER_EXCEPTIONS_WRITE_ERROR);
4818           }
4819         }
4820       } // if (haveExTable)
4821 
4822       DBUG_RETURN(0);
4823     }
4824     else
4825     {
4826       DBUG_PRINT("info", ("missing cfn_share"));
4827       DBUG_RETURN(0); // TODO : Correct?
4828     }
4829   }
4830   else
4831   {
4832     /* Non conflict related error */
4833     DBUG_PRINT("info", ("err.code == %u", err.code));
4834     DBUG_RETURN(err.code);
4835   }
4836 
4837   DBUG_RETURN(0); // Not reachable: both branches above return
4838 }
4839 #endif /* HAVE_NDB_BINLOG */
4840 
4841 
4842 #ifdef HAVE_NDB_BINLOG
4843 /*
4844   is_serverid_local
4845 */
4846 static bool is_serverid_local(Uint32 serverid)
4847 {
4848   /*
4849      If it's not our serverid, check the
4850      IGNORE_SERVER_IDS setting to see if
4851      it's local.
4852   */
4853   return ((serverid == ::server_id) ||
4854           ndb_mi_get_ignore_server_id(serverid));
4855 }
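
/*
  Editor's note: ndb_mi_get_ignore_server_id() reflects the replication
  IGNORE_SERVER_IDS option, configured from SQL, e.g. (example only):

    CHANGE MASTER TO IGNORE_SERVER_IDS = (3, 4);

  Rows binlogged by any of those server ids are then also treated as
  local by is_serverid_local().
*/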
4856 #endif
4857 
4858 int ha_ndbcluster::write_row(uchar *record)
4859 {
4860   DBUG_ENTER("ha_ndbcluster::write_row");
4861 #ifdef HAVE_NDB_BINLOG
4862   if (m_share == ndb_apply_status_share && table->in_use->slave_thread)
4863   {
4864     uint32 row_server_id, master_server_id= ndb_mi_get_master_server_id();
4865     uint64 row_epoch;
4866     memcpy(&row_server_id, table->field[0]->ptr + (record - table->record[0]),
4867            sizeof(row_server_id));
4868     memcpy(&row_epoch, table->field[1]->ptr + (record - table->record[0]),
4869            sizeof(row_epoch));
4870     g_ndb_slave_state.atApplyStatusWrite(master_server_id,
4871                                          row_server_id,
4872                                          row_epoch,
4873                                          is_serverid_local(row_server_id));
4874   }
4875 #endif /* HAVE_NDB_BINLOG */
4876   DBUG_RETURN(ndb_write_row(record, FALSE, FALSE));
4877 }
4878 
4879 /**
4880   Insert one record into NDB
4881 */
4882 int ha_ndbcluster::ndb_write_row(uchar *record,
4883                                  bool primary_key_update,
4884                                  bool batched_update)
4885 {
4886   bool has_auto_increment;
4887   const NdbOperation *op;
4888   THD *thd= table->in_use;
4889   Thd_ndb *thd_ndb= m_thd_ndb;
4890   NdbTransaction *trans;
4891   uint32 part_id;
4892   int error;
4893   NdbOperation::SetValueSpec sets[3];
4894   Uint32 num_sets= 0;
4895   DBUG_ENTER("ha_ndbcluster::ndb_write_row");
4896 
4897   error = check_slave_state(thd);
4898   if (unlikely(error))
4899     DBUG_RETURN(error);
4900 
4901   has_auto_increment= (table->next_number_field && record == table->record[0]);
4902 
4903   if (has_auto_increment && table_share->primary_key != MAX_KEY)
4904   {
4905     /*
4906      * Increase any auto_incremented primary key
4907      */
4908     m_skip_auto_increment= FALSE;
4909     if ((error= update_auto_increment()))
4910       DBUG_RETURN(error);
4911     m_skip_auto_increment= (insert_id_for_cur_row == 0);
4912   }
4913 
4914   /*
4915    * If IGNORE is specified, ignore constraint violations on primary and unique keys
4916    */
4917   if (!m_use_write && m_ignore_dup_key)
4918   {
4919     /*
4920      Compare this if-expression with the one in start_bulk_insert();
4921      start_bulk_insert() will set parameters to ensure that each
4922      write_row is committed individually.
4923     */
4924     int peek_res= peek_indexed_rows(record, NDB_INSERT);
4925 
4926     if (!peek_res)
4927     {
4928       DBUG_RETURN(HA_ERR_FOUND_DUPP_KEY);
4929     }
4930     if (peek_res != HA_ERR_KEY_NOT_FOUND)
4931       DBUG_RETURN(peek_res);
4932   }
4933 
4934   bool uses_blobs= uses_blob_value(table->write_set);
4935 
4936   Uint64 auto_value;
4937   const NdbRecord *key_rec;
4938   const uchar *key_row;
4939   if (table_share->primary_key == MAX_KEY)
4940   {
4941     /* Table has hidden primary key. */
4942     Ndb *ndb= get_ndb(thd);
4943     uint retries= NDB_AUTO_INCREMENT_RETRIES;
4944     int retry_sleep= 30; /* 30 milliseconds, transaction */
4945     for (;;)
4946     {
4947       Ndb_tuple_id_range_guard g(m_share);
4948       if (ndb->getAutoIncrementValue(m_table, g.range, auto_value, 1000) == -1)
4949       {
4950 	if (--retries && !thd->killed &&
4951 	    ndb->getNdbError().status == NdbError::TemporaryError)
4952 	{
4953 	  do_retry_sleep(retry_sleep);
4954 	  continue;
4955 	}
4956 	ERR_RETURN(ndb->getNdbError());
4957       }
4958       break;
4959     }
4960     sets[num_sets].column= get_hidden_key_column();
4961     sets[num_sets].value= &auto_value;
4962     num_sets++;
4963     key_rec= m_ndb_hidden_key_record;
4964     key_row= (const uchar *)&auto_value;
4965   }
4966   else
4967   {
4968     key_rec= m_index[table_share->primary_key].ndb_unique_record_row;
4969     key_row= record;
4970   }
4971 
4972   trans= thd_ndb->trans;
4973   if (m_user_defined_partitioning)
4974   {
4975     DBUG_ASSERT(m_use_partition_pruning);
4976     longlong func_value= 0;
4977     my_bitmap_map *old_map= dbug_tmp_use_all_columns(table, table->read_set);
4978     error= m_part_info->get_partition_id(m_part_info, &part_id, &func_value);
4979     dbug_tmp_restore_column_map(table->read_set, old_map);
4980     if (unlikely(error))
4981     {
4982       m_part_info->err_value= func_value;
4983       DBUG_RETURN(error);
4984     }
4985     {
4986       /*
4987         We need to set the value of the partition function value in
4988         NDB since the NDB kernel doesn't have easy access to the function
4989         to calculate the value.
4990       */
4991       if (func_value >= INT_MAX32)
4992         func_value= INT_MAX32;
4993       sets[num_sets].column= get_partition_id_column();
4994       sets[num_sets].value= &func_value;
4995       num_sets++;
4996     }
4997     if (!trans)
4998       if (unlikely(!(trans= start_transaction_part_id(part_id, error))))
4999         DBUG_RETURN(error);
5000   }
5001   else if (!trans)
5002   {
5003     if (unlikely(!(trans= start_transaction_row(key_rec, key_row, error))))
5004       DBUG_RETURN(error);
5005   }
5006   DBUG_ASSERT(trans);
5007 
5008   ha_statistic_increment(&SSV::ha_write_count);
5009 
5010   /*
5011      Setup OperationOptions
5012    */
5013   NdbOperation::OperationOptions options;
5014   NdbOperation::OperationOptions *poptions = NULL;
5015   options.optionsPresent=0;
5016 
5017   eventSetAnyValue(thd, &options);
5018   bool need_flush= add_row_check_if_batch_full(thd_ndb);
5019 
5020   const Uint32 authorValue = 1;
5021   if ((thd->slave_thread) &&
5022       (m_table->getExtraRowAuthorBits()))
5023   {
5024     /* Set author to indicate slave updated last */
5025     sets[num_sets].column= NdbDictionary::Column::ROW_AUTHOR;
5026     sets[num_sets].value= &authorValue;
5027     num_sets++;
5028   }
5029 
5030   if (m_user_defined_partitioning)
5031   {
5032     options.optionsPresent |= NdbOperation::OperationOptions::OO_PARTITION_ID;
5033     options.partitionId= part_id;
5034   }
5035   if (num_sets)
5036   {
5037     options.optionsPresent |= NdbOperation::OperationOptions::OO_SETVALUE;
5038     options.extraSetValues= sets;
5039     options.numExtraSetValues= num_sets;
5040   }
5041   if (thd->slave_thread || THDVAR(thd, deferred_constraints))
5042   {
5043     options.optionsPresent |=
5044       NdbOperation::OperationOptions::OO_DEFERRED_CONSTAINTS;
5045   }
5046 
5047   if (options.optionsPresent != 0)
5048     poptions=&options;
5049 
5050   const Uint32 bitmapSz= (NDB_MAX_ATTRIBUTES_IN_TABLE + 31)/32;
5051   uint32 tmpBitmapSpace[bitmapSz];
5052   MY_BITMAP tmpBitmap;
5053   MY_BITMAP *user_cols_written_bitmap;
5054 #ifdef HAVE_NDB_BINLOG
5055   bool haveConflictFunction =
5056     (thd->slave_thread &&
5057      m_share->m_cfn_share &&
5058      m_share->m_cfn_share->m_conflict_fn);
5059 #endif
5060 
5061   if (m_use_write
5062 #ifdef HAVE_NDB_BINLOG
5063       /* Conflict detection must use normal Insert */
5064       && !haveConflictFunction
5065 #endif
5066       )
5067   {
5068     /* Should we use the supplied table writeset or not?
5069      * For a REPLACE command, we should ignore it, and write
5070      * all columns to get correct REPLACE behaviour.
5071      * For applying Binlog events, we need to use the writeset
5072      * to avoid trampling unchanged columns when an update is
5073      * logged as a WRITE
5074      */
5075     bool useWriteSet= isManualBinlogExec(thd);
5076 
5077 #ifdef HAVE_NDB_BINLOG
5078     /* Slave always uses writeset
5079      * TODO : What about SBR replicating a
5080      * REPLACE command?
5081      */
5082     useWriteSet |= thd->slave_thread;
5083 #endif
5084     uchar* mask;
5085 
5086     if (useWriteSet)
5087     {
5088       user_cols_written_bitmap= table->write_set;
5089       mask= (uchar *)(user_cols_written_bitmap->bitmap);
5090     }
5091     else
5092     {
5093       user_cols_written_bitmap= NULL;
5094       mask= NULL;
5095     }
5096     /* TODO : Add conflict detection etc when interpreted write supported */
5097     op= trans->writeTuple(key_rec, (const char *)key_row, m_ndb_record,
5098                           (char *)record, mask,
5099                           poptions, sizeof(NdbOperation::OperationOptions));
5100   }
5101   else
5102   {
5103 #ifdef HAVE_NDB_BINLOG
5104     if (haveConflictFunction)
5105     {
5106       /* Conflict detection in slave thread */
5107       if (unlikely((error = prepare_conflict_detection(WRITE_ROW,
5108                                                        key_rec,
5109                                                        NULL,    /* old_data */
5110                                                        record,  /* new_data */
5111                                                        NULL,    /* code */
5112                                                        &options))))
5113         DBUG_RETURN(error);
5114     }
5115 #endif
5116     uchar *mask;
5117 
5118     /* Check whether Ndb table definition includes any default values. */
5119     if (m_table->hasDefaultValues())
5120     {
5121       DBUG_PRINT("info", ("Not sending values for native defaulted columns"));
5122 
5123       /*
5124         If Ndb is unaware of the table's defaults, we must provide all
5125         column values to the insert.  This is done using a NULL column mask.
5126         If Ndb is aware of the table's defaults, we only need to provide
5127         the columns explicitly mentioned in the write set,
5128         plus any extra columns required due to bug#41616,
5129         plus the primary key columns required due to bug#42238.
5130       */
5131       /*
5132         The following code for setting user_cols_written_bitmap
5133         should be removed after BUG#41616 and Bug#42238 are fixed
5134       */
5135       /* Copy table write set so that we can add to it */
5136       user_cols_written_bitmap= &tmpBitmap;
5137       bitmap_init(user_cols_written_bitmap, tmpBitmapSpace,
5138                   table->write_set->n_bits, false);
5139       bitmap_copy(user_cols_written_bitmap, table->write_set);
5140 
5141       for (uint i= 0; i < table->s->fields; i++)
5142       {
5143         Field *field= table->field[i];
5144         DBUG_PRINT("info", ("Field#%u, (%u), Type : %u "
5145                             "NO_DEFAULT_VALUE_FLAG : %u PRI_KEY_FLAG : %u",
5146                             i,
5147                             field->field_index,
5148                             field->real_type(),
5149                             field->flags & NO_DEFAULT_VALUE_FLAG,
5150                             field->flags & PRI_KEY_FLAG));
5151         if ((field->flags & (NO_DEFAULT_VALUE_FLAG | // bug 41616
5152                              PRI_KEY_FLAG)) ||       // bug 42238
5153             ! type_supports_default_value(field->real_type()))
5154         {
5155           bitmap_set_bit(user_cols_written_bitmap, field->field_index);
5156         }
5157       }
5158 
5159       mask= (uchar *)(user_cols_written_bitmap->bitmap);
5160     }
5161     else
5162     {
5163       /* No defaults in kernel, provide all columns ourselves */
5164       DBUG_PRINT("info", ("No native defaults, sending all values"));
5165       user_cols_written_bitmap= NULL;
5166       mask = NULL;
5167     }
5168 
5169     /* Using insert, we write all non default columns */
5170     op= trans->insertTuple(key_rec, (const char *)key_row, m_ndb_record,
5171                            (char *)record, mask, // Default value should be masked
5172                            poptions, sizeof(NdbOperation::OperationOptions));
5173   }
5174   if (!(op))
5175     ERR_RETURN(trans->getNdbError());
5176 
5177   bool do_batch= !need_flush &&
5178     (batched_update || thd_allow_batch(thd));
5179   uint blob_count= 0;
5180   if (table_share->blob_fields > 0)
5181   {
5182     my_bitmap_map *old_map= dbug_tmp_use_all_columns(table, table->read_set);
5183     /* Set Blob values for all columns updated by the operation */
5184     int res= set_blob_values(op, record - table->record[0],
5185                              user_cols_written_bitmap, &blob_count, do_batch);
5186     dbug_tmp_restore_column_map(table->read_set, old_map);
5187     if (res != 0)
5188       DBUG_RETURN(res);
5189   }
5190 
5191   m_rows_changed++;
5192 
5193   /*
5194     Execute write operation
5195     NOTE When doing inserts with many values in
5196     each INSERT statement it should not be necessary
5197     to NoCommit the transaction between each row.
5198     Find out how this is detected!
5199   */
5200   m_rows_inserted++;
5201   no_uncommitted_rows_update(1);
5202   if (( (m_rows_to_insert == 1 || uses_blobs) && !do_batch ) ||
5203       primary_key_update ||
5204       need_flush)
5205   {
5206     int res= flush_bulk_insert();
5207     if (res != 0)
5208     {
5209       m_skip_auto_increment= TRUE;
5210       DBUG_RETURN(res);
5211     }
5212   }
5213   if ((has_auto_increment) && (m_skip_auto_increment))
5214   {
5215     int ret_val;
5216     if ((ret_val= set_auto_inc(thd, table->next_number_field)))
5217     {
5218       DBUG_RETURN(ret_val);
5219     }
5220   }
5221   m_skip_auto_increment= TRUE;
5222 
5223   DBUG_PRINT("exit",("ok"));
5224   DBUG_RETURN(0);
5225 }
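
/*
  Editor's sketch: the 'mask' passed to writeTuple()/insertTuple() above
  selects which columns of 'record' are sent to NDB; a NULL mask means
  "send every column".  The handler derives it directly from a MY_BITMAP,
  whose raw storage doubles as the per-column mask (sketch only):
*/
#if 0
static const uchar* example_column_mask(const MY_BITMAP* cols_written)
{
  // NULL bitmap => NULL mask => all columns are written.
  return cols_written ? (const uchar*) cols_written->bitmap : NULL;
}
#endif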
5226 
5227 
5228 /* Compare if an update changes the primary key in a row. */
5229 int ha_ndbcluster::primary_key_cmp(const uchar * old_row, const uchar * new_row)
5230 {
5231   uint keynr= table_share->primary_key;
5232   KEY_PART_INFO *key_part=table->key_info[keynr].key_part;
5233   KEY_PART_INFO *end=key_part+table->key_info[keynr].user_defined_key_parts;
5234 
5235   for (; key_part != end ; key_part++)
5236   {
5237     if (!bitmap_is_set(table->write_set, key_part->fieldnr - 1))
5238       continue;
5239 
5240     /* The primary key does not allow NULLs. */
5241     DBUG_ASSERT(!key_part->null_bit);
5242 
5243     if (key_part->key_part_flag & (HA_BLOB_PART | HA_VAR_LENGTH_PART))
5244     {
5245 
5246       if (key_part->field->cmp_binary((old_row + key_part->offset),
5247                                       (new_row + key_part->offset),
5248                                       (ulong) key_part->length))
5249         return 1;
5250     }
5251     else
5252     {
5253       if (memcmp(old_row+key_part->offset, new_row+key_part->offset,
5254                  key_part->length))
5255         return 1;
5256     }
5257   }
5258   return 0;
5259 }
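
/*
  Editor's note on primary_key_cmp(): memcmp() is only safe for
  fixed-length key parts.  VARCHAR/BLOB parts go through
  Field::cmp_binary() above because their in-row image can contain
  unused trailing bytes after the used data, e.g. 'abc' in a
  VARCHAR(10) occupies [len=3]['a']['b']['c'] plus 7 undefined bytes,
  so a raw memcmp over key_part->length could report a false mismatch.
*/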
5260 
5261 #ifdef HAVE_NDB_BINLOG
5262 int
5263 handle_row_conflict(NDB_CONFLICT_FN_SHARE* cfn_share,
5264                     const char* table_name,
5265                     const NdbRecord* key_rec,
5266                     const uchar* pk_row,
5267                     enum_conflicting_op_type op_type,
5268                     enum_conflict_cause conflict_cause,
5269                     const NdbError& conflict_error,
5270                     NdbTransaction* conflict_trans,
5271                     NdbError& err)
5272 {
5273   DBUG_ENTER("handle_row_conflict");
5274 
5275   if (cfn_share->m_flags & CFF_REFRESH_ROWS)
5276   {
5277     /* A conflict has been detected between an applied replicated operation
5278      * and the data in the DB.
5279      * The attempt to change the local DB will have been rejected.
5280      * We now take steps to generate a refresh Binlog event so that
5281      * other clusters will be re-aligned.
5282      */
5283     DBUG_PRINT("info", ("Conflict on table %s.  Operation type : %s, "
5284                         "conflict cause :%s, conflict error : %u : %s",
5285                         table_name,
5286                         ((op_type == WRITE_ROW)? "WRITE_ROW":
5287                          (op_type == UPDATE_ROW)? "UPDATE_ROW":
5288                          "DELETE_ROW"),
5289                         ((conflict_cause == ROW_ALREADY_EXISTS)?"ROW_ALREADY_EXISTS":
5290                          (conflict_cause == ROW_DOES_NOT_EXIST)?"ROW_DOES_NOT_EXIST":
5291                          "ROW_IN_CONFLICT"),
5292                         conflict_error.code,
5293                         conflict_error.message));
5294 
5295     assert(key_rec != NULL);
5296     assert(pk_row != NULL);
5297 
5298     /* When the slave splits an epoch into batches, a conflict row detected
5299      * and refreshed in an early batch can be written to by operations in
5300      * a later batch.  As the operations will not have applied, and the
5301      * row has already been refreshed, we need not attempt to refresh
5302      * it again
5303      */
5304     if ((conflict_cause == ROW_IN_CONFLICT) &&
5305         (conflict_error.code == (int) error_op_after_refresh_op))
5306     {
5307       /* Attempt to apply an operation after the row was refreshed
5308        * Ignore the error
5309        */
5310       DBUG_PRINT("info", ("Operation after refresh error - ignoring"));
5311       DBUG_RETURN(0);
5312     }
5313 
5314     /* When a delete operation finds that the row does not exist, it indicates
5315      * a DELETE vs DELETE conflict.  If we refresh the row then we can get
5316      * non-deterministic behaviour depending on slave batching, as follows:
5317      *   Row is deleted
5318      *
5319      *     Case 1
5320      *       Slave applied DELETE, INSERT in 1 batch
5321      *
5322      *         After first batch, the row is present (due to INSERT), it is
5323      *         refreshed.
5324      *
5325      *     Case 2
5326      *       Slave applied DELETE in 1 batch, INSERT in 2nd batch
5327      *
5328      *         After first batch, the row is not present, it is refreshed
5329      *         INSERT is then rejected.
5330      *
5331      * The problem of not being able to 'record' a DELETE vs DELETE conflict
5332      * is known.  We attempt at least to give consistent behaviour for
5333      * DELETE vs DELETE conflicts by:
5334      *   NOT refreshing a row when a DELETE vs DELETE conflict is detected.
5335      * This should map all batching scenarios onto Case 1.
5336      */
5337     if ((op_type == DELETE_ROW) &&
5338         (conflict_cause == ROW_DOES_NOT_EXIST))
5339     {
5340       DBUG_PRINT("info", ("Delete vs Delete detected, NOT refreshing"));
5341       DBUG_RETURN(0);
5342     }
5343 
5344     /* Create a refresh operation to realign other clusters */
5345     // TODO AnyValue
5346     // TODO Do we ever get non-PK key?
5347     //      Keyless table?
5348     //      Unique index
5349     const NdbOperation* refresh_op= conflict_trans->refreshTuple(key_rec,
5350                                                                  (const char*) pk_row);
5351 
5352     if (!refresh_op)
5353     {
5354       err= conflict_trans->getNdbError();
5355       DBUG_RETURN(1);
5356     }
5357   } /* if (cfn_share->m_flags & CFF_REFRESH_ROWS) */
5358 
5359   DBUG_RETURN(0);
5360 }
5361 #endif /* HAVE_NDB_BINLOG */
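
/*
  Editor's note: the conflict handling above is driven by per-table rows
  in the mysql.ndb_replication table.  A hypothetical setup enabling the
  NDB$MAX() timestamp-based conflict function might look like (schema
  per the MySQL Cluster replication conflict-resolution docs; values
  here are illustrative only):

    INSERT INTO mysql.ndb_replication
      (db, table_name, server_id, binlog_type, conflict_fn)
    VALUES
      ('mydb', 'mytable', 0, 7, 'NDB$MAX(ts_col)');
*/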
5362 
5363 /**
5364   Start a bulk update; returns TRUE if updates cannot be batched and rows must go one by one.
5365 */
5366 
5367 bool ha_ndbcluster::start_bulk_update()
5368 {
5369   DBUG_ENTER("ha_ndbcluster::start_bulk_update");
5370   if (!m_use_write && m_ignore_dup_key)
5371   {
5372     DBUG_PRINT("info", ("Batching turned off as duplicate key is "
5373                         "ignored by using peek_row"));
5374     DBUG_RETURN(TRUE);
5375   }
5376   DBUG_RETURN(FALSE);
5377 }
5378 
5379 int ha_ndbcluster::bulk_update_row(const uchar *old_data, uchar *new_data,
5380                                    uint *dup_key_found)
5381 {
5382   DBUG_ENTER("ha_ndbcluster::bulk_update_row");
5383   *dup_key_found= 0;
5384   DBUG_RETURN(ndb_update_row(old_data, new_data, 1));
5385 }
5386 
5387 int ha_ndbcluster::exec_bulk_update(uint *dup_key_found)
5388 {
5389   NdbTransaction* trans= m_thd_ndb->trans;
5390   DBUG_ENTER("ha_ndbcluster::exec_bulk_update");
5391   *dup_key_found= 0;
5392 
5393   // m_handler must be NULL or point to _this_ handler instance
5394   assert(m_thd_ndb->m_handler == NULL || m_thd_ndb->m_handler == this);
5395 
5396   if (m_thd_ndb->m_handler &&
5397       m_read_before_write_removal_possible)
5398   {
5399     /*
5400       This is an autocommit involving only one table and rbwr is on
5401 
5402       Commit the autocommit transaction early(before the usual place
5403       in ndbcluster_commit) in order to:
5404       1) save one round trip, "no-commit+commit" converted to "commit"
5405       2) return the correct number of updated and affected rows
5406          to the update loop(which will ask handler in rbwr mode)
5407     */
5408     DBUG_PRINT("info", ("committing auto-commit+rbwr early"));
5409     uint ignore_count= 0;
5410     const int ignore_error= 1;
5411     if (execute_commit(table->in_use, m_thd_ndb, trans,
5412                        m_thd_ndb->m_force_send, ignore_error,
5413                        &ignore_count) != 0)
5414     {
5415       no_uncommitted_rows_execute_failure();
5416       DBUG_RETURN(ndb_err(trans));
5417     }
5418     DBUG_PRINT("info", ("ignore_count: %u", ignore_count));
5419     assert(m_rows_changed >= ignore_count);
5420     assert(m_rows_updated >= ignore_count);
5421     m_rows_changed-= ignore_count;
5422     m_rows_updated-= ignore_count;
5423     DBUG_RETURN(0);
5424   }
5425 
5426   if (m_thd_ndb->m_unsent_bytes == 0)
5427   {
5428     DBUG_PRINT("exit", ("skip execute - no unsent bytes"));
5429     DBUG_RETURN(0);
5430   }
5431 
5432   if (thd_allow_batch(table->in_use))
5433   {
5434     /*
5435       Turned on by @@transaction_allow_batching=ON
5436       or implicitly by slave exec thread
5437     */
5438     DBUG_PRINT("exit", ("skip execute - transaction_allow_batching is ON"));
5439     DBUG_RETURN(0);
5440   }
5441 
5442   if (m_thd_ndb->m_handler &&
5443       !m_blobs_pending)
5444   {
5445     // Execute at commit time(in 'ndbcluster_commit') to save a round trip
5446     DBUG_PRINT("exit", ("skip execute - simple autocommit"));
5447     DBUG_RETURN(0);
5448   }
5449 
5450   uint ignore_count= 0;
5451   if (execute_no_commit(m_thd_ndb, trans,
5452                         m_ignore_no_key || m_read_before_write_removal_used,
5453                         &ignore_count) != 0)
5454   {
5455     no_uncommitted_rows_execute_failure();
5456     DBUG_RETURN(ndb_err(trans));
5457   }
5458   assert(m_rows_changed >= ignore_count);
5459   assert(m_rows_updated >= ignore_count);
5460   m_rows_changed-= ignore_count;
5461   m_rows_updated-= ignore_count;
5462   DBUG_RETURN(0);
5463 }
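
/*
  Editor's note: the thd_allow_batch() early-out above can be observed
  from SQL via the session variable it checks (example only):

    SET SESSION transaction_allow_batching = ON;
    BEGIN;
    UPDATE t1 SET b = b + 1 WHERE a < 100;  -- buffered in NdbApi
    COMMIT;                                 -- sent in one round trip
*/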
5464 
5465 void ha_ndbcluster::end_bulk_update()
5466 {
5467   DBUG_ENTER("ha_ndbcluster::end_bulk_update");
5468   DBUG_VOID_RETURN;
5469 }
5470 
5471 int ha_ndbcluster::update_row(const uchar *old_data, uchar *new_data)
5472 {
5473   return ndb_update_row(old_data, new_data, 0);
5474 }
5475 
5476 void
5477 ha_ndbcluster::setup_key_ref_for_ndb_record(const NdbRecord **key_rec,
5478                                             const uchar **key_row,
5479                                             const uchar *record,
5480                                             bool use_active_index)
5481 {
5482   DBUG_ENTER("setup_key_ref_for_ndb_record");
5483   if (use_active_index)
5484   {
5485     /* Use unique key to access table */
5486     DBUG_PRINT("info", ("Using unique index (%u)", active_index));
5487     *key_rec= m_index[active_index].ndb_unique_record_row;
5488     *key_row= record;
5489   }
5490   else if (table_share->primary_key != MAX_KEY)
5491   {
5492     /* Use primary key to access table */
5493     DBUG_PRINT("info", ("Using primary key"));
5494     *key_rec= m_index[table_share->primary_key].ndb_unique_record_row;
5495     *key_row= record;
5496   }
5497   else
5498   {
5499     /* Use hidden primary key previously read into m_ref. */
5500     DBUG_PRINT("info", ("Using hidden primary key (%llu)", m_ref));
5501     /* Can't use hidden pk if we didn't read it first */
5502     DBUG_ASSERT(m_read_before_write_removal_used == false);
5503     *key_rec= m_ndb_hidden_key_record;
5504     *key_row= (const uchar *)(&m_ref);
5505   }
5506   DBUG_VOID_RETURN;
5507 }
5508 
5509 
5510 /*
5511   Update one record in NDB using primary key
5512 */
5513 
5514 int ha_ndbcluster::ndb_update_row(const uchar *old_data, uchar *new_data,
5515                                   int is_bulk_update)
5516 {
5517   THD *thd= table->in_use;
5518   Thd_ndb *thd_ndb= m_thd_ndb;
5519   NdbTransaction *trans= thd_ndb->trans;
5520   NdbScanOperation* cursor= m_active_cursor;
5521   const NdbOperation *op;
5522   uint32 old_part_id= ~uint32(0), new_part_id= ~uint32(0);
5523   int error;
5524   longlong func_value;
5525   Uint32 func_value_uint32;
5526   bool have_pk= (table_share->primary_key != MAX_KEY);
5527   bool pk_update= (!m_read_before_write_removal_possible &&
5528                    have_pk &&
5529                    bitmap_is_overlapping(table->write_set, m_pk_bitmap_p) &&
5530                    primary_key_cmp(old_data, new_data));
5531   bool batch_allowed= !m_update_cannot_batch &&
5532     (is_bulk_update || thd_allow_batch(thd));
5533   NdbOperation::SetValueSpec sets[2];
5534   Uint32 num_sets= 0;
5535 
5536   DBUG_ENTER("ndb_update_row");
5537   DBUG_ASSERT(trans);
5538 
5539   error = check_slave_state(thd);
5540   if (unlikely(error))
5541     DBUG_RETURN(error);
5542 
5543   /*
5544    * If IGNORE is specified, ignore constraint violations on primary and unique keys,
5545    * but check that this is not part of INSERT ... ON DUPLICATE KEY UPDATE
5546    */
5547   if (m_ignore_dup_key && (thd->lex->sql_command == SQLCOM_UPDATE ||
5548                            thd->lex->sql_command == SQLCOM_UPDATE_MULTI))
5549   {
5550     NDB_WRITE_OP write_op= (pk_update) ? NDB_PK_UPDATE : NDB_UPDATE;
5551     int peek_res= peek_indexed_rows(new_data, write_op);
5552 
5553     if (!peek_res)
5554     {
5555       DBUG_RETURN(HA_ERR_FOUND_DUPP_KEY);
5556     }
5557     if (peek_res != HA_ERR_KEY_NOT_FOUND)
5558       DBUG_RETURN(peek_res);
5559   }
5560 
5561   ha_statistic_increment(&SSV::ha_update_count);
5562 
5563   bool skip_partition_for_unique_index= FALSE;
5564   if (m_use_partition_pruning)
5565   {
5566     if (!cursor && m_read_before_write_removal_used)
5567     {
5568       ndb_index_type type= get_index_type(active_index);
5569       /*
5570         Ndb unique indexes are global so when
5571         m_read_before_write_removal_used is active
5572         the unique index can be used directly for update
5573         without finding the partitions
5574       */
5575       if (type == UNIQUE_INDEX ||
5576           type == UNIQUE_ORDERED_INDEX)
5577       {
5578         skip_partition_for_unique_index= TRUE;
5579         goto skip_partition_pruning;
5580       }
5581     }
5582     if ((error= get_parts_for_update(old_data, new_data, table->record[0],
5583                                      m_part_info, &old_part_id, &new_part_id,
5584                                      &func_value)))
5585     {
5586       m_part_info->err_value= func_value;
5587       DBUG_RETURN(error);
5588     }
5589     DBUG_PRINT("info", ("old_part_id: %u  new_part_id: %u", old_part_id, new_part_id));
5590   skip_partition_pruning:
5591     (void)0;
5592   }
5593 
5594   /*
5595    * Check for update of primary key or partition change
5596    * for special handling
5597    */
5598   if (pk_update || old_part_id != new_part_id)
5599   {
5600     DBUG_RETURN(ndb_pk_update_row(thd, old_data, new_data, old_part_id));
5601   }
5602   /*
5603     If we are updating a unique key with auto_increment
5604     then we need to update the auto_increment counter
5605    */
5606   if (table->found_next_number_field &&
5607       bitmap_is_set(table->write_set,
5608 		    table->found_next_number_field->field_index) &&
5609       (error= set_auto_inc(thd, table->found_next_number_field)))
5610   {
5611     DBUG_RETURN(error);
5612   }
5613   /*
5614     Set only non-primary-key attributes.
5615     We already checked that any primary key attribute in write_set has no
5616     real changes.
5617   */
5618   bitmap_copy(&m_bitmap, table->write_set);
5619   bitmap_subtract(&m_bitmap, m_pk_bitmap_p);
5620   uchar *mask= (uchar *)(m_bitmap.bitmap);
5621   DBUG_ASSERT(!pk_update);
5622 
5623   NdbOperation::OperationOptions *poptions = NULL;
5624   NdbOperation::OperationOptions options;
5625   options.optionsPresent=0;
5626 
5627   /* Need to set the value of any user-defined partitioning function
5628      (except when using a unique index).
5629   */
5630   if (m_user_defined_partitioning && !skip_partition_for_unique_index)
5631   {
5632     if (func_value >= INT_MAX32)
5633       func_value_uint32= INT_MAX32;
5634     else
5635       func_value_uint32= (uint32)func_value;
5636     sets[num_sets].column= get_partition_id_column();
5637     sets[num_sets].value= &func_value_uint32;
5638     num_sets++;
5639 
5640     if (!cursor)
5641     {
5642       options.optionsPresent|= NdbOperation::OperationOptions::OO_PARTITION_ID;
5643       options.partitionId= new_part_id;
5644     }
5645   }
5646 
5647   eventSetAnyValue(thd, &options);
5648 
5649   bool need_flush= add_row_check_if_batch_full(thd_ndb);
5650 
5651   const Uint32 authorValue = 1;
5652   if ((thd->slave_thread) &&
5653       (m_table->getExtraRowAuthorBits()))
5654   {
5655     /* Set author to indicate slave updated last */
5656     sets[num_sets].column= NdbDictionary::Column::ROW_AUTHOR;
5657     sets[num_sets].value= &authorValue;
5658     num_sets++;
5659   }
5660 
5661   if (num_sets)
5662   {
5663     options.optionsPresent|= NdbOperation::OperationOptions::OO_SETVALUE;
5664     options.extraSetValues= sets;
5665     options.numExtraSetValues= num_sets;
5666   }
5667 
5668   if (thd->slave_thread || THDVAR(thd, deferred_constraints))
5669   {
5670     options.optionsPresent |=
5671       NdbOperation::OperationOptions::OO_DEFERRED_CONSTAINTS;
5672   }
5673 
5674   if (cursor)
5675   {
5676     /*
5677       We are scanning records and want to update the record
5678       that was just found.  Call updateCurrentTuple on the cursor
5679       to take over the lock in a new update operation,
5680       which also sets the primary key of the record from
5681       the active record in the cursor.
5682     */
5683     DBUG_PRINT("info", ("Calling updateTuple on cursor, write_set=0x%x",
5684                         table->write_set->bitmap[0]));
5685 
5686     if (options.optionsPresent != 0)
5687       poptions = &options;
5688 
5689     if (!(op= cursor->updateCurrentTuple(trans, m_ndb_record,
5690                                          (const char*)new_data, mask,
5691                                          poptions,
5692                                          sizeof(NdbOperation::OperationOptions))))
5693       ERR_RETURN(trans->getNdbError());
5694 
5695     m_lock_tuple= FALSE;
5696     thd_ndb->m_unsent_bytes+= 12;
5697   }
5698   else
5699   {
5700     const NdbRecord *key_rec;
5701     const uchar *key_row;
5702     setup_key_ref_for_ndb_record(&key_rec, &key_row, new_data,
5703 				 m_read_before_write_removal_used);
5704 
5705 #ifdef HAVE_NDB_BINLOG
5706     Uint32 buffer[ MAX_CONFLICT_INTERPRETED_PROG_SIZE ];
5707     NdbInterpretedCode code(m_table, buffer,
5708                             sizeof(buffer)/sizeof(buffer[0]));
5709 
5710     if (thd->slave_thread && m_share->m_cfn_share &&
5711         m_share->m_cfn_share->m_conflict_fn)
5712     {
5713        /* Conflict resolution in slave thread. */
5714       if (unlikely((error = prepare_conflict_detection(UPDATE_ROW,
5715                                                        key_rec,
5716                                                        old_data,
5717                                                        new_data,
5718                                                        &code,
5719                                                        &options))))
5720         DBUG_RETURN(error);
5721     }
5722 #endif /* HAVE_NDB_BINLOG */
5723     if (options.optionsPresent !=0)
5724       poptions= &options;
5725 
5726     if (!(op= trans->updateTuple(key_rec, (const char *)key_row,
5727                                  m_ndb_record, (const char*)new_data, mask,
5728                                  poptions,
5729                                  sizeof(NdbOperation::OperationOptions))))
5730       ERR_RETURN(trans->getNdbError());
5731   }
5732 
5733   uint blob_count= 0;
5734   if (uses_blob_value(table->write_set))
5735   {
5736     int row_offset= new_data - table->record[0];
5737     int res= set_blob_values(op, row_offset, table->write_set, &blob_count,
5738                              (batch_allowed && !need_flush));
5739     if (res != 0)
5740       DBUG_RETURN(res);
5741   }
5742   uint ignore_count= 0;
5743   /*
5744     Batch update operation if we are doing a scan for update, unless
5745     there exist UPDATE AFTER triggers
5746   */
5747   if (m_update_cannot_batch ||
5748       !(cursor || (batch_allowed && have_pk)) ||
5749       need_flush)
5750   {
5751     if (execute_no_commit(m_thd_ndb, trans,
5752                           m_ignore_no_key || m_read_before_write_removal_used,
5753                           &ignore_count) != 0)
5754     {
5755       no_uncommitted_rows_execute_failure();
5756       DBUG_RETURN(ndb_err(trans));
5757     }
5758   }
5759   else if (blob_count > 0)
5760     m_blobs_pending= TRUE;
5761 
5762   m_rows_changed++;
5763   m_rows_updated++;
5764 
5765   assert(m_rows_changed >= ignore_count);
5766   assert(m_rows_updated >= ignore_count);
5767   m_rows_changed-= ignore_count;
5768   m_rows_updated-= ignore_count;
5769 
5770   DBUG_RETURN(0);
5771 }
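
/*
  Editor's sketch: the update mask built in ndb_update_row() is
  "write_set minus primary-key columns"; genuine PK changes were
  already re-routed to ndb_pk_update_row() (delete + insert).  The
  equivalent standalone bitmap logic (illustrative only):
*/
#if 0
static void example_update_mask(MY_BITMAP* dst,
                                const MY_BITMAP* write_set,
                                const MY_BITMAP* pk_bitmap)
{
  // dst must already be bitmap_init()ed with the same n_bits.
  bitmap_copy(dst, write_set);      // columns the statement assigned
  bitmap_subtract(dst, pk_bitmap);  // never re-set PK columns on update
}
#endif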
5772 
5773 
5774 /*
5775   handler delete interface
5776 */
5777 
5778 int ha_ndbcluster::delete_row(const uchar *record)
5779 {
5780   return ndb_delete_row(record, FALSE);
5781 }
5782 
5783 bool ha_ndbcluster::start_bulk_delete()
5784 {
5785   DBUG_ENTER("start_bulk_delete");
5786   m_is_bulk_delete = true;
5787   DBUG_RETURN(0); // Bulk delete used by handler
5788 }
5789 
5790 int ha_ndbcluster::end_bulk_delete()
5791 {
5792   NdbTransaction* trans= m_thd_ndb->trans;
5793   DBUG_ENTER("end_bulk_delete");
5794   assert(m_is_bulk_delete); // Don't allow end() without start()
5795   m_is_bulk_delete = false;
5796 
5797   // m_handler must be NULL or point to _this_ handler instance
5798   assert(m_thd_ndb->m_handler == NULL || m_thd_ndb->m_handler == this);
5799 
5800   if (m_thd_ndb->m_handler &&
5801       m_read_before_write_removal_possible)
5802   {
5803     /*
5804       This is an autocommit involving only one table and rbwr is on
5805 
5806       Commit the autocommit transaction early(before the usual place
5807       in ndbcluster_commit) in order to:
5808       1) save one round trip, "no-commit+commit" converted to "commit"
5809       2) return the correct number of updated and affected rows
5810          to the delete loop(which will ask handler in rbwr mode)
5811     */
5812     DBUG_PRINT("info", ("committing auto-commit+rbwr early"));
5813     uint ignore_count= 0;
5814     const int ignore_error= 1;
5815     if (execute_commit(table->in_use, m_thd_ndb, trans,
5816                        m_thd_ndb->m_force_send, ignore_error,
5817                        &ignore_count) != 0)
5818     {
5819       no_uncommitted_rows_execute_failure();
5820       DBUG_RETURN(ndb_err(trans));
5821     }
5822     DBUG_PRINT("info", ("ignore_count: %u", ignore_count));
5823     assert(m_rows_deleted >= ignore_count);
5824     m_rows_deleted-= ignore_count;
5825     DBUG_RETURN(0);
5826   }
5827 
5828   if (m_thd_ndb->m_unsent_bytes == 0)
5829   {
5830     DBUG_PRINT("exit", ("skip execute - no unsent bytes"));
5831     DBUG_RETURN(0);
5832   }
5833 
5834   if (thd_allow_batch(table->in_use))
5835   {
5836     /*
5837       Turned on by @@transaction_allow_batching=ON
5838       or implicitly by slave exec thread
5839     */
5840     DBUG_PRINT("exit", ("skip execute - transaction_allow_batching is ON"));
5841     DBUG_RETURN(0);
5842   }
5843 
5844   if (m_thd_ndb->m_handler)
5845   {
5846     // Execute at commit time(in 'ndbcluster_commit') to save a round trip
5847     DBUG_PRINT("exit", ("skip execute - simple autocommit"));
5848     DBUG_RETURN(0);
5849   }
5850 
5851   uint ignore_count= 0;
5852   if (execute_no_commit(m_thd_ndb, trans,
5853                         m_ignore_no_key || m_read_before_write_removal_used,
5854                         &ignore_count) != 0)
5855   {
5856     no_uncommitted_rows_execute_failure();
5857     DBUG_RETURN(ndb_err(trans));
5858   }
5859 
5860   assert(m_rows_deleted >= ignore_count);
5861   m_rows_deleted-= ignore_count;
5862   no_uncommitted_rows_update(ignore_count);
5863   DBUG_RETURN(0);
5864 }
5865 
5866 
5867 /**
5868   Delete one record from NDB, using primary key.
5869 */
5870 
5871 int ha_ndbcluster::ndb_delete_row(const uchar *record,
5872                                   bool primary_key_update)
5873 {
5874   THD *thd= table->in_use;
5875   Thd_ndb *thd_ndb= get_thd_ndb(thd);
5876   NdbTransaction *trans= m_thd_ndb->trans;
5877   NdbScanOperation* cursor= m_active_cursor;
5878   const NdbOperation *op;
5879   uint32 part_id= ~uint32(0);
5880   int error;
5881   bool allow_batch= !m_delete_cannot_batch &&
5882     (m_is_bulk_delete || thd_allow_batch(thd));
5883 
5884   DBUG_ENTER("ndb_delete_row");
5885   DBUG_ASSERT(trans);
5886 
5887   error = check_slave_state(thd);
5888   if (unlikely(error))
5889     DBUG_RETURN(error);
5890 
5891   ha_statistic_increment(&SSV::ha_delete_count);
5892   m_rows_changed++;
5893 
5894   bool skip_partition_for_unique_index= FALSE;
5895   if (m_use_partition_pruning)
5896   {
5897     if (!cursor && m_read_before_write_removal_used)
5898     {
5899       ndb_index_type type= get_index_type(active_index);
5900       /*
5901         Ndb unique indexes are global so when
5902         m_read_before_write_removal_used is active
5903         the unique index can be used directly for deleting
5904         without finding the partitions
5905       */
5906       if (type == UNIQUE_INDEX ||
5907           type == UNIQUE_ORDERED_INDEX)
5908       {
5909         skip_partition_for_unique_index= TRUE;
5910         goto skip_partition_pruning;
5911       }
5912     }
5913     if ((error= get_part_for_delete(record, table->record[0], m_part_info,
5914                                     &part_id)))
5915     {
5916       DBUG_RETURN(error);
5917     }
5918   skip_partition_pruning:
5919     (void)0;
5920   }
5921 
5922   NdbOperation::OperationOptions options;
5923   NdbOperation::OperationOptions *poptions = NULL;
5924   options.optionsPresent=0;
5925 
5926   eventSetAnyValue(thd, &options);
5927 
5928   /*
5929     Poor approximation: assume a delete costs ~ 12 bytes + (write size / 4)
5930   */
5931   uint delete_size= 12 + (m_bytes_per_write >> 2);
5932   bool need_flush= add_row_check_if_batch_full_size(thd_ndb, delete_size);
5933 
5934   if (thd->slave_thread || THDVAR(thd, deferred_constraints))
5935   {
5936     options.optionsPresent |=
5937       NdbOperation::OperationOptions::OO_DEFERRED_CONSTAINTS;
5938   }
5939 
5940   if (cursor)
5941   {
5942     if (options.optionsPresent != 0)
5943       poptions = &options;
5944 
5945     /*
5946       We are scanning records and want to delete the record
5947       that was just found.  Call deleteCurrentTuple on the cursor
5948       to take over the lock in a new delete operation,
5949       which also sets the primary key of the record from
5950       the active record in the cursor.
5951     */
5952     DBUG_PRINT("info", ("Calling deleteTuple on cursor"));
5953     if ((op = cursor->deleteCurrentTuple(trans, m_ndb_record,
5954                                          NULL, // result_row
5955                                          NULL, // result_mask
5956                                          poptions,
5957                                          sizeof(NdbOperation::OperationOptions))) == 0)
5958       ERR_RETURN(trans->getNdbError());
5959     m_lock_tuple= FALSE;
5960     thd_ndb->m_unsent_bytes+= 12;
5961 
5962     no_uncommitted_rows_update(-1);
5963     m_rows_deleted++;
5964 
5965     if (!(primary_key_update || m_delete_cannot_batch))
5966     {
5967       // If deleting from cursor, NoCommit will be handled in next_result
5968       DBUG_RETURN(0);
5969     }
5970   }
5971   else
5972   {
5973     const NdbRecord *key_rec;
5974     const uchar *key_row;
5975 
5976     if (m_user_defined_partitioning && !skip_partition_for_unique_index)
5977     {
5978       options.optionsPresent|= NdbOperation::OperationOptions::OO_PARTITION_ID;
5979       options.partitionId= part_id;
5980     }
5981 
5982     setup_key_ref_for_ndb_record(&key_rec, &key_row, record,
5983 				 m_read_before_write_removal_used);
5984 
5985 #ifdef HAVE_NDB_BINLOG
5986     Uint32 buffer[ MAX_CONFLICT_INTERPRETED_PROG_SIZE ];
5987     NdbInterpretedCode code(m_table, buffer,
5988                             sizeof(buffer)/sizeof(buffer[0]));
5989     if (thd->slave_thread && m_share->m_cfn_share &&
5990         m_share->m_cfn_share->m_conflict_fn)
5991     {
5992       /* Conflict resolution in slave thread. */
5993       if (unlikely((error = prepare_conflict_detection(DELETE_ROW,
5994                                                        key_rec,
5995                                                        key_row, /* old_data */
5996                                                        NULL,    /* new_data */
5997                                                        &code,
5998                                                        &options))))
5999         DBUG_RETURN(error);
6000     }
6001 #endif /* HAVE_NDB_BINLOG */
6002     if (options.optionsPresent != 0)
6003       poptions= &options;
6004 
6005     if (!(op=trans->deleteTuple(key_rec, (const char *)key_row,
6006                                 m_ndb_record,
6007                                 NULL, // row
6008                                 NULL, // mask
6009                                 poptions,
6010                                 sizeof(NdbOperation::OperationOptions))))
6011       ERR_RETURN(trans->getNdbError());
6012 
6013     no_uncommitted_rows_update(-1);
6014     m_rows_deleted++;
6015 
6016     /*
6017       Check if we can batch the delete.
6018 
6019       We don't batch deletes as part of primary key updates.
6020       We do not batch deletes on tables with no primary key. For such tables,
6021       replication uses full table scan to locate the row to delete. The
6022       problem is the following scenario when deleting 2 (or more) rows:
6023 
6024        1. Table scan to locate the first row.
6025        2. Delete the row, batched so no execute.
6026        3. Table scan to locate the second row is executed, along with the
6027           batched delete operation from step 2.
6028        4. The first row is returned from nextResult() (not deleted yet).
6029        5. The kernel deletes the row (operation from step 2).
6030        6. lockCurrentTuple() is called on the row returned in step 4. However,
6031           as that row is now deleted, the operation fails and the transaction
6032           is aborted.
6033        7. The delete of the second tuple now fails, as the transaction has
6034           been aborted.
6035     */
6036 
6037     if ( allow_batch &&
6038 	 table_share->primary_key != MAX_KEY &&
6039 	 !primary_key_update &&
6040 	 !need_flush)
6041     {
6042       DBUG_RETURN(0);
6043     }
6044   }
6045 
6046   // Execute delete operation
6047   uint ignore_count= 0;
6048   if (execute_no_commit(m_thd_ndb, trans,
6049                         m_ignore_no_key || m_read_before_write_removal_used,
6050                         &ignore_count) != 0)
6051   {
6052     no_uncommitted_rows_execute_failure();
6053     DBUG_RETURN(ndb_err(trans));
6054   }
6055   if (!primary_key_update)
6056   {
6057     assert(m_rows_deleted >= ignore_count);
6058     m_rows_deleted-= ignore_count;
6059     no_uncommitted_rows_update(ignore_count);
6060   }
6061   DBUG_RETURN(0);
6062 }
6063 
6064 /**
6065   Unpack a record returned from a scan.
6066   We copy field-for-field to
6067    1. Avoid unnecessary copying for sparse rows.
6068    2. Properly initialize unused null bits.
6069   Note that we do not unpack all returned rows; some primary/unique key
6070   operations can read directly into the destination row.
6071 */
6072 void ha_ndbcluster::unpack_record(uchar *dst_row, const uchar *src_row)
6073 {
6074   int res;
6075   DBUG_ASSERT(src_row != NULL);
6076 
6077   my_ptrdiff_t dst_offset= dst_row - table->record[0];
6078   my_ptrdiff_t src_offset= src_row - table->record[0];
6079 
6080   /* Initialize the NULL bitmap. */
6081   memset(dst_row, 0xff, table->s->null_bytes);
6082 
6083   uchar *blob_ptr= m_blobs_buffer;
6084 
6085   for (uint i= 0; i < table_share->fields; i++)
6086   {
6087     Field *field= table->field[i];
6088     if (bitmap_is_set(table->read_set, i))
6089     {
6090       if (field->type() == MYSQL_TYPE_BIT)
6091       {
6092         Field_bit *field_bit= static_cast<Field_bit*>(field);
6093         if (!field->is_real_null(src_offset))
6094         {
6095           field->move_field_offset(src_offset);
6096           longlong value= field_bit->val_int();
6097           field->move_field_offset(dst_offset-src_offset);
6098           field_bit->set_notnull();
6099           /* Field_bit in DBUG requires the bit set in write_set for store(). */
6100           my_bitmap_map *old_map=
6101             dbug_tmp_use_all_columns(table, table->write_set);
6102           int res = field_bit->store(value, true);
6103           assert(res == 0); NDB_IGNORE_VALUE(res);
6104           dbug_tmp_restore_column_map(table->write_set, old_map);
6105           field->move_field_offset(-dst_offset);
6106         }
6107       }
6108       else if (field->flags & BLOB_FLAG)
6109       {
6110         Field_blob *field_blob= (Field_blob *)field;
6111         NdbBlob *ndb_blob= m_value[i].blob;
6112         /* unpack_record() is *only* called for scan result processing
6113          * *while* the scan is open and the Blob is active.
6114          * Verify Blob state to be certain.
6115          * Accessing PK/UK op Blobs after execute() is unsafe
6116          */
6117         DBUG_ASSERT(ndb_blob != 0);
6118         DBUG_ASSERT(ndb_blob->getState() == NdbBlob::Active);
6119         int isNull;
6120         res= ndb_blob->getNull(isNull);
6121         DBUG_ASSERT(res == 0);                  // Already succeeded once
6122         Uint64 len64= 0;
6123         field_blob->move_field_offset(dst_offset);
6124         if (!isNull)
6125         {
6126           res= ndb_blob->getLength(len64);
6127           DBUG_ASSERT(res == 0 && len64 <= (Uint64)0xffffffff);
6128           field->set_notnull();
6129         }
6130         /* Need not set_null(), as we initialized null bits to 1 above. */
6131         field_blob->set_ptr((uint32)len64, blob_ptr);
6132         field_blob->move_field_offset(-dst_offset);
6133         blob_ptr+= (len64 + 7) & ~((Uint64)7);
6134       }
6135       else
6136       {
6137         field->move_field_offset(src_offset);
6138         /* Normal field (not blob or bit type). */
6139         if (!field->is_null())
6140         {
6141           /* Only copy actually used bytes of varstrings. */
6142           uint32 actual_length= field_used_length(field);
6143           uchar *src_ptr= field->ptr;
6144           field->move_field_offset(dst_offset - src_offset);
6145           field->set_notnull();
6146           memcpy(field->ptr, src_ptr, actual_length);
6147 #ifdef HAVE_purify
6148           /*
6149             We get Valgrind warnings on uninitialised padding bytes in
6150             varstrings, for example when writing rows to temporary tables.
6151             So for valgrind builds we pad with zeros, not needed for
6152             production code.
6153           */
6154           if (actual_length < field->pack_length())
6155             memset(field->ptr + actual_length, 0,
6156                   field->pack_length() - actual_length);
6157 #endif
6158           field->move_field_offset(-dst_offset);
6159         }
6160         else
6161           field->move_field_offset(-src_offset);
6162         /* No action needed for a NULL field. */
6163       }
6164     }
6165   }
6166 }
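
/*
  Illustration of the blob buffer layout maintained above (a sketch
  based on the arithmetic in unpack_record()): blob values are packed
  back to back in m_blobs_buffer and each value starts on an 8-byte
  boundary, so a 13 byte blob advances blob_ptr by

    (13 + 7) & ~((Uint64)7) == 16

  while a NULL blob has len64 == 0 and does not advance the pointer.
*/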


/**
  Get the default value of the field from default_values of the table.
*/
static void get_default_value(void *def_val, Field *field)
{
  DBUG_ASSERT(field != NULL);

  my_ptrdiff_t src_offset= field->table->s->default_values - field->table->record[0];

  {
    if (bitmap_is_set(field->table->read_set, field->field_index))
    {
      if (field->type() == MYSQL_TYPE_BIT)
      {
        Field_bit *field_bit= static_cast<Field_bit*>(field);
        if (!field->is_real_null(src_offset))
        {
          field->move_field_offset(src_offset);
          longlong value= field_bit->val_int();
          /* Map to NdbApi format - two Uint32s */
          Uint32 out[2];
          out[0] = 0;
          out[1] = 0;
          for (int b=0; b < 64; b++)
          {
            out[b >> 5] |= (value & 1) << (b & 31);

            value= value >> 1;
          }
          memcpy(def_val, out, sizeof(longlong));
          field->move_field_offset(-src_offset);
        }
      }
      else if (field->flags & BLOB_FLAG)
      {
        assert(false);
      }
      else
      {
        field->move_field_offset(src_offset);
        /* Normal field (not blob or bit type). */
        if (!field->is_null())
        {
          /* Only copy actually used bytes of varstrings. */
          uint32 actual_length= field_used_length(field);
          uchar *src_ptr= field->ptr;
          field->set_notnull();
          memcpy(def_val, src_ptr, actual_length);
#ifdef HAVE_purify
          if (actual_length < field->pack_length())
            memset(((char*)def_val) + actual_length, 0,
                  field->pack_length() - actual_length);
#endif
        }
        field->move_field_offset(-src_offset);
        /* No action needed for a NULL field. */
      }
    }
  }
}
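
/*
  Worked example of the bit-field mapping in get_default_value(): the
  64-bit value is split into two Uint32 words, least significant word
  first. For value 0x0000000100000002:

    out[0] == 0x00000002   (bits  0..31)
    out[1] == 0x00000001   (bits 32..63)
*/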

/*
    DBUG_EXECUTE("value", print_results(););
*/

void ha_ndbcluster::print_results()
{
  DBUG_ENTER("print_results");

#ifndef DBUG_OFF

  char buf_type[MAX_FIELD_WIDTH], buf_val[MAX_FIELD_WIDTH];
  String type(buf_type, sizeof(buf_type), &my_charset_bin);
  String val(buf_val, sizeof(buf_val), &my_charset_bin);
  for (uint f= 0; f < table_share->fields; f++)
  {
    /* Use DBUG_PRINT since DBUG_FILE cannot be filtered out */
    char buf[2000];
    Field *field;
    void* ptr;
    NdbValue value;

    buf[0]= 0;
    field= table->field[f];
    if (!(value= m_value[f]).ptr)
    {
      strmov(buf, "not read");
      goto print_value;
    }

    ptr= field->ptr;

    if (! (field->flags & BLOB_FLAG))
    {
      if (value.rec->isNULL())
      {
        strmov(buf, "NULL");
        goto print_value;
      }
      type.length(0);
      val.length(0);
      field->sql_type(type);
      field->val_str(&val);
      my_snprintf(buf, sizeof(buf), "%s %s", type.c_ptr(), val.c_ptr());
    }
    else
    {
      NdbBlob *ndb_blob= value.blob;
      bool isNull= TRUE;
      assert(ndb_blob->getState() == NdbBlob::Active);
      ndb_blob->getNull(isNull);
      if (isNull)
        strmov(buf, "NULL");
    }

print_value:
    DBUG_PRINT("value", ("%u,%s: %s", f, field->field_name, buf));
  }
#endif
  DBUG_VOID_RETURN;
}


/*
  Set fields in partition functions in read set for underlying handlers

  SYNOPSIS
    include_partition_fields_in_used_fields()

  RETURN VALUE
    NONE

  DESCRIPTION
    Some handlers only read fields as specified by the bitmap for the
    read set. For partitioned handlers we always require that the
    fields of the partition functions are read such that we can
    calculate the partition id to place updated and deleted records.
*/

static void
include_partition_fields_in_used_fields(Field **ptr, MY_BITMAP *read_set)
{
  DBUG_ENTER("include_partition_fields_in_used_fields");
  do
  {
    bitmap_set_bit(read_set, (*ptr)->field_index);
  } while (*(++ptr));
  DBUG_VOID_RETURN;
}


int ha_ndbcluster::index_init(uint index, bool sorted)
{
  DBUG_ENTER("ha_ndbcluster::index_init");
  DBUG_PRINT("enter", ("index: %u  sorted: %d", index, sorted));
  active_index= index;
  m_sorted= sorted;
  /*
    Locks are explicitly released in scan
    unless m_lock.type == TL_READ_HIGH_PRIORITY
    and there is no subsequent call to unlock_row()
  */
  m_lock_tuple= FALSE;
  if (table_share->primary_key == MAX_KEY &&
      m_use_partition_pruning)
    include_partition_fields_in_used_fields(
      m_part_info->full_part_field_array,
      table->read_set);
  DBUG_RETURN(0);
}


int ha_ndbcluster::index_end()
{
  DBUG_ENTER("ha_ndbcluster::index_end");
  DBUG_RETURN(close_scan());
}

/**
  Check if key contains null.
*/
static
int
check_null_in_key(const KEY* key_info, const uchar *key, uint key_len)
{
  KEY_PART_INFO *curr_part, *end_part;
  const uchar* end_ptr= key + key_len;
  curr_part= key_info->key_part;
  end_part= curr_part + key_info->user_defined_key_parts;

  for (; curr_part != end_part && key < end_ptr; curr_part++)
  {
    if (curr_part->null_bit && *key)
      return 1;

    key += curr_part->store_length;
  }
  return 0;
}
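
/*
  Sketch of the key buffer that check_null_in_key() walks (the
  standard MySQL key tuple format is assumed): each nullable key part
  is preceded by a one byte null indicator, which is included in
  store_length. For a key over (a INT NULL, b INT NOT NULL):

    [null_a][a0 a1 a2 a3][b0 b1 b2 b3]

  A non-zero null_a byte means "a IS NULL" and makes the function
  return 1.
*/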


int ha_ndbcluster::index_read_idx_map(uchar* buf, uint index,
                                      const uchar* key,
                                      key_part_map keypart_map,
                                      enum ha_rkey_function find_flag)
{
  DBUG_ENTER("ha_ndbcluster::index_read_idx_map");
  int error= index_init(index, 0);
  if (unlikely(error))
    DBUG_RETURN(error);

  DBUG_RETURN(index_read_map(buf, key, keypart_map, find_flag));
}


int ha_ndbcluster::index_read(uchar *buf,
                              const uchar *key, uint key_len,
                              enum ha_rkey_function find_flag)
{
  key_range start_key;
  bool descending= FALSE;
  DBUG_ENTER("ha_ndbcluster::index_read");
  DBUG_PRINT("enter", ("active_index: %u, key_len: %u, find_flag: %d",
                       active_index, key_len, find_flag));

  start_key.key= key;
  start_key.length= key_len;
  start_key.flag= find_flag;
  descending= FALSE;
  switch (find_flag) {
  case HA_READ_KEY_OR_PREV:
  case HA_READ_BEFORE_KEY:
  case HA_READ_PREFIX_LAST:
  case HA_READ_PREFIX_LAST_OR_PREV:
    descending= TRUE;
    break;
  default:
    break;
  }
  const int error= read_range_first_to_buf(&start_key, 0, descending,
                                           m_sorted, buf);
  table->status=error ? STATUS_NOT_FOUND: 0;
  DBUG_RETURN(error);
}


int ha_ndbcluster::index_next(uchar *buf)
{
  DBUG_ENTER("ha_ndbcluster::index_next");
  ha_statistic_increment(&SSV::ha_read_next_count);
  const int error= next_result(buf);
  table->status=error ? STATUS_NOT_FOUND: 0;
  DBUG_RETURN(error);
}


int ha_ndbcluster::index_prev(uchar *buf)
{
  DBUG_ENTER("ha_ndbcluster::index_prev");
  ha_statistic_increment(&SSV::ha_read_prev_count);
  const int error= next_result(buf);
  table->status=error ? STATUS_NOT_FOUND: 0;
  DBUG_RETURN(error);
}


int ha_ndbcluster::index_first(uchar *buf)
{
  DBUG_ENTER("ha_ndbcluster::index_first");
  ha_statistic_increment(&SSV::ha_read_first_count);
  // Start the ordered index scan and fetch the first row

  // Only HA_READ_ORDER indexes get called by index_first
  const int error= ordered_index_scan(0, 0, m_sorted, FALSE, buf, NULL);
  table->status=error ? STATUS_NOT_FOUND: 0;
  DBUG_RETURN(error);
}


int ha_ndbcluster::index_last(uchar *buf)
{
  DBUG_ENTER("ha_ndbcluster::index_last");
  ha_statistic_increment(&SSV::ha_read_last_count);
  const int error= ordered_index_scan(0, 0, m_sorted, TRUE, buf, NULL);
  table->status=error ? STATUS_NOT_FOUND: 0;
  DBUG_RETURN(error);
}

int ha_ndbcluster::index_read_last(uchar * buf, const uchar * key, uint key_len)
{
  DBUG_ENTER("ha_ndbcluster::index_read_last");
  DBUG_RETURN(index_read(buf, key, key_len, HA_READ_PREFIX_LAST));
}


/**
  Read first row (only) from a table.

  This is actually never (yet) called for ndbcluster tables, as these
  table types do not set HA_STATS_RECORDS_IS_EXACT.

  UPDATE: Might be called if the predicate contains '<column> IS NULL', and
          <column> is defined as 'NOT NULL' (or is part of primary key)

  Implemented regardless of this as the default implementation would break
  any pushed joins as it calls ha_rnd_end() / ha_index_end() at end of execution.
  */
int ha_ndbcluster::read_first_row(uchar * buf, uint primary_key)
{
  register int error;
  DBUG_ENTER("ha_ndbcluster::read_first_row");

  ha_statistic_increment(&SSV::ha_read_first_count);

  /*
    If there are very few deleted rows in the table, find the first row by
    scanning the table.
    TODO remove the test for HA_READ_ORDER
  */
  if (stats.deleted < 10 || primary_key >= MAX_KEY ||
      !(index_flags(primary_key, 0, 0) & HA_READ_ORDER))
  {
    (void) ha_rnd_init(1);
    while ((error= rnd_next(buf)) == HA_ERR_RECORD_DELETED) ;
  }
  else
  {
    /* Find the first row through the primary key */
    (void) ha_index_init(primary_key, 0);
    error=index_first(buf);
  }
  DBUG_RETURN(error);
}


int ha_ndbcluster::read_range_first_to_buf(const key_range *start_key,
                                           const key_range *end_key,
                                           bool desc, bool sorted,
                                           uchar* buf)
{
  part_id_range part_spec;
  ndb_index_type type= get_index_type(active_index);
  const KEY* key_info= table->key_info+active_index;
  int error;
  DBUG_ENTER("ha_ndbcluster::read_range_first_to_buf");
  DBUG_PRINT("info", ("desc: %d, sorted: %d", desc, sorted));

  if (unlikely((error= close_scan())))
    DBUG_RETURN(error);

  if (m_use_partition_pruning)
  {
    DBUG_ASSERT(m_pushed_join_operation != PUSHED_ROOT);
    get_partition_set(table, buf, active_index, start_key, &part_spec);
    DBUG_PRINT("info", ("part_spec.start_part: %u  part_spec.end_part: %u",
                        part_spec.start_part, part_spec.end_part));
    /*
      If partition pruning has found no partition in the set
      we can return HA_ERR_END_OF_FILE.
      If partition pruning has found exactly one partition in the set
      we can optimize the scan to run towards that partition only.
    */
    if (part_spec.start_part > part_spec.end_part)
    {
      DBUG_RETURN(HA_ERR_END_OF_FILE);
    }

    if (part_spec.start_part == part_spec.end_part)
    {
      /*
        Only one partition needs to be scanned; if sorted output was
        requested we no longer need to enforce it, since the output
        from one ordered partition index is always sorted.
      */
      sorted= FALSE;
      if (unlikely(!get_transaction_part_id(part_spec.start_part, error)))
      {
        DBUG_RETURN(error);
      }
    }
  }

  switch (type){
  case PRIMARY_KEY_ORDERED_INDEX:
  case PRIMARY_KEY_INDEX:
    if (start_key &&
        start_key->length == key_info->key_length &&
        start_key->flag == HA_READ_KEY_EXACT)
    {
      if (!m_thd_ndb->trans)
        if (unlikely(!start_transaction_key(active_index,
                                            start_key->key, error)))
          DBUG_RETURN(error);
      error= pk_read(start_key->key, start_key->length, buf,
                     (m_use_partition_pruning)? &(part_spec.start_part) : NULL);
      DBUG_RETURN(error == HA_ERR_KEY_NOT_FOUND ? HA_ERR_END_OF_FILE : error);
    }
    break;
  case UNIQUE_ORDERED_INDEX:
  case UNIQUE_INDEX:
    if (start_key && start_key->length == key_info->key_length &&
        start_key->flag == HA_READ_KEY_EXACT &&
        !check_null_in_key(key_info, start_key->key, start_key->length))
    {
      if (!m_thd_ndb->trans)
        if (unlikely(!start_transaction_key(active_index,
                                            start_key->key, error)))
          DBUG_RETURN(error);
      error= unique_index_read(start_key->key, start_key->length, buf);
      DBUG_RETURN(error == HA_ERR_KEY_NOT_FOUND ? HA_ERR_END_OF_FILE : error);
    }
    else if (type == UNIQUE_INDEX)
      DBUG_RETURN(full_table_scan(key_info,
                                  start_key,
                                  end_key,
                                  buf));
    break;
  default:
    break;
  }
  if (!m_use_partition_pruning && !m_thd_ndb->trans)
  {
    get_partition_set(table, buf, active_index, start_key, &part_spec);
    if (part_spec.start_part == part_spec.end_part)
      if (unlikely(!start_transaction_part_id(part_spec.start_part, error)))
        DBUG_RETURN(error);
  }
  // Start the ordered index scan and fetch the first row
  DBUG_RETURN(ordered_index_scan(start_key, end_key, sorted, desc, buf,
                                 (m_use_partition_pruning)? &part_spec : NULL));
}
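
/*
  Illustration of the pruning decisions in read_range_first_to_buf()
  (hypothetical part_spec values):

    start_part > end_part,  e.g. {4,3}: empty partition set,
                            HA_ERR_END_OF_FILE is returned
    start_part == end_part, e.g. {2,2}: single partition; 'sorted' is
                            dropped and the transaction can be started
                            with a partition hint
    start_part < end_part,  e.g. {0,3}: ordered index scan over the
                            pruned range of partitions
*/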

int ha_ndbcluster::read_range_first(const key_range *start_key,
                                    const key_range *end_key,
                                    bool eq_r, bool sorted)
{
  uchar* buf= table->record[0];
  DBUG_ENTER("ha_ndbcluster::read_range_first");
  DBUG_RETURN(read_range_first_to_buf(start_key, end_key, FALSE,
                                      sorted, buf));
}

int ha_ndbcluster::read_range_next()
{
  DBUG_ENTER("ha_ndbcluster::read_range_next");
  DBUG_RETURN(next_result(table->record[0]));
}


int ha_ndbcluster::rnd_init(bool scan)
{
  int error;
  DBUG_ENTER("rnd_init");
  DBUG_PRINT("enter", ("scan: %d", scan));

  if ((error= close_scan()))
    DBUG_RETURN(error);
  index_init(table_share->primary_key, 0);
  DBUG_RETURN(0);
}

int ha_ndbcluster::close_scan()
{
  /*
    workaround for bug #39872 - explain causes segv
    - rnd_end/close_scan is called on unlocked table
    - should be fixed in server code, but this will
    not be done until 6.0 as it is too intrusive
  */
  if (m_thd_ndb == NULL)
    return 0;
  NdbTransaction *trans= m_thd_ndb->trans;
  int error;
  DBUG_ENTER("close_scan");

  if (m_active_query)
  {
    m_active_query->close(m_thd_ndb->m_force_send);
    m_active_query= NULL;
  }

  NdbScanOperation *cursor= m_active_cursor;

  if (!cursor)
  {
    cursor = m_multi_cursor;
    if (!cursor)
      DBUG_RETURN(0);
  }

  if ((error= scan_handle_lock_tuple(cursor, trans)) != 0)
    DBUG_RETURN(error);

  if (m_thd_ndb->m_unsent_bytes)
  {
    /*
      Take over any pending operations to the
      deleting/updating transaction before closing the scan
    */
    DBUG_PRINT("info", ("thd_ndb->m_unsent_bytes: %ld",
                        (long) m_thd_ndb->m_unsent_bytes));
    if (execute_no_commit(m_thd_ndb, trans, m_ignore_no_key) != 0)
    {
      no_uncommitted_rows_execute_failure();
      DBUG_RETURN(ndb_err(trans));
    }
  }

  cursor->close(m_thd_ndb->m_force_send, TRUE);
  m_active_cursor= NULL;
  m_multi_cursor= NULL;
  DBUG_RETURN(0);
}

int ha_ndbcluster::rnd_end()
{
  DBUG_ENTER("rnd_end");
  DBUG_RETURN(close_scan());
}


int ha_ndbcluster::rnd_next(uchar *buf)
{
  DBUG_ENTER("rnd_next");
  ha_statistic_increment(&SSV::ha_read_rnd_next_count);

  int error;
  if (m_active_cursor)
    error= next_result(buf);
  else if (m_active_query)
    error= next_result(buf);
  else
    error= full_table_scan(NULL, NULL, NULL, buf);

  table->status= error ? STATUS_NOT_FOUND: 0;
  DBUG_RETURN(error);
}


/**
  An "interesting" record has been found and its pk
  retrieved by calling position. Now it's time to read
  the record from db once again.
*/

int ha_ndbcluster::rnd_pos(uchar *buf, uchar *pos)
{
  DBUG_ENTER("rnd_pos");
  ha_statistic_increment(&SSV::ha_read_rnd_count);
  // The primary key for the record is stored in pos
  // Perform a pk_read using primary key "index"
  {
    part_id_range part_spec;
    uint key_length= ref_length;
    if (m_user_defined_partitioning)
    {
      if (table_share->primary_key == MAX_KEY)
      {
        /*
          The partition id has been fetched from ndb
          and has been stored directly after the hidden key
        */
        DBUG_DUMP("key+part", pos, key_length);
        key_length= ref_length - sizeof(m_part_id);
        part_spec.start_part= part_spec.end_part= *(uint32 *)(pos + key_length);
      }
      else
      {
        key_range key_spec;
        KEY *key_info= table->key_info + table_share->primary_key;
        key_spec.key= pos;
        key_spec.length= key_length;
        key_spec.flag= HA_READ_KEY_EXACT;
        get_full_part_id_from_key(table, buf, key_info,
                                  &key_spec, &part_spec);
        DBUG_ASSERT(part_spec.start_part == part_spec.end_part);
      }
      DBUG_PRINT("info", ("partition id %u", part_spec.start_part));
    }
    DBUG_DUMP("key", pos, key_length);
    int res= pk_read(pos, key_length, buf,
                     (m_user_defined_partitioning) ?
                     &(part_spec.start_part)
                     : NULL);
    if (res == HA_ERR_KEY_NOT_FOUND)
    {
      /**
       * When using rnd_pos the server first retrieves a set of records
       * (typically by scanning them) and stores a unique identifier
       * (for ndb this is the primary key), and later retrieves the
       * record again using rnd_pos and the saved primary key. For ndb,
       * since we only support committed read, the record could have
       * been deleted between the "save" and the rnd_pos.
       * Therefore we return HA_ERR_RECORD_DELETED in this case rather than
       * HA_ERR_KEY_NOT_FOUND (which would cause the statement to be aborted)
       */
      res= HA_ERR_RECORD_DELETED;
    }
    table->status= res ? STATUS_NOT_FOUND: 0;
    DBUG_RETURN(res);
  }
}
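
/*
  Sketch of the 'pos' buffer consumed by rnd_pos() for a table with a
  hidden key and user-defined partitioning (this assumes
  NDB_HIDDEN_PRIMARY_KEY_LENGTH == 8 and a 32-bit m_part_id, matching
  the asserts in position() below):

    [hidden key, 8 bytes][partition id, 4 bytes]

  key_length is then ref_length - 4 and the partition id is read from
  pos + key_length.
*/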


/**
  Store the primary key of this record in ref
  variable, so that the row can be retrieved again later
  using "reference" in rnd_pos.
*/

void ha_ndbcluster::position(const uchar *record)
{
  KEY *key_info;
  KEY_PART_INFO *key_part;
  KEY_PART_INFO *end;
  uchar *buff;
  uint key_length;

  DBUG_ENTER("position");

  if (table_share->primary_key != MAX_KEY)
  {
    key_length= ref_length;
    key_info= table->key_info + table_share->primary_key;
    key_part= key_info->key_part;
    end= key_part + key_info->user_defined_key_parts;
    buff= ref;

    for (; key_part != end; key_part++)
    {
      if (key_part->null_bit) {
        /* Store 0 if the key part is a NULL part */
        if (record[key_part->null_offset]
            & key_part->null_bit) {
          *buff++= 1;
          continue;
        }
        *buff++= 0;
      }

      size_t len = key_part->length;
      const uchar * ptr = record + key_part->offset;
      Field *field = key_part->field;
      if (field->type() ==  MYSQL_TYPE_VARCHAR)
      {
        if (((Field_varstring*)field)->length_bytes == 1)
        {
          /**
           * Keys always use 2 bytes length
           */
          buff[0] = ptr[0];
          buff[1] = 0;
          memcpy(buff+2, ptr + 1, len);
        }
        else
        {
          memcpy(buff, ptr, len + 2);
        }
        len += 2;
      }
      else
      {
        memcpy(buff, ptr, len);
      }
      buff += len;
    }
  }
  else
  {
    // No primary key, get hidden key
    DBUG_PRINT("info", ("Getting hidden key"));
    // If the table has user-defined partitioning, save the partition id as well
    if (m_user_defined_partitioning)
    {
      DBUG_PRINT("info", ("Saving partition id %u", m_part_id));
      key_length= ref_length - sizeof(m_part_id);
      memcpy(ref+key_length, (void *)&m_part_id, sizeof(m_part_id));
    }
    else
      key_length= ref_length;
#ifndef DBUG_OFF
    int hidden_no= table->s->fields;
    const NDBTAB *tab= m_table;
    const NDBCOL *hidden_col= tab->getColumn(hidden_no);
    DBUG_ASSERT(hidden_col->getPrimaryKey() &&
                hidden_col->getAutoIncrement() &&
                key_length == NDB_HIDDEN_PRIMARY_KEY_LENGTH);
#endif
    memcpy(ref, &m_ref, key_length);
  }
#ifndef DBUG_OFF
  if (table_share->primary_key == MAX_KEY && m_user_defined_partitioning)
    DBUG_DUMP("key+part", ref, key_length+sizeof(m_part_id));
#endif
  DBUG_DUMP("ref", ref, key_length);
  DBUG_VOID_RETURN;
}
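
/*
  Example of the VARCHAR length promotion done by position() and
  assumed by cmp_ref() below: a key part over a VARCHAR column with a
  1-byte length prefix stores the value "ab" in 'ref' as

    record:  [0x02]['a']['b'] ...
    ref:     [0x02][0x00]['a']['b'] ... (key_part->length value bytes)

  i.e. keys always carry a 2-byte length, which is why cmp_ref() skips
  2 extra bytes for MYSQL_TYPE_VARCHAR key parts.
*/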

int
ha_ndbcluster::cmp_ref(const uchar * ref1, const uchar * ref2)
{
  DBUG_ENTER("cmp_ref");

  if (table_share->primary_key != MAX_KEY)
  {
    KEY *key_info= table->key_info + table_share->primary_key;
    KEY_PART_INFO *key_part= key_info->key_part;
    KEY_PART_INFO *end= key_part + key_info->user_defined_key_parts;

    for (; key_part != end; key_part++)
    {
      // NOTE: No need to check for null since PK is not-null

      Field *field= key_part->field;
      int result= field->key_cmp(ref1, ref2);
      if (result)
      {
        DBUG_RETURN(result);
      }

      if (field->type() ==  MYSQL_TYPE_VARCHAR)
      {
        ref1+= 2;
        ref2+= 2;
      }

      ref1+= key_part->length;
      ref2+= key_part->length;
    }
    DBUG_RETURN(0);
  }
  else
  {
    DBUG_RETURN(memcmp(ref1, ref2, ref_length));
  }
}

int ha_ndbcluster::info(uint flag)
{
  THD *thd= table->in_use;
  int result= 0;
  DBUG_ENTER("info");
  DBUG_PRINT("enter", ("flag: %d", flag));

  if (flag & HA_STATUS_POS)
    DBUG_PRINT("info", ("HA_STATUS_POS"));
  if (flag & HA_STATUS_TIME)
    DBUG_PRINT("info", ("HA_STATUS_TIME"));
  while (flag & HA_STATUS_VARIABLE)
  {
    if (!thd)
      thd= current_thd;
    DBUG_PRINT("info", ("HA_STATUS_VARIABLE"));

    if (!m_table_info)
    {
      if ((my_errno= check_ndb_connection(thd)))
        DBUG_RETURN(my_errno);
    }

    /*
      May need to update local copy of statistics in
      'm_table_info', either directly from datanodes,
      or from shared (mutex protected) cached copy, if:
       1) 'use_exact_count' has been set (by config or user).
       2) HA_STATUS_NO_LOCK -> read from shared cached copy.
       3) Local copy is invalid.
    */
    bool exact_count= THDVAR(thd, use_exact_count);
    if (exact_count                 ||         // 1)
        !(flag & HA_STATUS_NO_LOCK) ||         // 2)
        m_table_info == NULL        ||         // 3)
        m_table_info->records == ~(ha_rows)0)  // 3)
    {
      result= update_stats(thd, (exact_count || !(flag & HA_STATUS_NO_LOCK)));
      if (result)
        DBUG_RETURN(result);
    }
    /* Read from local statistics, fast and fuzzy, wo/ locks */
    else
    {
      DBUG_ASSERT(m_table_info->records != ~(ha_rows)0);
      stats.records= m_table_info->records +
                     m_table_info->no_uncommitted_rows_count;
    }

    if (thd->lex->sql_command != SQLCOM_SHOW_TABLE_STATUS &&
        thd->lex->sql_command != SQLCOM_SHOW_KEYS)
    {
      /*
        just use whatever stats we have. However,
        the optimizer interprets the values 0 and 1 as EXACT:
          -> < 2 should not be returned.
      */
      if (stats.records < 2)
        stats.records= 2;
    }
    break;
  }
  /* RPK moved to variable part */
  if (flag & HA_STATUS_VARIABLE)
  {
    /* No meaningful way to return error */
    DBUG_PRINT("info", ("rec_per_key"));
    set_rec_per_key();
  }
  if (flag & HA_STATUS_ERRKEY)
  {
    DBUG_PRINT("info", ("HA_STATUS_ERRKEY"));
    errkey= m_dupkey;
  }
  if (flag & HA_STATUS_AUTO)
  {
    DBUG_PRINT("info", ("HA_STATUS_AUTO"));
    if (m_table && table->found_next_number_field)
    {
      if (!thd)
        thd= current_thd;
      if ((my_errno= check_ndb_connection(thd)))
        DBUG_RETURN(my_errno);
      Ndb *ndb= get_ndb(thd);
      Ndb_tuple_id_range_guard g(m_share);

      Uint64 auto_increment_value64;
      if (ndb->readAutoIncrementValue(m_table, g.range,
                                      auto_increment_value64) == -1)
      {
        const NdbError err= ndb->getNdbError();
        sql_print_error("Error %lu in readAutoIncrementValue(): %s",
                        (ulong) err.code, err.message);
        stats.auto_increment_value= ~(ulonglong)0;
      }
      else
        stats.auto_increment_value= (ulonglong)auto_increment_value64;
    }
  }

  if(result == -1)
    result= HA_ERR_NO_CONNECTION;

  DBUG_RETURN(result);
}


void ha_ndbcluster::get_dynamic_partition_info(PARTITION_STATS *stat_info,
                                               uint part_id)
{
  DBUG_PRINT("info", ("ha_ndbcluster::get_dynamic_partition_info"));

  memset(stat_info, 0, sizeof(PARTITION_STATS));
  int error = 0;
  THD *thd = table->in_use;

  if (!thd)
    thd = current_thd;
  if (!m_table_info)
  {
    if ((error = check_ndb_connection(thd)))
      goto err;
  }
  error = update_stats(thd, 1, false, part_id);

  if (error == 0)
  {
    stat_info->records = stats.records;
    stat_info->mean_rec_length = stats.mean_rec_length;
    stat_info->data_file_length = stats.data_file_length;
    stat_info->delete_length = stats.delete_length;
    stat_info->max_data_file_length = stats.max_data_file_length;
    return;
  }

err:

  DBUG_PRINT("warning",
    ("ha_ndbcluster::get_dynamic_partition_info failed with error code %u",
     error));
}


int ha_ndbcluster::extra(enum ha_extra_function operation)
{
  DBUG_ENTER("extra");
  switch (operation) {
  case HA_EXTRA_IGNORE_DUP_KEY:       /* Dup keys don't rollback everything*/
    DBUG_PRINT("info", ("HA_EXTRA_IGNORE_DUP_KEY"));
    DBUG_PRINT("info", ("Ignoring duplicate key"));
    m_ignore_dup_key= TRUE;
    break;
  case HA_EXTRA_NO_IGNORE_DUP_KEY:
    DBUG_PRINT("info", ("HA_EXTRA_NO_IGNORE_DUP_KEY"));
    m_ignore_dup_key= FALSE;
    break;
  case HA_EXTRA_IGNORE_NO_KEY:
    DBUG_PRINT("info", ("HA_EXTRA_IGNORE_NO_KEY"));
    DBUG_PRINT("info", ("Turning on AO_IgnoreError at Commit/NoCommit"));
    m_ignore_no_key= TRUE;
    break;
  case HA_EXTRA_NO_IGNORE_NO_KEY:
    DBUG_PRINT("info", ("HA_EXTRA_NO_IGNORE_NO_KEY"));
    DBUG_PRINT("info", ("Turning off AO_IgnoreError at Commit/NoCommit"));
    m_ignore_no_key= FALSE;
    break;
  case HA_EXTRA_WRITE_CAN_REPLACE:
    DBUG_PRINT("info", ("HA_EXTRA_WRITE_CAN_REPLACE"));
    if (!m_has_unique_index ||
        current_thd->slave_thread || /* always set if slave, quick fix for bug 27378 */
        isManualBinlogExec(current_thd)) /* or if manual binlog application, for bug 46662 */
    {
      DBUG_PRINT("info", ("Turning ON use of write instead of insert"));
      m_use_write= TRUE;
    }
    break;
  case HA_EXTRA_WRITE_CANNOT_REPLACE:
    DBUG_PRINT("info", ("HA_EXTRA_WRITE_CANNOT_REPLACE"));
    DBUG_PRINT("info", ("Turning OFF use of write instead of insert"));
    m_use_write= FALSE;
    break;
  case HA_EXTRA_DELETE_CANNOT_BATCH:
    DBUG_PRINT("info", ("HA_EXTRA_DELETE_CANNOT_BATCH"));
    m_delete_cannot_batch= TRUE;
    break;
  case HA_EXTRA_UPDATE_CANNOT_BATCH:
    DBUG_PRINT("info", ("HA_EXTRA_UPDATE_CANNOT_BATCH"));
    m_update_cannot_batch= TRUE;
    break;
  // We don't implement 'KEYREAD'. However, KEYREAD also implies DISABLE_JOINPUSH.
  case HA_EXTRA_KEYREAD:
    DBUG_PRINT("info", ("HA_EXTRA_KEYREAD"));
    m_disable_pushed_join= TRUE;
    break;
  case HA_EXTRA_NO_KEYREAD:
    DBUG_PRINT("info", ("HA_EXTRA_NO_KEYREAD"));
    m_disable_pushed_join= FALSE;
    break;
  default:
    break;
  }

  DBUG_RETURN(0);
}
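
/*
  Summary of the state toggled by extra():

    HA_EXTRA_IGNORE_DUP_KEY / HA_EXTRA_NO_IGNORE_DUP_KEY -> m_ignore_dup_key
    HA_EXTRA_IGNORE_NO_KEY  / HA_EXTRA_NO_IGNORE_NO_KEY  -> m_ignore_no_key
    HA_EXTRA_WRITE_CAN_REPLACE / _CANNOT_REPLACE         -> m_use_write
    HA_EXTRA_DELETE_CANNOT_BATCH                         -> m_delete_cannot_batch
    HA_EXTRA_UPDATE_CANNOT_BATCH                         -> m_update_cannot_batch
    HA_EXTRA_KEYREAD / HA_EXTRA_NO_KEYREAD               -> m_disable_pushed_join

  All of these flags are restored to their defaults in
  ha_ndbcluster::reset() further down in this file.
*/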


bool ha_ndbcluster::start_read_removal()
{
  THD *thd= table->in_use;
  DBUG_ENTER("start_read_removal");

  if (uses_blob_value(table->write_set))
  {
    DBUG_PRINT("exit", ("No! Blob field in write_set"));
    DBUG_RETURN(false);
  }

  if (thd->lex->sql_command == SQLCOM_DELETE &&
      table_share->blob_fields)
  {
    DBUG_PRINT("exit", ("No! DELETE from table with blob(s)"));
    DBUG_RETURN(false);
  }

  if (table_share->primary_key == MAX_KEY)
  {
    DBUG_PRINT("exit", ("No! Table with hidden key"));
    DBUG_RETURN(false);
  }

  if (bitmap_is_overlapping(table->write_set, m_pk_bitmap_p))
  {
    DBUG_PRINT("exit", ("No! Updating primary key"));
    DBUG_RETURN(false);
  }

  if (m_has_unique_index)
  {
    for (uint i= 0; i < table_share->keys; i++)
    {
      const KEY* key= table->key_info + i;
      if ((key->flags & HA_NOSAME) &&
          bitmap_is_overlapping(table->write_set,
                                m_key_fields[i]))
      {
        DBUG_PRINT("exit", ("No! Unique key %d is updated", i));
        DBUG_RETURN(false);
      }
    }
  }
  m_read_before_write_removal_possible= TRUE;
  DBUG_PRINT("exit", ("Yes, rbwr is possible!"));
  DBUG_RETURN(true);
}


ha_rows ha_ndbcluster::end_read_removal(void)
{
  DBUG_ENTER("end_read_removal");
  DBUG_ASSERT(m_read_before_write_removal_possible);
  DBUG_PRINT("info", ("updated: %llu, deleted: %llu",
                      m_rows_updated, m_rows_deleted));
  DBUG_RETURN(m_rows_updated + m_rows_deleted);
}


int ha_ndbcluster::reset()
{
  DBUG_ENTER("ha_ndbcluster::reset");
  if (m_cond)
  {
    m_cond->cond_clear();
  }
#ifndef NDB_WITHOUT_JOIN_PUSHDOWN
  DBUG_ASSERT(m_active_query == NULL);
  if (m_pushed_join_operation==PUSHED_ROOT)  // Root of pushed query
  {
    delete m_pushed_join_member;             // Also delete QueryDef
  }
  m_pushed_join_member= NULL;
  m_pushed_join_operation= -1;
  m_disable_pushed_join= FALSE;
#endif

#if 0
  // Magnus, disable this "hack" until it's possible to test if
  // it's still needed
  /*
    Regular partition pruning will set the bitmap appropriately.
    Some queries like ALTER TABLE don't use partition pruning and
    thus the 'used_partitions' bitmap needs to be initialized
  */
  if (m_part_info)
    bitmap_set_all(&m_part_info->used_partitions);
#endif

  /* reset flags set by extra calls */
  m_read_before_write_removal_possible= FALSE;
  m_read_before_write_removal_used= FALSE;
  m_rows_updated= m_rows_deleted= 0;
  m_ignore_dup_key= FALSE;
  m_use_write= FALSE;
  m_ignore_no_key= FALSE;
  m_rows_inserted= (ha_rows) 0;
  m_rows_to_insert= (ha_rows) 1;
  m_delete_cannot_batch= FALSE;
  m_update_cannot_batch= FALSE;

  assert(m_is_bulk_delete == false);
  m_is_bulk_delete = false;
  DBUG_RETURN(0);
}


/**
  Start of an insert, remember number of rows to be inserted, it will
  be used in write_row and get_autoincrement to send an optimal number
  of rows in each roundtrip to the server.

  @param
   rows     number of rows to insert, 0 if unknown
*/

int
ha_ndbcluster::flush_bulk_insert(bool allow_batch)
{
  NdbTransaction *trans= m_thd_ndb->trans;
  DBUG_ENTER("ha_ndbcluster::flush_bulk_insert");
  DBUG_PRINT("info", ("Sending inserts to NDB, rows_inserted: %d",
                      (int)m_rows_inserted));
  DBUG_ASSERT(trans);


  if (! (m_thd_ndb->trans_options & TNTO_TRANSACTIONS_OFF))
  {
    if (!allow_batch &&
        execute_no_commit(m_thd_ndb, trans, m_ignore_no_key) != 0)
    {
      no_uncommitted_rows_execute_failure();
      DBUG_RETURN(ndb_err(trans));
    }
  }
  else
  {
    /*
      signal that transaction has been broken up and hence cannot
      be rolled back
    */
    THD *thd= table->in_use;
    thd->transaction.all.mark_modified_non_trans_table();
    thd->transaction.stmt.mark_modified_non_trans_table();
    if (execute_commit(thd, m_thd_ndb, trans, m_thd_ndb->m_force_send,
                       m_ignore_no_key) != 0)
    {
      no_uncommitted_rows_execute_failure();
      DBUG_RETURN(ndb_err(trans));
    }
    if (trans->restart() != 0)
    {
      DBUG_ASSERT(0);
      DBUG_RETURN(-1);
    }
  }
  DBUG_RETURN(0);
}

void ha_ndbcluster::start_bulk_insert(ha_rows rows)
{
  DBUG_ENTER("start_bulk_insert");
  DBUG_PRINT("enter", ("rows: %d", (int)rows));

  m_rows_inserted= (ha_rows) 0;
  if (!m_use_write && m_ignore_dup_key)
  {
    /*
      compare this if-expression with the one in write_row:
      we have a situation where peek_indexed_rows() will be called,
      so we cannot batch
    */
    DBUG_PRINT("info", ("Batching turned off as duplicate key is "
                        "ignored by using peek_row"));
    m_rows_to_insert= 1;
    DBUG_VOID_RETURN;
  }
  if (rows == (ha_rows) 0)
  {
    /* We don't know how many will be inserted, guess */
    m_rows_to_insert=
      (m_autoincrement_prefetch > DEFAULT_AUTO_PREFETCH)
      ? m_autoincrement_prefetch
      : DEFAULT_AUTO_PREFETCH;
    m_autoincrement_prefetch= m_rows_to_insert;
  }
  else
  {
    m_rows_to_insert= rows;
    if (m_autoincrement_prefetch < m_rows_to_insert)
      m_autoincrement_prefetch= m_rows_to_insert;
  }

  DBUG_VOID_RETURN;
}
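
/*
  Example of the prefetch sizing in start_bulk_insert(), with
  DEFAULT_AUTO_PREFETCH == 32:

    start_bulk_insert(0):   row count unknown; m_rows_to_insert and
                            m_autoincrement_prefetch both become
                            max(m_autoincrement_prefetch, 32)
    start_bulk_insert(500): m_rows_to_insert= 500 and the prefetch is
                            raised to 500, so one autoincrement range
                            can cover the whole batch
*/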

/**
  End of an insert.
*/
int ha_ndbcluster::end_bulk_insert()
{
  int error= 0;

  DBUG_ENTER("end_bulk_insert");
  // Check if last inserts need to be flushed

  THD *thd= table->in_use;
  Thd_ndb *thd_ndb= m_thd_ndb;

  if (!thd_allow_batch(thd) && thd_ndb->m_unsent_bytes)
  {
    bool allow_batch= (thd_ndb->m_handler != 0);
    error= flush_bulk_insert(allow_batch);
    if (error != 0)
      my_errno= error;
  }

  m_rows_inserted= (ha_rows) 0;
  m_rows_to_insert= (ha_rows) 1;
  DBUG_RETURN(error);
}


int ha_ndbcluster::extra_opt(enum ha_extra_function operation, ulong cache_size)
{
  DBUG_ENTER("extra_opt");
  DBUG_PRINT("enter", ("cache_size: %lu", cache_size));
  DBUG_RETURN(extra(operation));
}

static const char *ha_ndbcluster_exts[] = {
 ha_ndb_ext,
 NullS
};

const char** ha_ndbcluster::bas_ext() const
{
  return ha_ndbcluster_exts;
}

/**
  How many seeks it will take to read through the table.

  This is to be comparable to the number returned by records_in_range so
  that we can decide if we should scan the table or use keys.
*/

double ha_ndbcluster::scan_time()
{
  DBUG_ENTER("ha_ndbcluster::scan_time()");
  double res= rows2double(stats.records*1000);
  DBUG_PRINT("exit", ("table: %s value: %f",
                      m_tabname, res));
  DBUG_RETURN(res);
}
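
/*
  Example of the heuristic in scan_time(): with stats.records == 1000
  the returned value is rows2double(1000 * 1000) == 1000000. The large
  factor is presumably there to make a full NDB scan look expensive
  compared to records_in_range(), steering the optimizer towards key
  access when possible.
*/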

/*
  Convert MySQL table locks into locks supported by Ndb Cluster.
  Note that MySQL Cluster does not currently support distributed
  table locks, so to be safe one should set the cluster in Single
  User Mode before relying on table locks when updating tables
  from several MySQL servers.
*/

THR_LOCK_DATA **ha_ndbcluster::store_lock(THD *thd,
                                          THR_LOCK_DATA **to,
                                          enum thr_lock_type lock_type)
{
  DBUG_ENTER("store_lock");
  if (lock_type != TL_IGNORE && m_lock.type == TL_UNLOCK)
  {

    /* If we are not doing a LOCK TABLE, then allow multiple
       writers */

    /* Since NDB does not currently have table locks
       this is treated as an ordinary lock */

    const bool in_lock_tables = thd_in_lock_tables(thd);
    const uint sql_command = thd_sql_command(thd);
    if ((lock_type >= TL_WRITE_CONCURRENT_INSERT &&
         lock_type <= TL_WRITE) &&
        !(in_lock_tables && sql_command == SQLCOM_LOCK_TABLES))
      lock_type= TL_WRITE_ALLOW_WRITE;

    /* In queries of type INSERT INTO t1 SELECT ... FROM t2 ...
       MySQL would use the lock TL_READ_NO_INSERT on t2, and that
       would conflict with TL_WRITE_ALLOW_WRITE, blocking all inserts
       to t2. Convert the lock to a normal read lock to allow
       concurrent inserts to t2. */

    if (lock_type == TL_READ_NO_INSERT && !thd->in_lock_tables)
      lock_type= TL_READ;

    /**
     * We need locks on the source table when
     *   doing an offline alter...
     * In 5.1 this worked due to TL_WRITE_ALLOW_READ...
     * but that has been removed in 5.5,
     * so I simply add this to get it...
     */
    if (sql_command == SQLCOM_ALTER_TABLE)
      lock_type = TL_WRITE;

    m_lock.type=lock_type;
  }
  *to++= &m_lock;

  DBUG_PRINT("exit", ("lock_type: %d", lock_type));

  DBUG_RETURN(to);
}
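
/*
  Worked example of the conversions in store_lock(), for
  INSERT INTO t1 SELECT ... FROM t2 outside of LOCK TABLES:

    t1: requested TL_WRITE          -> TL_WRITE_ALLOW_WRITE
        (multiple writers are allowed since NDB has no table locks)
    t2: requested TL_READ_NO_INSERT -> TL_READ
        (so concurrent inserts to t2 are not blocked)

  Under ALTER TABLE the source table instead keeps TL_WRITE, to get
  the locking that TL_WRITE_ALLOW_READ used to provide in 5.1.
*/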

/*
  As MySQL will execute an external lock for every new table it uses
  we can use this to start the transactions.
  If we are in auto_commit mode we just need to start a transaction
  for the statement, this will be stored in thd_ndb.stmt.
  If not, we have to start a master transaction if there doesn't exist
  one from before, this will be stored in thd_ndb.all

  When a table lock is held, one transaction will be started which holds
  the table lock, and for each statement a separate statement transaction
  will be started.
  If we are locking the table then:
  - save the NdbDictionary::Table for easy access
  - save reference to table statistics
  - refresh list of the indexes for the table if needed (if altered)
 */

#ifdef HAVE_NDB_BINLOG
static int ndbcluster_update_apply_status(THD *thd, int do_update)
{
  Thd_ndb *thd_ndb= get_thd_ndb(thd);
  Ndb *ndb= thd_ndb->ndb;
  NDBDICT *dict= ndb->getDictionary();
  const NDBTAB *ndbtab;
  NdbTransaction *trans= thd_ndb->trans;
  ndb->setDatabaseName(NDB_REP_DB);
  Ndb_table_guard ndbtab_g(dict, NDB_APPLY_TABLE);
  if (!(ndbtab= ndbtab_g.get_table()))
  {
    return -1;
  }
  NdbOperation *op= 0;
  int r= 0;
  r|= (op= trans->getNdbOperation(ndbtab)) == 0;
  DBUG_ASSERT(r == 0);
  if (do_update)
    r|= op->updateTuple();
  else
    r|= op->writeTuple();
  DBUG_ASSERT(r == 0);
  // server_id
  r|= op->equal(0u, (Uint32)thd->server_id);
  DBUG_ASSERT(r == 0);
  if (!do_update)
  {
    // epoch
    r|= op->setValue(1u, (Uint64)0);
    DBUG_ASSERT(r == 0);
  }
  const char* group_master_log_name =
    ndb_mi_get_group_master_log_name();
  const Uint64 group_master_log_pos =
    ndb_mi_get_group_master_log_pos();
  const Uint64 future_event_relay_log_pos =
    ndb_mi_get_future_event_relay_log_pos();
  const Uint64 group_relay_log_pos =
    ndb_mi_get_group_relay_log_pos();

  // log_name
  char tmp_buf[FN_REFLEN];
  ndb_pack_varchar(ndbtab->getColumn(2u), tmp_buf,
                   group_master_log_name, strlen(group_master_log_name));
  r|= op->setValue(2u, tmp_buf);
  DBUG_ASSERT(r == 0);
  // start_pos
  r|= op->setValue(3u, group_master_log_pos);
  DBUG_ASSERT(r == 0);
  // end_pos
  r|= op->setValue(4u, group_master_log_pos +
                   (future_event_relay_log_pos - group_relay_log_pos));
  DBUG_ASSERT(r == 0);
  return 0;
}
#endif /* HAVE_NDB_BINLOG */

static void transaction_checks(THD *thd, Thd_ndb *thd_ndb)
{
  if (thd->lex->sql_command == SQLCOM_LOAD)
    thd_ndb->trans_options|= TNTO_TRANSACTIONS_OFF;
  else if (!thd->transaction.flags.enabled)
    thd_ndb->trans_options|= TNTO_TRANSACTIONS_OFF;
  else if (!THDVAR(thd, use_transactions))
    thd_ndb->trans_options|= TNTO_TRANSACTIONS_OFF;
  thd_ndb->m_force_send= THDVAR(thd, force_send);
  if (!thd->slave_thread)
    thd_ndb->m_batch_size= THDVAR(thd, batch_size);
  else
  {
    thd_ndb->m_batch_size= THDVAR(NULL, batch_size); /* using global value */
    /* Do not use hinted TC selection in slave thread */
    THDVAR(thd, optimized_node_selection)=
      THDVAR(NULL, optimized_node_selection) & 1; /* using global value */
  }
}
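
/*
  Illustration of transaction_checks(): any one of

    - LOAD DATA (SQLCOM_LOAD),
    - thd->transaction.flags.enabled being false, or
    - the use_transactions THDVAR being 0

  sets TNTO_TRANSACTIONS_OFF, which makes flush_bulk_insert() (above)
  commit and restart the transaction per batch instead of deferring
  all work to a single commit.
*/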

int ha_ndbcluster::start_statement(THD *thd,
                                   Thd_ndb *thd_ndb,
                                   uint table_count)
{
  NdbTransaction *trans= thd_ndb->trans;
  int error;
  DBUG_ENTER("ha_ndbcluster::start_statement");

  m_thd_ndb= thd_ndb;
  transaction_checks(thd, m_thd_ndb);

  if (table_count == 0)
  {
    trans_register_ha(thd, FALSE, ndbcluster_hton);
    if (thd_options(thd) & (OPTION_NOT_AUTOCOMMIT | OPTION_BEGIN))
    {
      if (!trans)
        trans_register_ha(thd, TRUE, ndbcluster_hton);
      thd_ndb->m_handler= NULL;
    }
    else
    {
      /*
        this is an autocommit, we may keep a reference to the
        handler to be used in the commit phase for optimization
        reasons, deferring execute
      */
      thd_ndb->m_handler= this;
    }
  }
  else
  {
    /*
      there is more than one handler involved, execute deferral
      is not possible
    */
    ha_ndbcluster* handler = thd_ndb->m_handler;
    thd_ndb->m_handler= NULL;
    if (handler != NULL)
    {
      /**
       * If we initially believed that this could be run
       *  using execute deferral...but changed our mind,
       *  add the handler to thd_ndb->open_tables like it would
       *  have been done "normally"
       */
      add_handler_to_open_tables(thd, thd_ndb, handler);
    }
  }
  if (!trans && table_count == 0)
  {
    DBUG_ASSERT(thd_ndb->changed_tables.is_empty() == TRUE);
    thd_ndb->trans_options= 0;

    DBUG_PRINT("trans",("Possibly starting transaction"));
    const uint opti_node_select = THDVAR(thd, optimized_node_selection);
    DBUG_PRINT("enter", ("optimized_node_selection: %u", opti_node_select));
    if (!(opti_node_select & 2) ||
        thd->lex->sql_command == SQLCOM_LOAD)
      if (unlikely(!start_transaction(error)))
        DBUG_RETURN(error);

    thd_ndb->init_open_tables();
    thd_ndb->m_slow_path= FALSE;
    if (!(thd_options(thd) & OPTION_BIN_LOG) ||
        thd->variables.binlog_format == BINLOG_FORMAT_STMT)
    {
      thd_ndb->trans_options|= TNTO_NO_LOGGING;
      thd_ndb->m_slow_path= TRUE;
    }
    else if (thd->slave_thread)
      thd_ndb->m_slow_path= TRUE;
  }
  /*
    If this is the start of a LOCK TABLE, a table lock
    should be taken on the table in NDB

    Check if it should be a read or write lock
  */
  if (thd_options(thd) & (OPTION_TABLE_LOCK))
  {
    /* This is currently dead code waiting for implementation in NDB */
    /* lockThisTable(); */
    DBUG_PRINT("info", ("Locking the table..." ));
#ifdef NOT_YET
    push_warning_printf(current_thd, Sql_condition::WARN_LEVEL_WARN,
                        ER_GET_ERRMSG, ER(ER_GET_ERRMSG), 0,
                        "Table only locked locally in this mysqld", "NDB");
#endif
  }
  DBUG_RETURN(0);
}

int
ha_ndbcluster::add_handler_to_open_tables(THD *thd,
                                          Thd_ndb *thd_ndb,
                                          ha_ndbcluster* handler)
{
  DBUG_ENTER("ha_ndbcluster::add_handler_to_open_tables");
  DBUG_PRINT("info", ("Adding %s", handler->m_share->key));

  /**
   * thd_ndb->open_tables is only used when thd_ndb->m_handler is not set
   */
  DBUG_ASSERT(thd_ndb->m_handler == NULL);
  const void *key= handler->m_share;
  HASH_SEARCH_STATE state;
  THD_NDB_SHARE *thd_ndb_share=
    (THD_NDB_SHARE*)my_hash_first(&thd_ndb->open_tables,
                                  (const uchar *)&key, sizeof(key),
                                  &state);
  while (thd_ndb_share && thd_ndb_share->key != key)
  {
    thd_ndb_share=
      (THD_NDB_SHARE*)my_hash_next(&thd_ndb->open_tables,
                                   (const uchar *)&key, sizeof(key),
                                   &state);
  }
  if (thd_ndb_share == 0)
  {
    thd_ndb_share= (THD_NDB_SHARE *) alloc_root(&thd->transaction.mem_root,
                                                sizeof(THD_NDB_SHARE));
    if (!thd_ndb_share)
    {
      mem_alloc_error(sizeof(THD_NDB_SHARE));
      DBUG_RETURN(1);
    }
    thd_ndb_share->key= key;
    thd_ndb_share->stat.last_count= thd_ndb->count;
    thd_ndb_share->stat.no_uncommitted_rows_count= 0;
    thd_ndb_share->stat.records= ~(ha_rows)0;
    my_hash_insert(&thd_ndb->open_tables, (uchar *)thd_ndb_share);
  }
  else if (thd_ndb_share->stat.last_count != thd_ndb->count)
  {
    thd_ndb_share->stat.last_count= thd_ndb->count;
    thd_ndb_share->stat.no_uncommitted_rows_count= 0;
    thd_ndb_share->stat.records= ~(ha_rows)0;
  }

  handler->m_table_info= &thd_ndb_share->stat;
  DBUG_RETURN(0);
}

int ha_ndbcluster::init_handler_for_statement(THD *thd)
{
  /*
    This is the place to make sure this handler instance
    has a started transaction.

    The transaction is started by the first handler on which
    MySQL Server calls external lock

    Other handlers in the same stmt or transaction should use
    the same NDB transaction. This is done by setting up the m_thd_ndb
    pointer to point to the NDB transaction object.
   */

  DBUG_ENTER("ha_ndbcluster::init_handler_for_statement");
  Thd_ndb *thd_ndb= m_thd_ndb;
  DBUG_ASSERT(thd_ndb);

  // store thread specific data first to set the right context
  m_autoincrement_prefetch= THDVAR(thd, autoincrement_prefetch_sz);
  // Start of transaction
  m_rows_changed= 0;
  m_blobs_pending= FALSE;
  release_blobs_buffer();
  m_slow_path= m_thd_ndb->m_slow_path;
#ifdef HAVE_NDB_BINLOG
  if (unlikely(m_slow_path))
  {
    if (m_share == ndb_apply_status_share && thd->slave_thread)
        m_thd_ndb->trans_options|= TNTO_INJECTED_APPLY_STATUS;
  }
#endif

  int ret = 0;
  if (thd_ndb->m_handler == 0)
  {
    DBUG_ASSERT(m_share);
    ret = add_handler_to_open_tables(thd, thd_ndb, this);
  }
  else
  {
    struct Ndb_local_table_statistics &stat= m_table_info_instance;
    stat.last_count= thd_ndb->count;
    stat.no_uncommitted_rows_count= 0;
    stat.records= ~(ha_rows)0;
    m_table_info= &stat;
  }
  DBUG_RETURN(ret);
}
7715 
external_lock(THD * thd,int lock_type)7716 int ha_ndbcluster::external_lock(THD *thd, int lock_type)
7717 {
7718   DBUG_ENTER("external_lock");
7719   if (lock_type != F_UNLCK)
7720   {
7721     int error;
7722     /*
7723       Check that this handler instance has a connection
7724       set up to the Ndb object of thd
7725     */
7726     if (check_ndb_connection(thd))
7727       DBUG_RETURN(1);
7728     Thd_ndb *thd_ndb= get_thd_ndb(thd);
7729 
7730     DBUG_PRINT("enter", ("lock_type != F_UNLCK "
7731                          "this: 0x%lx  thd: 0x%lx  thd_ndb: %lx  "
7732                          "thd_ndb->lock_count: %d",
7733                          (long) this, (long) thd, (long) thd_ndb,
7734                          thd_ndb->lock_count));
7735 
7736     if ((error= start_statement(thd, thd_ndb,
7737                                 thd_ndb->lock_count++)))
7738     {
7739       thd_ndb->lock_count--;
7740       DBUG_RETURN(error);
7741     }
7742     if ((error= init_handler_for_statement(thd)))
7743     {
7744       thd_ndb->lock_count--;
7745       DBUG_RETURN(error);
7746     }
7747     DBUG_RETURN(0);
7748   }
7749   else
7750   {
7751     Thd_ndb *thd_ndb= m_thd_ndb;
7752     DBUG_ASSERT(thd_ndb);
7753 
7754     DBUG_PRINT("enter", ("lock_type == F_UNLCK "
7755                          "this: 0x%lx  thd: 0x%lx  thd_ndb: %lx  "
7756                          "thd_ndb->lock_count: %d",
7757                          (long) this, (long) thd, (long) thd_ndb,
7758                          thd_ndb->lock_count));
7759 
7760     if (m_rows_changed && global_system_variables.query_cache_type)
7761     {
7762       DBUG_PRINT("info", ("Rows has changed"));
7763 
7764       if (thd_ndb->trans &&
7765           thd_options(thd) & (OPTION_NOT_AUTOCOMMIT | OPTION_BEGIN))
7766       {
7767         DBUG_PRINT("info", ("Add share to list of changed tables, %p",
7768                             m_share));
7769         /* NOTE push_back allocates memory using transactions mem_root! */
7770         thd_ndb->changed_tables.push_back(get_share(m_share),
7771                                           &thd->transaction.mem_root);
7772       }
7773 
7774       if (opt_ndb_cache_check_time)
7775       {
7776         pthread_mutex_lock(&m_share->mutex);
7777         DBUG_PRINT("info", ("Invalidating commit_count"));
7778         m_share->commit_count= 0;
7779         m_share->commit_count_lock++;
7780         pthread_mutex_unlock(&m_share->mutex);
7781       }
7782     }
7783 
7784     if (!--thd_ndb->lock_count)
7785     {
7786       DBUG_PRINT("trans", ("Last external_lock"));
7787 
7788       if ((!(thd_options(thd) & (OPTION_NOT_AUTOCOMMIT | OPTION_BEGIN))) &&
7789           thd_ndb->trans)
7790       {
7791         if (thd_ndb->trans)
7792         {
7793           /*
7794             Unlock is done without a transaction commit / rollback.
7795             This happens if the thread didn't update any rows
7796             We must in this case close the transaction to release resources
7797           */
7798           DBUG_PRINT("trans",("ending non-updating transaction"));
7799           thd_ndb->ndb->closeTransaction(thd_ndb->trans);
7800           thd_ndb->trans= NULL;
7801           thd_ndb->m_handler= NULL;
7802         }
7803       }
7804     }
7805     m_table_info= NULL;
7806 
7807     /*
7808       This is the place to make sure this handler instance
7809       is no longer connected to the active transaction.
7810 
7811       And since the handler is no longer part of the transaction
7812       it can't have open cursors, ops, queries or blobs pending.
7813     */
7814     m_thd_ndb= NULL;
7815 
7816     if (m_active_query)
7817       DBUG_PRINT("warning", ("m_active_query != NULL"));
7818     m_active_query= NULL;
7819 
7820     if (m_active_cursor)
7821       DBUG_PRINT("warning", ("m_active_cursor != NULL"));
7822     m_active_cursor= NULL;
7823 
7824     if (m_multi_cursor)
7825       DBUG_PRINT("warning", ("m_multi_cursor != NULL"));
7826     m_multi_cursor= NULL;
7827 
7828     if (m_blobs_pending)
7829       DBUG_PRINT("warning", ("blobs_pending != 0"));
7830     m_blobs_pending= 0;
7831 
7832     DBUG_RETURN(0);
7833   }
7834 }
7835 
7836 /**
7837   Unlock the last row read in an open scan.
7838   Rows are unlocked by default in ndb, but
7839   for SELECT FOR UPDATE and SELECT LOCK IN SHARE MODE
7840   locks are kept if unlock_row() is not called.
7841 */
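/*
  Editorial example (a sketch of server behaviour, not taken from the
  original comment): during "SELECT ... FOR UPDATE", the server calls
  unlock_row() for each fetched row that it decides not to use, so only
  the rows actually returned keep their NDB row locks.
*/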
7842 
7843 void ha_ndbcluster::unlock_row()
7844 {
7845   DBUG_ENTER("unlock_row");
7846 
7847   DBUG_PRINT("info", ("Unlocking row"));
7848   m_lock_tuple= FALSE;
7849   DBUG_VOID_RETURN;
7850 }
7851 
7852 /**
7853   Start statement, used when one of the tables is locked and also when
7854   a stored function is executed.
7855 
7856   start_stmt()
7857     thd                    Thd object
7858     lock_type              Lock type on table
7859 
7860   RETURN VALUE
7861     0                      Success
7862     >0                     Error code
7863 
7864   DESCRIPTION
7865     This call indicates the start of a statement when one of the tables in
7866     the statement is locked. In this case we cannot call external_lock.
7867     It also implies that external_lock is not called at end of statement.
7868     Rather the handlerton call commit (ndbcluster_commit) is called to
7869     indicate end of transaction. There are thus cases when the commit call
7870     doesn't actually refer to a commit but only to an end of statement.
7871 
7872     In the case of stored functions, one stored function is treated as one
7873     statement and the call to commit comes at the end of the stored function.
7874 */
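/*
  Illustrative call sequence (editorial sketch, using names from this file):

    external_lock(thd, F_WRLCK)    // tables locked, first statement starts
    start_stmt(thd, ...)           // later statement under LOCK TABLES, or
                                   // a statement inside a stored function
    ndbcluster_commit(hton, thd)   // end of statement or end of transaction

  That is, once tables are locked, start_stmt() replaces external_lock()
  as the per-statement entry point, and the handlerton commit call marks
  the end of the statement or transaction.
*/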
7875 
7876 int ha_ndbcluster::start_stmt(THD *thd, thr_lock_type lock_type)
7877 {
7878   int error=0;
7879   Thd_ndb *thd_ndb;
7880   DBUG_ENTER("start_stmt");
7881   DBUG_ASSERT(thd == table->in_use);
7882 
7883   thd_ndb= get_thd_ndb(thd);
7884   if ((error= start_statement(thd, thd_ndb, thd_ndb->start_stmt_count++)))
7885     goto error;
7886   if ((error= init_handler_for_statement(thd)))
7887     goto error;
7888   DBUG_RETURN(0);
7889 error:
7890   thd_ndb->start_stmt_count--;
7891   DBUG_RETURN(error);
7892 }
7893 
7894 NdbTransaction *
7895 ha_ndbcluster::start_transaction_row(const NdbRecord *ndb_record,
7896                                      const uchar *record,
7897                                      int &error)
7898 {
7899   NdbTransaction *trans;
7900   DBUG_ENTER("ha_ndbcluster::start_transaction_row");
7901   DBUG_ASSERT(m_thd_ndb);
7902   DBUG_ASSERT(m_thd_ndb->trans == NULL);
7903 
7904   transaction_checks(table->in_use, m_thd_ndb);
7905 
7906   Ndb *ndb= m_thd_ndb->ndb;
7907 
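  /*
    Editorial note (an assumption about NdbApi internals): the stack buffer
    below is handed to startTransaction() as temporary space for hashing
    the key when selecting the transaction coordinator; Uint64 elements
    keep it word-aligned.
  */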
7908   Uint64 tmp[(MAX_KEY_SIZE_IN_WORDS*MAX_XFRM_MULTIPLY) >> 1];
7909   char *buf= (char*)&tmp[0];
7910   trans= ndb->startTransaction(ndb_record,
7911                                (const char*)record,
7912                                buf, sizeof(tmp));
7913 
7914   if (trans)
7915   {
7916     m_thd_ndb->m_transaction_hint_count[trans->getConnectedNodeId()]++;
7917     DBUG_PRINT("info", ("Delayed allocation of TC"));
7918     DBUG_RETURN(m_thd_ndb->trans= trans);
7919   }
7920 
7921   ERR_SET(m_thd_ndb->ndb->getNdbError(), error);
7922   DBUG_RETURN(NULL);
7923 }
7924 
7925 NdbTransaction *
7926 ha_ndbcluster::start_transaction_key(uint inx_no,
7927                                      const uchar *key_data,
7928                                      int &error)
7929 {
7930   NdbTransaction *trans;
7931   DBUG_ENTER("ha_ndbcluster::start_transaction_key");
7932   DBUG_ASSERT(m_thd_ndb);
7933   DBUG_ASSERT(m_thd_ndb->trans == NULL);
7934 
7935   transaction_checks(table->in_use, m_thd_ndb);
7936 
7937   Ndb *ndb= m_thd_ndb->ndb;
7938   const NdbRecord *key_rec= m_index[inx_no].ndb_unique_record_key;
7939 
7940   Uint64 tmp[(MAX_KEY_SIZE_IN_WORDS*MAX_XFRM_MULTIPLY) >> 1];
7941   char *buf= (char*)&tmp[0];
7942   trans= ndb->startTransaction(key_rec,
7943                                (const char*)key_data,
7944                                buf, sizeof(tmp));
7945 
7946   if (trans)
7947   {
7948     m_thd_ndb->m_transaction_hint_count[trans->getConnectedNodeId()]++;
7949     DBUG_PRINT("info", ("Delayed allocation of TC"));
7950     DBUG_RETURN(m_thd_ndb->trans= trans);
7951   }
7952 
7953   ERR_SET(m_thd_ndb->ndb->getNdbError(), error);
7954   DBUG_RETURN(NULL);
7955 }
7956 
7957 NdbTransaction *
7958 ha_ndbcluster::start_transaction(int &error)
7959 {
7960   NdbTransaction *trans;
7961   DBUG_ENTER("ha_ndbcluster::start_transaction");
7962 
7963   DBUG_ASSERT(m_thd_ndb);
7964   DBUG_ASSERT(m_thd_ndb->trans == NULL);
7965 
7966   transaction_checks(table->in_use, m_thd_ndb);
7967   const uint opti_node_select= THDVAR(table->in_use, optimized_node_selection);
7968   m_thd_ndb->connection->set_optimized_node_selection(opti_node_select & 1);
7969   if ((trans= m_thd_ndb->ndb->startTransaction()))
7970   {
7971     m_thd_ndb->m_transaction_no_hint_count[trans->getConnectedNodeId()]++;
7972     DBUG_PRINT("info", ("Delayed allocation of TC"));
7973     DBUG_RETURN(m_thd_ndb->trans= trans);
7974   }
7975 
7976   ERR_SET(m_thd_ndb->ndb->getNdbError(), error);
7977   DBUG_RETURN(NULL);
7978 }
7979 
7980 NdbTransaction *
7981 ha_ndbcluster::start_transaction_part_id(Uint32 part_id, int &error)
7982 {
7983   NdbTransaction *trans;
7984   DBUG_ENTER("ha_ndbcluster::start_transaction_part_id");
7985 
7986   DBUG_ASSERT(m_thd_ndb);
7987   DBUG_ASSERT(m_thd_ndb->trans == NULL);
7988 
7989   transaction_checks(table->in_use, m_thd_ndb);
7990   if ((trans= m_thd_ndb->ndb->startTransaction(m_table, part_id)))
7991   {
7992     m_thd_ndb->m_transaction_hint_count[trans->getConnectedNodeId()]++;
7993     DBUG_PRINT("info", ("Delayed allocation of TC"));
7994     DBUG_RETURN(m_thd_ndb->trans= trans);
7995   }
7996 
7997   ERR_SET(m_thd_ndb->ndb->getNdbError(), error);
7998   DBUG_RETURN(NULL);
7999 }
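/*
  Editorial summary of the four start_transaction* variants above: the
  _row, _key and _part_id forms pass a hint so that the transaction
  coordinator is placed on a node holding the primary replica of the row,
  while the plain start_transaction() lets NdbApi pick the node itself,
  subject to the optimized_node_selection setting.
*/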
8000 
8001 
8002 /**
8003   Commit a transaction started in NDB.
8004 */
8005 
8006 int ndbcluster_commit(handlerton *hton, THD *thd, bool all)
8007 {
8008   int res= 0;
8009   Thd_ndb *thd_ndb= get_thd_ndb(thd);
8010   Ndb *ndb= thd_ndb->ndb;
8011   NdbTransaction *trans= thd_ndb->trans;
8012 
8013   DBUG_ENTER("ndbcluster_commit");
8014   DBUG_ASSERT(ndb);
8015   DBUG_PRINT("enter", ("Commit %s", (all ? "all" : "stmt")));
8016   thd_ndb->start_stmt_count= 0;
8017   if (trans == NULL)
8018   {
8019     DBUG_PRINT("info", ("trans == NULL"));
8020     DBUG_RETURN(0);
8021   }
8022   if (!all && (thd_options(thd) & (OPTION_NOT_AUTOCOMMIT | OPTION_BEGIN)))
8023   {
8024     /*
8025       An oddity in the handler interface is that commit on handlerton
8026       is called to indicate end of statement only in cases where
8027       autocommit isn't used and the all flag isn't set.
8028 
8029       We also leave quickly when a transaction hasn't even been started;
8030       in this case we are safe in that no cleanup is needed, and
8031       the MySQL Server could handle the query without contacting the
8032       NDB kernel.
8033     */
8034     thd_ndb->save_point_count++;
8035     DBUG_PRINT("info", ("Commit before start or end-of-statement only"));
8036     DBUG_RETURN(0);
8037   }
8038   thd_ndb->save_point_count= 0;
8039 
8040 #ifdef HAVE_NDB_BINLOG
8041   if (unlikely(thd_ndb->m_slow_path))
8042   {
8043     if (thd->slave_thread)
8044       ndbcluster_update_apply_status
8045         (thd, thd_ndb->trans_options & TNTO_INJECTED_APPLY_STATUS);
8046   }
8047 #endif /* HAVE_NDB_BINLOG */
8048 
8049   if (thd->slave_thread)
8050   {
8051     if (!g_ndb_slave_state.current_conflict_defined_op_count ||
8052         !thd_ndb->m_unsent_bytes ||
8053         !(res= execute_no_commit(thd_ndb, trans, TRUE)))
8054       res= execute_commit(thd, thd_ndb, trans, 1, TRUE);
8055 
8056     update_slave_api_stats(thd_ndb->ndb);
8057   }
8058   else
8059   {
8060     if (thd_ndb->m_handler &&
8061         thd_ndb->m_handler->m_read_before_write_removal_possible)
8062     {
8063       /*
8064         This is an autocommit involving only one table and
8065         rbwr is on, thus the transaction has already been
8066         committed in exec_bulk_update() or end_bulk_delete()
8067       */
8068       DBUG_PRINT("info", ("autocommit+rbwr, transaction already committed"));
8069       if (trans->commitStatus() != NdbTransaction::Committed)
8070       {
8071         sql_print_error("found uncommitted autocommit+rbwr transaction, "
8072                         "commit status: %d", trans->commitStatus());
8073         abort();
8074       }
8075     }
8076     else
8077       res= execute_commit(thd, thd_ndb, trans, THDVAR(thd, force_send), FALSE);
8078   }
8079 
8080   if (res != 0)
8081   {
8082     const NdbError err= trans->getNdbError();
8083     const NdbOperation *error_op= trans->getNdbErrorOperation();
8084     res= ndb_to_mysql_error(&err);
8085     if (res != -1)
8086       ndbcluster_print_error(res, error_op);
8087   }
8088   else
8089   {
8090     /* Update shared statistics for tables inserted into / deleted from*/
8091     if (thd_ndb->m_handler &&      // Autocommit Txn
8092         thd_ndb->m_handler->m_share &&
8093         thd_ndb->m_handler->m_table_info)
8094     {
8095       modify_shared_stats(thd_ndb->m_handler->m_share, thd_ndb->m_handler->m_table_info);
8096     }
8097 
8098     /* Manual commit: Update all affected NDB_SHAREs found in 'open_tables' */
8099     for (uint i= 0; i<thd_ndb->open_tables.records; i++)
8100     {
8101       THD_NDB_SHARE *thd_share=
8102         (THD_NDB_SHARE*)my_hash_element(&thd_ndb->open_tables, i);
8103       modify_shared_stats((NDB_SHARE*)thd_share->key, &thd_share->stat);
8104     }
8105   }
8106 
8107   ndb->closeTransaction(trans);
8108   thd_ndb->trans= NULL;
8109   thd_ndb->m_handler= NULL;
8110 
8111   /* Clear commit_count for tables changed by transaction */
8112   NDB_SHARE* share;
8113   List_iterator_fast<NDB_SHARE> it(thd_ndb->changed_tables);
8114   while ((share= it++))
8115   {
8116     DBUG_PRINT("info", ("Remove share from list of changed tables, %p",
8117                         share));
8118     pthread_mutex_lock(&share->mutex);
8119     DBUG_PRINT("info", ("Invalidate commit_count for %s, share->commit_count: %lu",
8120                         share->table_name, (ulong) share->commit_count));
8121     share->commit_count= 0;
8122     share->commit_count_lock++;
8123     pthread_mutex_unlock(&share->mutex);
8124     free_share(&share);
8125   }
8126   thd_ndb->changed_tables.empty();
8127 
8128   DBUG_RETURN(res);
8129 }
8130 
8131 
8132 /**
8133   Rollback a transaction started in NDB.
8134 */
8135 
8136 static int ndbcluster_rollback(handlerton *hton, THD *thd, bool all)
8137 {
8138   int res= 0;
8139   Thd_ndb *thd_ndb= get_thd_ndb(thd);
8140   Ndb *ndb= thd_ndb->ndb;
8141   NdbTransaction *trans= thd_ndb->trans;
8142 
8143   DBUG_ENTER("ndbcluster_rollback");
8144   DBUG_PRINT("enter", ("all: %d  thd_ndb->save_point_count: %d",
8145                        all, thd_ndb->save_point_count));
8146   DBUG_ASSERT(ndb);
8147   thd_ndb->start_stmt_count= 0;
8148   if (trans == NULL)
8149   {
8150     /* Ignore end-of-statement until real rollback or commit is called */
8151     DBUG_PRINT("info", ("trans == NULL"));
8152     DBUG_RETURN(0);
8153   }
8154   if (!all && (thd_options(thd) & (OPTION_NOT_AUTOCOMMIT | OPTION_BEGIN)) &&
8155       (thd_ndb->save_point_count > 0))
8156   {
8157     /*
8158       Ignore end-of-statement until a real rollback or commit is called,
8159       as ndb does not support rolling back only a statement
8160       - mark the rollback as unsuccessful; this will cause a full rollback
8161       of the transaction
8162     */
8163     DBUG_PRINT("info", ("Rollback before start or end-of-statement only"));
8164     thd->mark_transaction_to_rollback(1);
8165     my_error(ER_WARN_ENGINE_TRANSACTION_ROLLBACK, MYF(0), "NDB");
8166     DBUG_RETURN(0);
8167   }
8168   thd_ndb->save_point_count= 0;
8169   if (thd->slave_thread)
8170     g_ndb_slave_state.atTransactionAbort();
8171   thd_ndb->m_unsent_bytes= 0;
8172   thd_ndb->m_execute_count++;
8173   DBUG_PRINT("info", ("execute_count: %u", thd_ndb->m_execute_count));
8174   if (trans->execute(NdbTransaction::Rollback) != 0)
8175   {
8176     const NdbError err= trans->getNdbError();
8177     const NdbOperation *error_op= trans->getNdbErrorOperation();
8178     res= ndb_to_mysql_error(&err);
8179     if (res != -1)
8180       ndbcluster_print_error(res, error_op);
8181   }
8182   ndb->closeTransaction(trans);
8183   thd_ndb->trans= NULL;
8184   thd_ndb->m_handler= NULL;
8185 
8186   /* Clear list of tables changed by transaction */
8187   NDB_SHARE* share;
8188   List_iterator_fast<NDB_SHARE> it(thd_ndb->changed_tables);
8189   while ((share= it++))
8190   {
8191     DBUG_PRINT("info", ("Remove share from list of changed tables, %p",
8192                         share));
8193     free_share(&share);
8194   }
8195   thd_ndb->changed_tables.empty();
8196 
8197   if (thd->slave_thread)
8198     update_slave_api_stats(thd_ndb->ndb);
8199 
8200   DBUG_RETURN(res);
8201 }
8202 
8203 /**
8204  * Support for create table/column modifiers
8205  *   by exploiting the comment field
8206  */
8207 struct NDB_Modifier
8208 {
8209   enum { M_BOOL } m_type;
8210   const char * m_name;
8211   size_t m_name_len;
8212   bool m_found;
8213   union {
8214     bool m_val_bool;
8215 #ifdef TODO__
8216     int m_val_int;
8217     struct {
8218       const char * str;
8219       size_t len;
8220     } m_val_str;
8221 #endif
8222   };
8223 };
8224 
8225 static const
8226 struct NDB_Modifier ndb_table_modifiers[] =
8227 {
8228   { NDB_Modifier::M_BOOL, STRING_WITH_LEN("NOLOGGING"), 0, {0} },
8229   { NDB_Modifier::M_BOOL, 0, 0, 0, {0} }
8230 };
8231 
8232 static const
8233 struct NDB_Modifier ndb_column_modifiers[] =
8234 {
8235   { NDB_Modifier::M_BOOL, STRING_WITH_LEN("MAX_BLOB_PART_SIZE"), 0, {0} },
8236   { NDB_Modifier::M_BOOL, 0, 0, 0, {0} }
8237 };
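/*
  Editorial example of the comment syntax described by the tables above
  (a sketch; only the modifier names listed in the arrays are recognized):

    CREATE TABLE t1 (
      a INT PRIMARY KEY,
      b LONGBLOB COMMENT 'NDB_COLUMN=MAX_BLOB_PART_SIZE'
    ) ENGINE=NDBCLUSTER COMMENT='NDB_TABLE=NOLOGGING=1';
*/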
8238 
8239 /**
8240  * NDB_Modifiers
8241  *
8242  * This class implements a simple parser for getting modifiers out
8243  *   of a string (e.g a comment field)
8244  */
8245 class NDB_Modifiers
8246 {
8247 public:
8248   NDB_Modifiers(const NDB_Modifier modifiers[]);
8249   ~NDB_Modifiers();
8250 
8251   /**
8252    * Parse a string with length (not necessarily NULL-terminated)
8253    */
8254   int parse(THD* thd, const char * prefix, const char * str, size_t strlen);
8255 
8256   /**
8257    * Get modifier...returns NULL if unknown
8258    */
8259   const NDB_Modifier * get(const char * name) const;
8260 private:
8261   uint m_len;
8262   struct NDB_Modifier * m_modifiers;
8263 
8264   int parse_modifier(THD *thd, const char * prefix,
8265                      struct NDB_Modifier* m, const char * str);
8266 };
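/*
  Typical usage, mirroring ha_ndbcluster::create() further down in this
  file (editorial sketch):

    NDB_Modifiers table_modifiers(ndb_table_modifiers);
    table_modifiers.parse(thd, "NDB_TABLE=", comment.str, comment.length);
    const NDB_Modifier *mod = table_modifiers.get("NOLOGGING");
    if (mod->m_found)
      ... act on mod->m_val_bool ...
*/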
8267 
8268 static
8269 bool
8270 end_of_token(const char * str)
8271 {
8272   return str[0] == 0 || str[0] == ' ' || str[0] == ',';
8273 }
8274 
8275 NDB_Modifiers::NDB_Modifiers(const NDB_Modifier modifiers[])
8276 {
8277   for (m_len = 0; modifiers[m_len].m_name != 0; m_len++)
8278   {}
8279   m_modifiers = new NDB_Modifier[m_len];
8280   memcpy(m_modifiers, modifiers, m_len * sizeof(NDB_Modifier));
8281 }
8282 
8283 NDB_Modifiers::~NDB_Modifiers()
8284 {
8285   delete [] m_modifiers;
8286 }
8287 
8288 int
8289 NDB_Modifiers::parse_modifier(THD *thd,
8290                               const char * prefix,
8291                               struct NDB_Modifier* m,
8292                               const char * str)
8293 {
8294   if (m->m_found)
8295   {
8296     push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN,
8297                         ER_ILLEGAL_HA_CREATE_OPTION,
8298                         "%s : modifier %s specified twice",
8299                         prefix, m->m_name);
8300   }
8301 
8302   switch(m->m_type){
8303   case NDB_Modifier::M_BOOL:
8304     if (end_of_token(str))
8305     {
8306       m->m_val_bool = true;
8307       goto found;
8308     }
8309     if (str[0] != '=')
8310       break;
8311 
8312     str++;
8313     if (str[0] == '1' && end_of_token(str+1))
8314     {
8315       m->m_val_bool = true;
8316       goto found;
8317     }
8318 
8319     if (str[0] == '0' && end_of_token(str+1))
8320     {
8321       m->m_val_bool = false;
8322       goto found;
8323     }
8324   }
8325 
8326   {
8327     const char * end = strpbrk(str, " ,");
8328     if (end)
8329     {
8330       push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN,
8331                           ER_ILLEGAL_HA_CREATE_OPTION,
8332                           "%s : invalid value '%.*s' for %s",
8333                           prefix, (int)(end - str), str, m->m_name);
8334     }
8335     else
8336     {
8337       push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN,
8338                           ER_ILLEGAL_HA_CREATE_OPTION,
8339                           "%s : invalid value '%s' for %s",
8340                           prefix, str, m->m_name);
8341     }
8342   }
8343   return -1;
8344 found:
8345   m->m_found = true;
8346   return 0;
8347 }
8348 
8349 int
8350 NDB_Modifiers::parse(THD *thd,
8351                      const char * prefix,
8352                      const char * _source,
8353                      size_t _source_len)
8354 {
8355   if (_source == 0 || _source_len == 0)
8356     return 0;
8357 
8358   const char * source = 0;
8359 
8360   /**
8361    * Check if _source is NULL-terminated
8362    */
8363   for (size_t i = 0; i<_source_len; i++)
8364   {
8365     if (_source[i] == 0)
8366     {
8367       source = _source;
8368       break;
8369     }
8370   }
8371 
8372   if (source == 0)
8373   {
8374     /**
8375      * Make a NULL-terminated copy so that the str*-functions are safe
8376      */
8377     char * tmp = new char[_source_len+1];
8378     if (tmp == 0)
8379     {
8380       push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN,
8381                           ER_ILLEGAL_HA_CREATE_OPTION,
8382                           "%s : unable to parse due to out of memory",
8383                           prefix);
8384       return -1;
8385     }
8386     memcpy(tmp, _source, _source_len);
8387     tmp[_source_len] = 0;
8388     source = tmp;
8389   }
8390 
8391   const char * pos = source;
8392   if ((pos = strstr(pos, prefix)) == 0)
8393   {
8394     if (source != _source)
8395       delete [] source;
8396     return 0;
8397   }
8398 
8399   pos += strlen(prefix);
8400 
8401   while (pos && pos[0] != 0 && pos[0] != ' ')
8402   {
8403     const char * end = strpbrk(pos, " ,"); // end of current modifier
8404 
8405     for (uint i = 0; i < m_len; i++)
8406     {
8407       size_t l = m_modifiers[i].m_name_len;
8408       if (strncmp(pos, m_modifiers[i].m_name, l) == 0)
8409       {
8410         /**
8411          * Found modifier...
8412          */
8413 
8414         if (! (end_of_token(pos + l) || pos[l] == '='))
8415           goto unknown;
8416 
8417         pos += l;
8418         int res = parse_modifier(thd, prefix, m_modifiers+i, pos);
8419 
8420         if (res == -1)
8421         {
8422           /**
8423            * We continue parsing even if modifier had error
8424            */
8425         }
8426 
8427         goto next;
8428       }
8429     }
8430 
8431     {
8432   unknown:
8433       if (end)
8434       {
8435         push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN,
8436                             ER_ILLEGAL_HA_CREATE_OPTION,
8437                             "%s : unknown modifier: %.*s",
8438                             prefix, (int)(end - pos), pos);
8439       }
8440       else
8441       {
8442         push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN,
8443                             ER_ILLEGAL_HA_CREATE_OPTION,
8444                             "%s : unknown modifier: %s",
8445                             prefix, pos);
8446       }
8447     }
8448 
8449 next:
8450     pos = end;
8451     if (pos && pos[0] == ',')
8452       pos++;
8453   }
8454 
8455   if (source != _source)
8456     delete [] source;
8457 
8458   return 0;
8459 }
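/*
  Editorial note on the accepted forms, as implemented above: everything
  from the prefix up to the first space is a comma-separated modifier
  list, and a boolean modifier may be written as NAME or NAME=1 (true)
  or NAME=0 (false).  For example "NDB_TABLE=NOLOGGING,FOO" sets
  NOLOGGING to true and pushes a warning for the unknown modifier FOO.
*/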
8460 
8461 const NDB_Modifier *
8462 NDB_Modifiers::get(const char * name) const
8463 {
8464   for (uint i = 0; i < m_len; i++)
8465   {
8466     if (strcmp(name, m_modifiers[i].m_name) == 0)
8467     {
8468       return m_modifiers + i;
8469     }
8470   }
8471   return 0;
8472 }
8473 
8474 /**
8475   Define NDB column based on Field.
8476 
8477   Not member of ha_ndbcluster because NDBCOL cannot be declared.
8478 
8479   MySQL text types with character set "binary" are mapped to true
8480   NDB binary types without a character set.
8481 
8482   Blobs are V2 and striping from mysql level is not supported
8483   due to lack of syntax and lack of support for partitioning.
8484 
8485   @return
8486     Returns 0 or mysql error code.
8487 */
8488 
8489 static bool
8490 ndb_blob_striping()
8491 {
8492 #ifndef DBUG_OFF
8493   const char* p= getenv("NDB_BLOB_STRIPING");
8494   if (p != 0 && *p != 0 && *p != '0' && *p != 'n' && *p != 'N')
8495     return true;
8496 #endif
8497   return false;
8498 }
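/*
  Editorial usage example: in debug builds, blob striping can be enabled
  for testing by setting the environment variable before starting mysqld,
  e.g.  NDB_BLOB_STRIPING=1.  Any non-empty value not starting with '0',
  'n' or 'N' counts as enabled.
*/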
8499 
8500 #if NDB_VERSION_D < NDB_MAKE_VERSION(7,2,0)
8501 const Uint32 OLD_NDB_MAX_TUPLE_SIZE_IN_WORDS = 2013;
8502 #else
8503 const Uint32 OLD_NDB_MAX_TUPLE_SIZE_IN_WORDS = NDB_MAX_TUPLE_SIZE_IN_WORDS;
8504 #endif
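/*
  Editorial note, assuming NDB_MAX_TUPLE_SIZE_IN_WORDS is 3500 (14000-byte
  rows) on 7.2 and later: blob part sizes are computed against the old
  2013-word (8052-byte) limit for upgrade/downgrade safety, unless the
  MAX_BLOB_PART_SIZE column modifier asks for the current limit.
*/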
8505 
8506 static int create_ndb_column(THD *thd,
8507                              NDBCOL &col,
8508                              Field *field,
8509                              HA_CREATE_INFO *create_info
8510 #ifndef NDB_WITHOUT_COLUMN_FORMAT
8511                              , column_format_type
8512                                default_format= COLUMN_FORMAT_TYPE_DEFAULT
8513 #endif
8514                             )
8515 {
8516   NDBCOL::StorageType type= NDBCOL::StorageTypeMemory;
8517   bool dynamic= FALSE;
8518 
8519   char buf[MAX_ATTR_DEFAULT_VALUE_SIZE];
8520   DBUG_ENTER("create_ndb_column");
8521   // Set name
8522   if (col.setName(field->field_name))
8523   {
8524     DBUG_RETURN(my_errno= errno);
8525   }
8526   // Get char set
8527   CHARSET_INFO *cs= const_cast<CHARSET_INFO*>(field->charset());
8528   // Set type and sizes
8529   const enum enum_field_types mysql_type= field->real_type();
8530 
8531   NDB_Modifiers column_modifiers(ndb_column_modifiers);
8532   column_modifiers.parse(thd, "NDB_COLUMN=",
8533                          field->comment.str,
8534                          field->comment.length);
8535 
8536   const NDB_Modifier * mod_maxblob = column_modifiers.get("MAX_BLOB_PART_SIZE");
8537 
8538   {
8539     /* Clear default value (col obj is reused for whole table def) */
8540     col.setDefaultValue(NULL, 0);
8541 
8542     /* If the data nodes are capable, then set a native
8543      * default.
8544      */
8545     bool nativeDefaults =
8546       (thd == NULL) ||
8547       ndb_native_default_support(get_thd_ndb(thd)->
8548                                  ndb->getMinDbNodeVersion());
8549 
8550     if (likely( nativeDefaults ))
8551     {
8552       if ((!(field->flags & PRI_KEY_FLAG) ) &&
8553           type_supports_default_value(mysql_type))
8554       {
8555         if (!(field->flags & NO_DEFAULT_VALUE_FLAG))
8556         {
8557           my_ptrdiff_t src_offset= field->table->s->default_values
8558             - field->table->record[0];
8559           if ((! field->is_real_null(src_offset)) ||
8560               ((field->flags & NOT_NULL_FLAG)))
8561           {
8562             /* Set a non-null native default */
8563             memset(buf, 0, MAX_ATTR_DEFAULT_VALUE_SIZE);
8564             get_default_value(buf, field);
8565 
8566             /* For bit columns, the default length is rounded up to the
8567                nearest word, ensuring all data is sent
8568             */
8569             Uint32 defaultLen = field_used_length(field);
8570             if(field->type() == MYSQL_TYPE_BIT)
8571               defaultLen = ((defaultLen + 3) /4) * 4;
8572             col.setDefaultValue(buf, defaultLen);
8573           }
8574         }
8575       }
8576     }
8577   }
8578   switch (mysql_type) {
8579   // Numeric types
8580   case MYSQL_TYPE_TINY:
8581     if (field->flags & UNSIGNED_FLAG)
8582       col.setType(NDBCOL::Tinyunsigned);
8583     else
8584       col.setType(NDBCOL::Tinyint);
8585     col.setLength(1);
8586     break;
8587   case MYSQL_TYPE_SHORT:
8588     if (field->flags & UNSIGNED_FLAG)
8589       col.setType(NDBCOL::Smallunsigned);
8590     else
8591       col.setType(NDBCOL::Smallint);
8592     col.setLength(1);
8593     break;
8594   case MYSQL_TYPE_LONG:
8595     if (field->flags & UNSIGNED_FLAG)
8596       col.setType(NDBCOL::Unsigned);
8597     else
8598       col.setType(NDBCOL::Int);
8599     col.setLength(1);
8600     break;
8601   case MYSQL_TYPE_INT24:
8602     if (field->flags & UNSIGNED_FLAG)
8603       col.setType(NDBCOL::Mediumunsigned);
8604     else
8605       col.setType(NDBCOL::Mediumint);
8606     col.setLength(1);
8607     break;
8608   case MYSQL_TYPE_LONGLONG:
8609     if (field->flags & UNSIGNED_FLAG)
8610       col.setType(NDBCOL::Bigunsigned);
8611     else
8612       col.setType(NDBCOL::Bigint);
8613     col.setLength(1);
8614     break;
8615   case MYSQL_TYPE_FLOAT:
8616     col.setType(NDBCOL::Float);
8617     col.setLength(1);
8618     break;
8619   case MYSQL_TYPE_DOUBLE:
8620     col.setType(NDBCOL::Double);
8621     col.setLength(1);
8622     break;
8623   case MYSQL_TYPE_DECIMAL:
8624     {
8625       Field_decimal *f= (Field_decimal*)field;
8626       uint precision= f->pack_length();
8627       uint scale= f->decimals();
8628       if (field->flags & UNSIGNED_FLAG)
8629       {
8630         col.setType(NDBCOL::Olddecimalunsigned);
8631         precision-= (scale > 0);
8632       }
8633       else
8634       {
8635         col.setType(NDBCOL::Olddecimal);
8636         precision-= 1 + (scale > 0);
8637       }
8638       col.setPrecision(precision);
8639       col.setScale(scale);
8640       col.setLength(1);
8641     }
8642     break;
8643   case MYSQL_TYPE_NEWDECIMAL:
8644     {
8645       Field_new_decimal *f= (Field_new_decimal*)field;
8646       uint precision= f->precision;
8647       uint scale= f->decimals();
8648       if (field->flags & UNSIGNED_FLAG)
8649       {
8650         col.setType(NDBCOL::Decimalunsigned);
8651       }
8652       else
8653       {
8654         col.setType(NDBCOL::Decimal);
8655       }
8656       col.setPrecision(precision);
8657       col.setScale(scale);
8658       col.setLength(1);
8659     }
8660     break;
8661   // Date types
8662   case MYSQL_TYPE_DATETIME:
8663     col.setType(NDBCOL::Datetime);
8664     col.setLength(1);
8665     break;
8666   case MYSQL_TYPE_DATE: // ?
8667     col.setType(NDBCOL::Char);
8668     col.setLength(field->pack_length());
8669     break;
8670   case MYSQL_TYPE_NEWDATE:
8671     col.setType(NDBCOL::Date);
8672     col.setLength(1);
8673     break;
8674   case MYSQL_TYPE_TIME:
8675     col.setType(NDBCOL::Time);
8676     col.setLength(1);
8677     break;
8678   case MYSQL_TYPE_YEAR:
8679     col.setType(NDBCOL::Year);
8680     col.setLength(1);
8681     break;
8682   case MYSQL_TYPE_TIMESTAMP:
8683     col.setType(NDBCOL::Timestamp);
8684     col.setLength(1);
8685     break;
8686   // Char types
8687   case MYSQL_TYPE_STRING:
8688     if (field->pack_length() == 0)
8689     {
8690       col.setType(NDBCOL::Bit);
8691       col.setLength(1);
8692     }
8693     else if ((field->flags & BINARY_FLAG) && cs == &my_charset_bin)
8694     {
8695       col.setType(NDBCOL::Binary);
8696       col.setLength(field->pack_length());
8697     }
8698     else
8699     {
8700       col.setType(NDBCOL::Char);
8701       col.setCharset(cs);
8702       col.setLength(field->pack_length());
8703     }
8704     break;
8705   case MYSQL_TYPE_VAR_STRING: // ?
8706   case MYSQL_TYPE_VARCHAR:
8707     {
8708       Field_varstring* f= (Field_varstring*)field;
8709       if (f->length_bytes == 1)
8710       {
8711         if ((field->flags & BINARY_FLAG) && cs == &my_charset_bin)
8712           col.setType(NDBCOL::Varbinary);
8713         else {
8714           col.setType(NDBCOL::Varchar);
8715           col.setCharset(cs);
8716         }
8717       }
8718       else if (f->length_bytes == 2)
8719       {
8720         if ((field->flags & BINARY_FLAG) && cs == &my_charset_bin)
8721           col.setType(NDBCOL::Longvarbinary);
8722         else {
8723           col.setType(NDBCOL::Longvarchar);
8724           col.setCharset(cs);
8725         }
8726       }
8727       else
8728       {
8729         DBUG_RETURN(HA_ERR_UNSUPPORTED);
8730       }
8731       col.setLength(field->field_length);
8732     }
8733     break;
8734   // Blob types (all come in as MYSQL_TYPE_BLOB)
8735   mysql_type_tiny_blob:
8736   case MYSQL_TYPE_TINY_BLOB:
8737     if ((field->flags & BINARY_FLAG) && cs == &my_charset_bin)
8738       col.setType(NDBCOL::Blob);
8739     else {
8740       col.setType(NDBCOL::Text);
8741       col.setCharset(cs);
8742     }
8743     col.setInlineSize(256);
8744     // No parts
8745     col.setPartSize(0);
8746     col.setStripeSize(ndb_blob_striping() ? 0 : 0);
8747     break;
8748   //mysql_type_blob:
8749   case MYSQL_TYPE_GEOMETRY:
8750   case MYSQL_TYPE_BLOB:
8751     if ((field->flags & BINARY_FLAG) && cs == &my_charset_bin)
8752       col.setType(NDBCOL::Blob);
8753     else {
8754       col.setType(NDBCOL::Text);
8755       col.setCharset(cs);
8756     }
8757     {
8758       Field_blob *field_blob= (Field_blob *)field;
8759       /*
8760        * max_data_length is 2^8-1, 2^16-1, 2^24-1 for tiny, blob, medium.
8761        * Tinyblob gets no blob parts.  The other cases are just a crude
8762        * way to control part size and striping.
8763        *
8764        * In mysql blob(256) is promoted to blob(65535) so it does not
8765        * in fact fit "inline" in NDB.
8766        */
8767       if (field_blob->max_data_length() < (1 << 8))
8768         goto mysql_type_tiny_blob;
8769       else if (field_blob->max_data_length() < (1 << 16))
8770       {
8771         col.setInlineSize(256);
8772         col.setPartSize(2000);
8773         col.setStripeSize(ndb_blob_striping() ? 16 : 0);
8774         if (mod_maxblob->m_found)
8775         {
8776           col.setPartSize(4 * (NDB_MAX_TUPLE_SIZE_IN_WORDS - /* safety */ 13));
8777         }
8778       }
8779       else if (field_blob->max_data_length() < (1 << 24))
8780         goto mysql_type_medium_blob;
8781       else
8782         goto mysql_type_long_blob;
8783     }
8784     break;
8785   mysql_type_medium_blob:
8786   case MYSQL_TYPE_MEDIUM_BLOB:
8787     if ((field->flags & BINARY_FLAG) && cs == &my_charset_bin)
8788       col.setType(NDBCOL::Blob);
8789     else {
8790       col.setType(NDBCOL::Text);
8791       col.setCharset(cs);
8792     }
8793     col.setInlineSize(256);
8794     col.setPartSize(4000);
8795     col.setStripeSize(ndb_blob_striping() ? 8 : 0);
8796     if (mod_maxblob->m_found)
8797     {
8798       col.setPartSize(4 * (NDB_MAX_TUPLE_SIZE_IN_WORDS - /* safety */ 13));
8799     }
8800     break;
8801   mysql_type_long_blob:
8802   case MYSQL_TYPE_LONG_BLOB:
8803     if ((field->flags & BINARY_FLAG) && cs == &my_charset_bin)
8804       col.setType(NDBCOL::Blob);
8805     else {
8806       col.setType(NDBCOL::Text);
8807       col.setCharset(cs);
8808     }
8809     col.setInlineSize(256);
8810     col.setPartSize(4 * (OLD_NDB_MAX_TUPLE_SIZE_IN_WORDS - /* safety */ 13));
8811     col.setStripeSize(ndb_blob_striping() ? 4 : 0);
8812     if (mod_maxblob->m_found)
8813     {
8814       col.setPartSize(4 * (NDB_MAX_TUPLE_SIZE_IN_WORDS - /* safety */ 13));
8815     }
8816     break;
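  /*
    Editorial arithmetic for the part sizes above, assuming
    OLD_NDB_MAX_TUPLE_SIZE_IN_WORDS = 2013 and
    NDB_MAX_TUPLE_SIZE_IN_WORDS = 3500 (7.2+):

      default long blob part size:   4 * (2013 - 13) = 8000 bytes
      with MAX_BLOB_PART_SIZE:       4 * (3500 - 13) = 13948 bytes
  */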
8817   // Other types
8818   case MYSQL_TYPE_ENUM:
8819     col.setType(NDBCOL::Char);
8820     col.setLength(field->pack_length());
8821     break;
8822   case MYSQL_TYPE_SET:
8823     col.setType(NDBCOL::Char);
8824     col.setLength(field->pack_length());
8825     break;
8826   case MYSQL_TYPE_BIT:
8827   {
8828     int no_of_bits= field->field_length;
8829     col.setType(NDBCOL::Bit);
8830     if (!no_of_bits)
8831       col.setLength(1);
8832     else
8833       col.setLength(no_of_bits);
8834     break;
8835   }
8836   case MYSQL_TYPE_NULL:
8837     goto mysql_type_unsupported;
8838   mysql_type_unsupported:
8839   default:
8840     DBUG_RETURN(HA_ERR_UNSUPPORTED);
8841   }
8842   // Set nullable and pk
8843   col.setNullable(field->maybe_null());
8844   col.setPrimaryKey(field->flags & PRI_KEY_FLAG);
8845   if ((field->flags & FIELD_IN_PART_FUNC_FLAG) != 0)
8846   {
8847     col.setPartitionKey(TRUE);
8848   }
8849 
8850   // Set autoincrement
8851   if (field->flags & AUTO_INCREMENT_FLAG)
8852   {
8853 #ifndef DBUG_OFF
8854     char buff[22];
8855 #endif
8856     col.setAutoIncrement(TRUE);
8857     ulonglong value= create_info->auto_increment_value ?
8858       create_info->auto_increment_value : (ulonglong) 1;
8859     DBUG_PRINT("info", ("Autoincrement key, initial: %s", llstr(value, buff)));
8860     col.setAutoIncrementInitialValue(value);
8861   }
8862   else
8863     col.setAutoIncrement(FALSE);
8864 
8865 #ifndef NDB_WITHOUT_COLUMN_FORMAT
8866   DBUG_PRINT("info", ("storage: %u  format: %u  ",
8867                       field->field_storage_type(),
8868                       field->column_format()));
8869   switch (field->field_storage_type()) {
8870   case(HA_SM_DEFAULT):
8871   default:
8872     if (create_info->storage_media == HA_SM_DISK)
8873       type= NDBCOL::StorageTypeDisk;
8874     else
8875       type= NDBCOL::StorageTypeMemory;
8876     break;
8877   case(HA_SM_DISK):
8878     type= NDBCOL::StorageTypeDisk;
8879     break;
8880   case(HA_SM_MEMORY):
8881     type= NDBCOL::StorageTypeMemory;
8882     break;
8883   }
8884 
8885   switch (field->column_format()) {
8886   case(COLUMN_FORMAT_TYPE_FIXED):
8887     dynamic= FALSE;
8888     break;
8889   case(COLUMN_FORMAT_TYPE_DYNAMIC):
8890     dynamic= TRUE;
8891     break;
8892   case(COLUMN_FORMAT_TYPE_DEFAULT):
8893   default:
8894     if (create_info->row_type == ROW_TYPE_DEFAULT)
8895       dynamic= default_format;
8896     else
8897       dynamic= (create_info->row_type == ROW_TYPE_DYNAMIC);
8898     break;
8899   }
8900 #endif
8901   DBUG_PRINT("info", ("Column %s is declared %s", field->field_name,
8902                       (dynamic) ? "dynamic" : "static"));
8903   if (type == NDBCOL::StorageTypeDisk)
8904   {
8905     if (dynamic)
8906     {
8907       DBUG_PRINT("info", ("Dynamic disk stored column %s changed to static",
8908                           field->field_name));
8909       dynamic= false;
8910     }
8911 
8912 #ifndef NDB_WITHOUT_COLUMN_FORMAT
8913     if (thd && field->column_format() == COLUMN_FORMAT_TYPE_DYNAMIC)
8914     {
8915       push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN,
8916                           ER_ILLEGAL_HA_CREATE_OPTION,
8917                           "DYNAMIC column %s with "
8918                           "STORAGE DISK is not supported, "
8919                           "column will become FIXED",
8920                           field->field_name);
8921     }
8922 #endif
8923   }
8924 
8925   switch (create_info->row_type) {
8926   case ROW_TYPE_FIXED:
8927     if (thd && (dynamic || field_type_forces_var_part(field->type())))
8928     {
8929       push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN,
8930                           ER_ILLEGAL_HA_CREATE_OPTION,
8931                           "Row format FIXED incompatible with "
8932                           "dynamic attribute %s",
8933                           field->field_name);
8934     }
8935     break;
8936   case ROW_TYPE_DYNAMIC:
8937     /*
8938       Future: make columns dynamic in this case
8939     */
8940     break;
8941   default:
8942     break;
8943   }
8944 
8945   DBUG_PRINT("info", ("Format %s, Storage %s", (dynamic)?"dynamic":"fixed",(type == NDBCOL::StorageTypeDisk)?"disk":"memory"));
8946   col.setStorageType(type);
8947   col.setDynamic(dynamic);
8948 
8949   DBUG_RETURN(0);
8950 }
8951 
8952 void ha_ndbcluster::update_create_info(HA_CREATE_INFO *create_info)
8953 {
8954   DBUG_ENTER("ha_ndbcluster::update_create_info");
8955   THD *thd= current_thd;
8956   const NDBTAB *ndbtab= m_table;
8957   Ndb *ndb= check_ndb_in_thd(thd);
8958 
8959   if (!(create_info->used_fields & HA_CREATE_USED_AUTO))
8960   {
8961     /*
8962       Find any initial auto_increment value
8963     */
8964     for (uint i= 0; i < table->s->fields; i++)
8965     {
8966       Field *field= table->field[i];
8967       if (field->flags & AUTO_INCREMENT_FLAG)
8968       {
8969         ulonglong auto_value;
8970         uint retries= NDB_AUTO_INCREMENT_RETRIES;
8971         int retry_sleep= 30; /* 30 milliseconds, transaction */
8972         for (;;)
8973         {
8974           Ndb_tuple_id_range_guard g(m_share);
8975           if (ndb->readAutoIncrementValue(ndbtab, g.range, auto_value))
8976           {
8977             if (--retries && !thd->killed &&
8978                 ndb->getNdbError().status == NdbError::TemporaryError)
8979             {
8980               do_retry_sleep(retry_sleep);
8981               continue;
8982             }
8983             const NdbError err= ndb->getNdbError();
8984             sql_print_error("Error %lu in ::update_create_info(): %s",
8985                             (ulong) err.code, err.message);
8986             DBUG_VOID_RETURN;
8987           }
8988           break;
8989         }
8990         if (auto_value > 1)
8991         {
8992           create_info->auto_increment_value= auto_value;
8993         }
8994         break;
8995       }
8996     }
8997   }
8998 
8999   DBUG_VOID_RETURN;
9000 }
9001 
9002 /*
9003   Create a table in NDB Cluster
9004  */
9005 static uint get_no_fragments(ulonglong max_rows)
9006 {
9007   ulonglong acc_row_size= 25 + /*safety margin*/ 2;
9008   ulonglong acc_fragment_size= 512*1024*1024;
9009   return uint((max_rows*acc_row_size)/acc_fragment_size)+1;
9010 }
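/*
  Editorial worked example for get_no_fragments(): with max_rows = 100M,
  (100000000 * 27) / (512 * 1024 * 1024) + 1 = 5 + 1 = 6 fragments.
*/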
9011 
9012 
9013 /*
9014   Routine to adjust the default number of partitions to always be a
9015   multiple of the number of nodes, and never more than 4 times the
9016   number of nodes.
9017 */
9018 static
9019 bool
9020 adjusted_frag_count(Ndb* ndb,
9021                     uint requested_frags,
9022                     uint &reported_frags)
9023 {
9024   unsigned no_nodes= g_ndb_cluster_connection->no_db_nodes();
9025   unsigned no_replicas= no_nodes == 1 ? 1 : 2;
9026 
9027   unsigned no_threads= 1;
9028   const unsigned no_nodegroups= g_ndb_cluster_connection->max_nodegroup() + 1;
9029 
9030   {
9031     /**
9032      * Use SYSTAB_0 to get #replicas, and to guess #threads
9033      */
9034     char dbname[FN_HEADLEN+1];
9035     dbname[FN_HEADLEN]= 0;
9036     strnmov(dbname, ndb->getDatabaseName(), sizeof(dbname) - 1);
9037     ndb->setDatabaseName("sys");
9038     Ndb_table_guard ndbtab_g(ndb->getDictionary(), "SYSTAB_0");
9039     const NdbDictionary::Table * tab = ndbtab_g.get_table();
9040     if (tab)
9041     {
9042       no_replicas= ndbtab_g.get_table()->getReplicaCount();
9043 
9044       /**
9045        * Guess #threads
9046        */
9047       {
9048         const Uint32 frags = tab->getFragmentCount();
9049         Uint32 node = 0;
9050         Uint32 cnt = 0;
9051         for (Uint32 i = 0; i<frags; i++)
9052         {
9053           Uint32 replicas[4];
9054           if (tab->getFragmentNodes(i, replicas, NDB_ARRAY_SIZE(replicas)))
9055           {
9056             if (node == replicas[0] || node == 0)
9057             {
9058               node = replicas[0];
9059               cnt ++;
9060             }
9061           }
9062         }
9063         no_threads = cnt; // Number of primary replicas on one node
9064       }
9065     }
9066     ndb->setDatabaseName(dbname);
9067   }
9068 
9069   const unsigned usable_nodes = no_replicas * no_nodegroups;
9070   const uint max_replicas = 8 * usable_nodes * no_threads;
9071 
9072   reported_frags = usable_nodes * no_threads; // Start with 1 fragment per thread
9073   Uint32 replicas = reported_frags * no_replicas;
9074 
9075   /**
9076    * Loop until we reach the requested fragment count, without exceeding max_replicas
9077    */
9078   while (reported_frags < requested_frags &&
9079          (replicas + usable_nodes * no_threads * no_replicas) <= max_replicas)
9080   {
9081     reported_frags += usable_nodes * no_threads;
9082     replicas += usable_nodes * no_threads * no_replicas;
9083   }
9084 
9085   return (reported_frags < requested_frags);
9086 }
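/*
  Editorial worked example (assuming 2 node groups, 2 replicas and a
  guessed 1 LDM thread): usable_nodes = 4 and max_replicas = 32, so a
  request for 10 fragments grows 4 -> 8 -> 12 and the loop stops; the
  function returns false (12 >= 10), and the caller pushes no warning.
*/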
9087 
9088 
9089 /**
9090   Create a table in NDB Cluster
9091 */
9092 
9093 int ha_ndbcluster::create(const char *name,
9094                           TABLE *form,
9095                           HA_CREATE_INFO *create_info)
9096 {
9097   THD *thd= current_thd;
9098   NDBTAB tab;
9099   NDBCOL col;
9100   size_t pack_length, length;
9101   uint i, pk_length= 0;
9102   uchar *data= NULL, *pack_data= NULL;
9103   bool create_temporary= (create_info->options & HA_LEX_CREATE_TMP_TABLE);
9104   bool create_from_engine= (create_info->table_options & HA_OPTION_CREATE_FROM_ENGINE);
9105   bool is_truncate= (thd->lex->sql_command == SQLCOM_TRUNCATE);
9106   bool use_disk= FALSE;
9107   NdbDictionary::Table::SingleUserMode single_user_mode= NdbDictionary::Table::SingleUserModeLocked;
9108   bool ndb_sys_table= FALSE;
9109   int result= 0;
9110   NdbDictionary::ObjectId objId;
9111 
9112   DBUG_ENTER("ha_ndbcluster::create");
9113   DBUG_PRINT("enter", ("name: %s", name));
9114 
9115   if (create_temporary)
9116   {
9117     /*
9118       Ndb does not support temporary tables
9119      */
9120     my_errno= ER_ILLEGAL_HA_CREATE_OPTION;
9121     DBUG_PRINT("info", ("Ndb doesn't support temporary tables"));
9122     push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN,
9123                         ER_ILLEGAL_HA_CREATE_OPTION,
9124                         "Ndb doesn't support temporary tables");
9125     DBUG_RETURN(my_errno);
9126   }
9127 
9128   DBUG_ASSERT(*fn_rext((char*)name) == 0);
9129   set_dbname(name);
9130   set_tabname(name);
9131 
9132   if ((my_errno= check_ndb_connection(thd)))
9133     DBUG_RETURN(my_errno);
9134 
9135   Ndb *ndb= get_ndb(thd);
9136   NDBDICT *dict= ndb->getDictionary();
9137 
9138   table= form;
9139   if (create_from_engine)
9140   {
9141     /*
9142       Table already exists in NDB and frm file has been created by
9143       caller.
9144       Do Ndb specific stuff, such as create a .ndb file
9145     */
9146     if ((my_errno= write_ndb_file(name)))
9147       DBUG_RETURN(my_errno);
9148 
9149     ndbcluster_create_binlog_setup(thd, ndb, name, strlen(name),
9150                                    m_dbname, m_tabname, form);
9151     DBUG_RETURN(my_errno);
9152   }
9153 
9154   Thd_ndb *thd_ndb= get_thd_ndb(thd);
9155 
9156   if (!((thd_ndb->options & TNO_NO_LOCK_SCHEMA_OP) ||
9157         thd_ndb->has_required_global_schema_lock("ha_ndbcluster::create")))
9158 
9159     DBUG_RETURN(HA_ERR_NO_CONNECTION);
9160 
9161   /*
9162     Don't allow table creation unless the
9163     schema distribution table is set up
9164     (unless it is the creation of the schema dist table itself)
9165   */
9166   if (!ndb_schema_share)
9167   {
9168     if (!(strcmp(m_dbname, NDB_REP_DB) == 0 &&
9169           strcmp(m_tabname, NDB_SCHEMA_TABLE) == 0))
9170     {
9171       DBUG_PRINT("info", ("Schema distribution table not setup"));
9172       DBUG_RETURN(HA_ERR_NO_CONNECTION);
9173     }
9174     single_user_mode = NdbDictionary::Table::SingleUserModeReadWrite;
9175     ndb_sys_table= TRUE;
9176   }
9177 
9178   if (!ndb_apply_status_share)
9179   {
9180     if ((strcmp(m_dbname, NDB_REP_DB) == 0 &&
9181          strcmp(m_tabname, NDB_APPLY_TABLE) == 0))
9182     {
9183       ndb_sys_table= TRUE;
9184     }
9185   }
9186 
9187   if (is_truncate)
9188   {
9189     Ndb_table_guard ndbtab_g(dict);
9190     ndbtab_g.init(m_tabname);
9191     if (!(m_table= ndbtab_g.get_table()))
9192       ERR_RETURN(dict->getNdbError());
9193     m_table= NULL;
9194     DBUG_PRINT("info", ("Dropping and re-creating table for TRUNCATE"));
9195     if ((my_errno= delete_table(name)))
9196       DBUG_RETURN(my_errno);
9197     ndbtab_g.reinit();
9198   }
9199 
9200   NDB_Modifiers table_modifiers(ndb_table_modifiers);
9201   table_modifiers.parse(thd, "NDB_TABLE=", create_info->comment.str,
9202                         create_info->comment.length);
9203   const NDB_Modifier * mod_nologging = table_modifiers.get("NOLOGGING");
9204 
9205 #ifdef HAVE_NDB_BINLOG
9206   /* Read ndb_replication entry for this table, if any */
9207   Uint32 binlog_flags;
9208   const st_conflict_fn_def* conflict_fn= NULL;
9209   st_conflict_fn_arg args[MAX_CONFLICT_ARGS];
9210   Uint32 num_args = MAX_CONFLICT_ARGS;
9211 
9212   int rep_read_rc= ndbcluster_get_binlog_replication_info(thd,
9213                                                           ndb,
9214                                                           m_dbname,
9215                                                           m_tabname,
9216                                                           ::server_id,
9217                                                           form,
9218                                                           &binlog_flags,
9219                                                           &conflict_fn,
9220                                                           args,
9221                                                           &num_args);
9222   if (rep_read_rc != 0)
9223   {
9224     DBUG_RETURN(rep_read_rc);
9225   }
9226 
9227   /* Reset database name */
9228   ndb->setDatabaseName(m_dbname);
9229 
9230   /* Use ndb_replication information as required */
9231   if (conflict_fn != NULL)
9232   {
9233     switch(conflict_fn->type)
9234     {
9235     case CFT_NDB_EPOCH:
9236     {
9237       /* Default 6 extra Gci bits allows 2^6 == 64
9238        * epochs / saveGCP, a comfortable default
9239        */
9240       Uint32 numExtraGciBits = 6;
9241       Uint32 numExtraAuthorBits = 1;
9242 
9243       if ((num_args == 1) &&
9244           (args[0].type == CFAT_EXTRA_GCI_BITS))
9245       {
9246         numExtraGciBits = args[0].extraGciBits;
9247       }
9248       DBUG_PRINT("info", ("Setting ExtraRowGciBits to %u, "
9249                           "ExtraAuthorBits to %u",
9250                           numExtraGciBits,
9251                           numExtraAuthorBits));
9252 
9253       tab.setExtraRowGciBits(numExtraGciBits);
9254       tab.setExtraRowAuthorBits(numExtraAuthorBits);
9255     }
9256     default:
9257       break;
9258     }
9259   }
9260 #endif
9261 
9262   if ((dict->beginSchemaTrans() == -1))
9263   {
9264     DBUG_PRINT("info", ("Failed to start schema transaction"));
9265     goto err_return;
9266   }
9267   DBUG_PRINT("info", ("Started schema transaction"));
9268 
9269   DBUG_PRINT("table", ("name: %s", m_tabname));
9270   if (tab.setName(m_tabname))
9271   {
9272     my_errno= errno;
9273     goto abort;
9274   }
9275   if (!ndb_sys_table)
9276   {
9277     if (THDVAR(thd, table_temporary))
9278     {
9279 #ifdef DOES_NOT_WORK_CURRENTLY
9280       tab.setTemporary(TRUE);
9281 #endif
9282       tab.setLogging(FALSE);
9283     }
9284     else if (THDVAR(thd, table_no_logging))
9285     {
9286       tab.setLogging(FALSE);
9287     }
9288 
9289     if (mod_nologging->m_found)
9290     {
9291       tab.setLogging(!mod_nologging->m_val_bool);
9292     }
9293   }
9294   tab.setSingleUserMode(single_user_mode);
9295 
9296   // Save frm data for this table
9297   if (readfrm(name, &data, &length))
9298   {
9299     result= 1;
9300     goto abort_return;
9301   }
9302   if (packfrm(data, length, &pack_data, &pack_length))
9303   {
9304     my_free((char*)data, MYF(0));
9305     result= 2;
9306     goto abort_return;
9307   }
9308   DBUG_PRINT("info",
9309              ("setFrm data: 0x%lx  len: %lu", (long) pack_data,
9310               (ulong) pack_length));
9311   tab.setFrm(pack_data, Uint32(pack_length));
9312   my_free((char*)data, MYF(0));
9313   my_free((char*)pack_data, MYF(0));
9314 
9315   /*
9316     Handle table row type
9317 
9318     Default is to let table rows have var part reference so that online
9319     add column can be performed in the future.  Explicitly setting row
9320     type to fixed will omit var part reference, which will save data
9321     memory in ndb, but at the cost of not being able to online add
9322     column to this table
9323   */
9324   switch (create_info->row_type) {
9325   case ROW_TYPE_FIXED:
9326     tab.setForceVarPart(FALSE);
9327     break;
9328   case ROW_TYPE_DYNAMIC:
9329     /* fall through, treat as default */
9330   default:
9331     /* fall through, treat as default */
9332   case ROW_TYPE_DEFAULT:
9333     tab.setForceVarPart(TRUE);
9334     break;
9335   }
9336 
9337   /*
9338     Setup columns
9339   */
9340   my_bitmap_map *old_map;
9341   {
9342     restore_record(form, s->default_values);
9343     old_map= tmp_use_all_columns(form, form->read_set);
9344   }
9345 
9346   for (i= 0; i < form->s->fields; i++)
9347   {
9348     Field *field= form->field[i];
9349     DBUG_PRINT("info", ("name: %s, type: %u, pack_length: %d",
9350                         field->field_name, field->real_type(),
9351                         field->pack_length()));
9352     if ((my_errno= create_ndb_column(thd, col, field, create_info)))
9353       goto abort;
9354 
9355     if (!use_disk &&
9356         col.getStorageType() == NDBCOL::StorageTypeDisk)
9357       use_disk= TRUE;
9358 
9359     if (tab.addColumn(col))
9360     {
9361       my_errno= errno;
9362       goto abort;
9363     }
9364     if (col.getPrimaryKey())
9365       pk_length += (field->pack_length() + 3) / 4;
9366   }
9367 
9368   tmp_restore_column_map(form->read_set, old_map);
9369   if (use_disk)
9370   {
9371     tab.setLogging(TRUE);
9372     tab.setTemporary(FALSE);
9373     if (create_info->tablespace)
9374       tab.setTablespaceName(create_info->tablespace);
9375     else
9376       tab.setTablespaceName("DEFAULT-TS");
9377   }
9378 
9379   // Save the table level storage media setting
9380   switch(create_info->storage_media)
9381   {
9382     case HA_SM_DISK:
9383       tab.setStorageType(NdbDictionary::Column::StorageTypeDisk);
9384       break;
9385     case HA_SM_DEFAULT:
9386       tab.setStorageType(NdbDictionary::Column::StorageTypeDefault);
9387       break;
9388     case HA_SM_MEMORY:
9389       tab.setStorageType(NdbDictionary::Column::StorageTypeMemory);
9390       break;
9391   }
9392 
9393   DBUG_PRINT("info", ("Table %s is %s stored with tablespace %s",
9394                       m_tabname,
9395                       (use_disk) ? "disk" : "memory",
9396                       (use_disk) ? tab.getTablespaceName() : "N/A"));
9397 
9398   KEY* key_info;
9399   for (i= 0, key_info= form->key_info; i < form->s->keys; i++, key_info++)
9400   {
9401     KEY_PART_INFO *key_part= key_info->key_part;
9402     KEY_PART_INFO *end= key_part + key_info->user_defined_key_parts;
9403     for (; key_part != end; key_part++)
9404     {
9405 #ifndef NDB_WITHOUT_COLUMN_FORMAT
9406       if (key_part->field->field_storage_type() == HA_SM_DISK)
9407       {
9408         push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN,
9409                             ER_ILLEGAL_HA_CREATE_OPTION,
9410                             ER(ER_ILLEGAL_HA_CREATE_OPTION),
9411                             ndbcluster_hton_name,
9412                             "Index on field "
9413                             "declared with "
9414                             "STORAGE DISK is not supported");
9415         result= HA_ERR_UNSUPPORTED;
9416         goto abort_return;
9417       }
9418 #endif
9419       tab.getColumn(key_part->fieldnr-1)->setStorageType(
9420                              NdbDictionary::Column::StorageTypeMemory);
9421     }
9422   }
9423 
9424   // No primary key, create shadow key as 64 bit, auto increment
9425   if (form->s->primary_key == MAX_KEY)
9426   {
9427     DBUG_PRINT("info", ("Generating shadow key"));
9428     if (col.setName("$PK"))
9429     {
9430       my_errno= errno;
9431       goto abort;
9432     }
9433     col.setType(NdbDictionary::Column::Bigunsigned);
9434     col.setLength(1);
9435     col.setNullable(FALSE);
9436     col.setPrimaryKey(TRUE);
9437     col.setAutoIncrement(TRUE);
9438     col.setDefaultValue(NULL, 0);
9439     if (tab.addColumn(col))
9440     {
9441       my_errno= errno;
9442       goto abort;
9443     }
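    /* Editorial note: pk_length is counted in 4-byte words throughout
       this function, so the hidden 64-bit $PK key adds 2 below. */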
9444     pk_length += 2;
9445   }
9446 
9447   // Make sure that blob tables don't have too big part size
9448   for (i= 0; i < form->s->fields; i++)
9449   {
9450     /**
9451      * The extra +7 consists of:
9452      * 2 - words from pk in blob table
9453      * 5 - from extra words added by tup/dict??
9454      */
9455 
9456     // To be upgrade/downgrade safe...we currently use
9457     // old NDB_MAX_TUPLE_SIZE_IN_WORDS, unless MAX_BLOB_PART_SIZE is set
9458     switch (form->field[i]->real_type()) {
9459     case MYSQL_TYPE_GEOMETRY:
9460     case MYSQL_TYPE_BLOB:
9461     case MYSQL_TYPE_MEDIUM_BLOB:
9462     case MYSQL_TYPE_LONG_BLOB:
9463     {
9464       NdbDictionary::Column * column= tab.getColumn(i);
9465       unsigned size= pk_length + (column->getPartSize()+3)/4 + 7;
9466       unsigned ndb_max= OLD_NDB_MAX_TUPLE_SIZE_IN_WORDS;
9467       if (column->getPartSize() > (int)(4 * ndb_max))
9468         ndb_max= NDB_MAX_TUPLE_SIZE_IN_WORDS; // MAX_BLOB_PART_SIZE
9469 
9470       if (size > ndb_max &&
9471           (pk_length+7) < ndb_max)
9472       {
9473         size= ndb_max - pk_length - 7;
9474         column->setPartSize(4*size);
9475       }
9476       /**
9477        * If size > NDB_MAX and pk_length+7 >= NDB_MAX
9478        *   then the table can't be created anyway, so skip
9479        *   changing part size, and have error later
9480        */
9481     }
9482     default:
9483       break;
9484     }
9485   }
9486 
9487   // Check partition info
9488   if ((my_errno= set_up_partition_info(form->part_info, tab)))
9489     goto abort;
9490 
9491   if (tab.getFragmentType() == NDBTAB::HashMapPartition &&
9492       tab.getDefaultNoPartitionsFlag() &&
9493       (create_info->max_rows != 0 || create_info->min_rows != 0))
9494   {
9495     ulonglong rows= create_info->max_rows >= create_info->min_rows ?
9496       create_info->max_rows :
9497       create_info->min_rows;
9498     uint no_fragments= get_no_fragments(rows);
9499     uint reported_frags= no_fragments;
9500     if (adjusted_frag_count(ndb, no_fragments, reported_frags))
9501     {
9502       push_warning(current_thd,
9503                    Sql_condition::WARN_LEVEL_WARN, ER_UNKNOWN_ERROR,
9504                    "Ndb might have problems storing the maximum number "
9505                    "of rows specified");
9506     }
9507     tab.setFragmentCount(reported_frags);
9508     tab.setDefaultNoPartitionsFlag(false);
9509     tab.setFragmentData(0, 0);
9510   }
9511 
9512   // Check for HashMap
9513   if (tab.getFragmentType() == NDBTAB::HashMapPartition &&
9514       tab.getDefaultNoPartitionsFlag())
9515   {
9516     tab.setFragmentCount(0);
9517     tab.setFragmentData(0, 0);
9518   }
9519   else if (tab.getFragmentType() == NDBTAB::HashMapPartition)
9520   {
9521     NdbDictionary::HashMap hm;
9522     int res= dict->getDefaultHashMap(hm, tab.getFragmentCount());
9523     if (res == -1)
9524     {
9525       res= dict->initDefaultHashMap(hm, tab.getFragmentCount());
9526       if (res == -1)
9527       {
9528         const NdbError err= dict->getNdbError();
9529         my_errno= ndb_to_mysql_error(&err);
9530         goto abort;
9531       }
9532 
9533       res= dict->createHashMap(hm);
9534       if (res == -1)
9535       {
9536         const NdbError err= dict->getNdbError();
9537         my_errno= ndb_to_mysql_error(&err);
9538         goto abort;
9539       }
9540     }
9541   }
9542 
9543   // Create the table in NDB
9544   if (dict->createTable(tab, &objId) != 0)
9545   {
9546     const NdbError err= dict->getNdbError();
9547     my_errno= ndb_to_mysql_error(&err);
9548     goto abort;
9549   }
9550 
9551   DBUG_PRINT("info", ("Table %s/%s created successfully",
9552                       m_dbname, m_tabname));
9553 
9554   // Create secondary indexes
9555   tab.assignObjId(objId);
9556   m_table= &tab;
9557   my_errno= create_indexes(thd, ndb, form);
9558   m_table= 0;
9559 
9560   if (!my_errno)
9561   {
9562     /*
9563      * All steps have succeeded, try to commit the schema transaction
9564      */
9565     if (dict->endSchemaTrans() == -1)
9566       goto err_return;
9567     my_errno= write_ndb_file(name);
9568   }
9569   else
9570   {
9571 abort:
9572 /*
9573  *  Some step during table creation failed, abort schema transaction
9574  */
9575     DBUG_PRINT("info", ("Aborting schema transaction due to error %i",
9576                         my_errno));
9577     if (dict->endSchemaTrans(NdbDictionary::Dictionary::SchemaTransAbort)
9578         == -1)
9579       DBUG_PRINT("info", ("Failed to abort schema transaction, %i",
9580                           dict->getNdbError().code));
9581     m_table= 0;
9582     DBUG_RETURN(my_errno);
9583 abort_return:
9584     DBUG_PRINT("info", ("Aborting schema transaction"));
9585     if (dict->endSchemaTrans(NdbDictionary::Dictionary::SchemaTransAbort)
9586         == -1)
9587       DBUG_PRINT("info", ("Failed to abort schema transaction, %i",
9588                           dict->getNdbError().code));
9589     DBUG_RETURN(result);
9590 err_return:
9591     m_table= 0;
9592     ERR_RETURN(dict->getNdbError());
9593   }
9594 
9595   /**
9596    * createTable/index schema transaction OK
9597    */
9598   Ndb_table_guard ndbtab_g(dict, m_tabname);
9599   m_table= ndbtab_g.get_table();
9600 
9601   if (my_errno)
9602   {
9603     /*
9604       Failed to create an index,
9605       drop the table (and all its indexes)
9606     */
9607     while (!thd->killed)
9608     {
9609       if (dict->beginSchemaTrans() == -1)
9610         goto cleanup_failed;
9611       if (dict->dropTableGlobal(*m_table))
9612       {
9613         switch (dict->getNdbError().status)
9614         {
9615         case NdbError::TemporaryError:
9616           if (!thd->killed)
9617           {
9618             if (dict->endSchemaTrans(NdbDictionary::Dictionary::SchemaTransAbort)
9619                 == -1)
9620               DBUG_PRINT("info", ("Failed to abort schema transaction, %i",
9621                                   dict->getNdbError().code));
9622             goto cleanup_failed;
9623           }
9624           break;
9625         default:
9626           break;
9627         }
9628       }
9629       if (dict->endSchemaTrans() == -1)
9630       {
9631 cleanup_failed:
9632         DBUG_PRINT("info", ("Could not cleanup failed create %i",
9633                           dict->getNdbError().code));
9634         continue; // retry indefinitely
9635       }
9636       break;
9637     }
9638     m_table = 0;
9639     DBUG_RETURN(my_errno);
9640   }
9641   else // if (!my_errno)
9642   {
9643     NDB_SHARE *share= 0;
9644     pthread_mutex_lock(&ndbcluster_mutex);
9645     /*
9646       First make sure we get a "fresh" share here, not an old trailing one...
9647     */
9648     {
9649       uint length= (uint) strlen(name);
9650       if ((share= (NDB_SHARE*) my_hash_search(&ndbcluster_open_tables,
9651                                               (const uchar*) name, length)))
9652         handle_trailing_share(thd, share);
9653     }
9654     /*
9655       get a new share
9656     */
9657 
9658     /* ndb_share reference create */
9659     if (!(share= get_share(name, form, TRUE, TRUE)))
9660     {
9661       sql_print_error("NDB: allocating table share for %s failed", name);
9662       /* my_errno is set */
9663     }
9664     else
9665     {
9666       DBUG_PRINT("NDB_SHARE", ("%s binlog create  use_count: %u",
9667                                share->key, share->use_count));
9668     }
9669     pthread_mutex_unlock(&ndbcluster_mutex);
9670 
9671     while (!IS_TMP_PREFIX(m_tabname))
9672     {
9673 #ifdef HAVE_NDB_BINLOG
9674       if (share)
9675       {
9676         /* Set the Binlogging information we retrieved above */
9677         ndbcluster_apply_binlog_replication_info(thd,
9678                                                  share,
9679                                                  m_table,
9680                                                  form,
9681                                                  conflict_fn,
9682                                                  args,
9683                                                  num_args,
9684                                                  TRUE, /* Do set binlog flags */
9685                                                  binlog_flags);
9686       }
9687 #endif
9688       String event_name(INJECTOR_EVENT_LEN);
9689       ndb_rep_event_name(&event_name, m_dbname, m_tabname,
9690                          get_binlog_full(share));
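      /*
        The event name produced by ndb_rep_event_name() is expected to
        be of the form "REPL$db/table", or "REPLF$db/table" when the
        binlog-full flag is set (see ha_ndbcluster_binlog).
      */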
9691       int do_event_op= ndb_binlog_running;
9692 
9693       if (!ndb_schema_share &&
9694           strcmp(share->db, NDB_REP_DB) == 0 &&
9695           strcmp(share->table_name, NDB_SCHEMA_TABLE) == 0)
9696         do_event_op= 1;
9697 
9698       /*
9699         Always create an event for the table, as other mysql servers
9700         expect it to be there.
9701       */
9702       if (!ndbcluster_create_event(thd, ndb, m_table, event_name.c_ptr(), share,
9703                                    share && do_event_op ? 2 : 1/* push warning */))
9704       {
9705         if (opt_ndb_extra_logging)
9706           sql_print_information("NDB Binlog: CREATE TABLE Event: %s",
9707                                 event_name.c_ptr());
9708         if (share &&
9709             ndbcluster_create_event_ops(thd, share,
9710                                         m_table, event_name.c_ptr()))
9711         {
9712           sql_print_error("NDB Binlog: FAILED CREATE TABLE event operations."
9713                           " Event: %s", name);
9714           /* a warning has been issued to the client */
9715         }
9716       }
9717       /*
9718         warning has been issued if ndbcluster_create_event failed
9719         and (share && do_event_op)
9720       */
9721       if (share && !do_event_op)
9722         set_binlog_nologging(share);
9723       ndbcluster_log_schema_op(thd,
9724                                thd->query(), thd->query_length(),
9725                                share->db, share->table_name,
9726                                m_table->getObjectId(),
9727                                m_table->getObjectVersion(),
9728                                (is_truncate) ?
9729 			       SOT_TRUNCATE_TABLE : SOT_CREATE_TABLE,
9730 			       NULL, NULL);
9731       break;
9732     }
9733   }
9734 
9735   m_table= 0;
9736   DBUG_RETURN(my_errno);
9737 }
9738 
9739 
9740 int ha_ndbcluster::create_index(THD *thd, const char *name, KEY *key_info,
9741                                 NDB_INDEX_TYPE idx_type, uint idx_no)
9742 {
9743   int error= 0;
9744   char unique_name[FN_LEN + 1];
9745   static const char* unique_suffix= "$unique";
9746   DBUG_ENTER("ha_ndbcluster::create_index");
9747   DBUG_PRINT("info", ("Creating index %u: %s", idx_no, name));
9748 
9749   if (idx_type == UNIQUE_ORDERED_INDEX || idx_type == UNIQUE_INDEX)
9750   {
9751     strxnmov(unique_name, FN_LEN, name, unique_suffix, NullS);
9752     DBUG_PRINT("info", ("Created unique index name \'%s\' for index %d",
9753                         unique_name, idx_no));
9754   }
9755 
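  /*
    Illustrative mapping from SQL index declarations to NDB_INDEX_TYPE
    (a sketch of the usual cases; USING HASH suppresses the ordered index):
      PRIMARY KEY (a) USING HASH -> PRIMARY_KEY_INDEX (hash only)
      PRIMARY KEY (a)            -> PRIMARY_KEY_ORDERED_INDEX
      UNIQUE KEY (b) USING HASH  -> UNIQUE_INDEX (hash only)
      UNIQUE KEY (b)             -> UNIQUE_ORDERED_INDEX
      KEY (c)                    -> ORDERED_INDEX
  */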
9756   switch (idx_type){
9757   case PRIMARY_KEY_INDEX:
9758     // Do nothing, already created
9759     break;
9760   case PRIMARY_KEY_ORDERED_INDEX:
9761     error= create_ordered_index(thd, name, key_info);
9762     break;
9763   case UNIQUE_ORDERED_INDEX:
9764     if (!(error= create_ordered_index(thd, name, key_info)))
9765       error= create_unique_index(thd, unique_name, key_info);
9766     break;
9767   case UNIQUE_INDEX:
9768     if (check_index_fields_not_null(key_info))
9769     {
9770       push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN,
9771 			  ER_NULL_COLUMN_IN_INDEX,
9772 			  "Ndb does not support unique index on NULL valued attributes, index access with NULL value will become full table scan");
9773     }
9774     error= create_unique_index(thd, unique_name, key_info);
9775     break;
9776   case ORDERED_INDEX:
9777     if (key_info->algorithm == HA_KEY_ALG_HASH)
9778     {
9779       push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN,
9780 			  ER_ILLEGAL_HA_CREATE_OPTION,
9781 			  ER(ER_ILLEGAL_HA_CREATE_OPTION),
9782 			  ndbcluster_hton_name,
9783 			  "Ndb does not support non-unique "
9784 			  "hash based indexes");
9785       error= HA_ERR_UNSUPPORTED;
9786       break;
9787     }
9788     error= create_ordered_index(thd, name, key_info);
9789     break;
9790   default:
9791     DBUG_ASSERT(FALSE);
9792     break;
9793   }
9794 
9795   DBUG_RETURN(error);
9796 }
9797 
9798 int ha_ndbcluster::create_ordered_index(THD *thd, const char *name,
9799                                         KEY *key_info)
9800 {
9801   DBUG_ENTER("ha_ndbcluster::create_ordered_index");
9802   DBUG_RETURN(create_ndb_index(thd, name, key_info, FALSE));
9803 }
9804 
9805 int ha_ndbcluster::create_unique_index(THD *thd, const char *name,
9806                                        KEY *key_info)
9807 {
9808 
9809   DBUG_ENTER("ha_ndbcluster::create_unique_index");
9810   DBUG_RETURN(create_ndb_index(thd, name, key_info, TRUE));
9811 }
9812 
9813 
9814 /**
9815   Create an index in NDB Cluster.
9816 
9817   @todo
9818     Only temporary ordered indexes supported
9819 */
9820 
9821 int ha_ndbcluster::create_ndb_index(THD *thd, const char *name,
9822                                     KEY *key_info,
9823                                     bool unique)
9824 {
9825   char index_name[FN_LEN + 1];
9826   Ndb *ndb= get_ndb(thd);
9827   NdbDictionary::Dictionary *dict= ndb->getDictionary();
9828   KEY_PART_INFO *key_part= key_info->key_part;
9829   KEY_PART_INFO *end= key_part + key_info->user_defined_key_parts;
9830 
9831   DBUG_ENTER("ha_ndbcluster::create_ndb_index");
9832   DBUG_PRINT("enter", ("name: %s ", name));
9833 
9834   ndb_protect_char(name, index_name, sizeof(index_name) - 1, '/');
9835   DBUG_PRINT("info", ("index name: %s ", index_name));
9836 
9837   NdbDictionary::Index ndb_index(index_name);
9838   if (unique)
9839     ndb_index.setType(NdbDictionary::Index::UniqueHashIndex);
9840   else
9841   {
9842     ndb_index.setType(NdbDictionary::Index::OrderedIndex);
9843     // TODO Only temporary ordered indexes supported
9844     ndb_index.setLogging(FALSE);
9845   }
9846   if (!m_table->getLogging())
9847     ndb_index.setLogging(FALSE);
9848   if (((NDBTAB*)m_table)->getTemporary())
9849     ndb_index.setTemporary(TRUE);
9850   if (ndb_index.setTable(m_tabname))
9851   {
9852     DBUG_RETURN(my_errno= errno);
9853   }
9854 
9855   for (; key_part != end; key_part++)
9856   {
9857     Field *field= key_part->field;
9858 #ifndef NDB_WITHOUT_COLUMN_FORMAT
9859     if (field->field_storage_type() == HA_SM_DISK)
9860     {
9861       push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN,
9862                           ER_ILLEGAL_HA_CREATE_OPTION,
9863                           ER(ER_ILLEGAL_HA_CREATE_OPTION),
9864                           ndbcluster_hton_name,
9865                           "Index on field "
9866                           "declared with "
9867                           "STORAGE DISK is not supported");
9868       DBUG_RETURN(HA_ERR_UNSUPPORTED);
9869     }
9870 #endif
9871     DBUG_PRINT("info", ("attr: %s", field->field_name));
9872     if (ndb_index.addColumnName(field->field_name))
9873     {
9874       DBUG_RETURN(my_errno= errno);
9875     }
9876   }
9877 
9878   if (dict->createIndex(ndb_index, *m_table))
9879     ERR_RETURN(dict->getNdbError());
9880 
9881   // Success
9882   DBUG_PRINT("info", ("Created index %s", name));
9883   DBUG_RETURN(0);
9884 }
9885 
9886 int ha_ndbcluster::add_index_impl(THD *thd, TABLE *table_arg,
9887                                   KEY *key_info, uint num_of_keys)
9888 {
9889   int error= 0;
9890   uint idx;
9891   DBUG_ENTER("ha_ndbcluster::add_index_impl");
9892   DBUG_PRINT("enter", ("table %s", table_arg->s->table_name.str));
9893   DBUG_ASSERT(m_share->state == NSS_ALTERED);
9894 
9895   for (idx= 0; idx < num_of_keys; idx++)
9896   {
9897     KEY *key= key_info + idx;
9898     KEY_PART_INFO *key_part= key->key_part;
9899     KEY_PART_INFO *end= key_part + key->user_defined_key_parts;
9900     NDB_INDEX_TYPE idx_type= get_index_type_from_key(idx, key_info, false);
9901     DBUG_PRINT("info", ("Adding index: '%s'", key_info[idx].name));
9902     // Add fields to key_part struct
9903     for (; key_part != end; key_part++)
9904       key_part->field= table->field[key_part->fieldnr];
9905     // idx_type was determined above by get_index_type_from_key()
9906     // Create index in ndb
9907     if((error= create_index(thd, key_info[idx].name, key, idx_type, idx)))
9908       break;
9909   }
9910   DBUG_RETURN(error);
9911 }
9912 
9913 /**
9914   Rename a table in NDB Cluster.
9915 */
9916 
9917 int ha_ndbcluster::rename_table(const char *from, const char *to)
9918 {
9919   THD *thd= current_thd;
9920   NDBDICT *dict;
9921   char old_dbname[FN_HEADLEN];
9922   char new_dbname[FN_HEADLEN];
9923   char new_tabname[FN_HEADLEN];
9924   const NDBTAB *orig_tab;
9925   int result;
9926   bool recreate_indexes= FALSE;
9927   NDBDICT::List index_list;
9928 
9929   DBUG_ENTER("ha_ndbcluster::rename_table");
9930   DBUG_PRINT("info", ("Renaming %s to %s", from, to));
9931 
9932   if (thd == injector_thd)
9933   {
9934     /*
9935       The table was renamed remotely and is thus already
9936       renamed inside ndb.
9937       Just rename the .ndb file.
9938      */
9939     DBUG_RETURN(handler::rename_table(from, to));
9940   }
9941 
9942   set_dbname(from, old_dbname);
9943   set_dbname(to, new_dbname);
9944   set_tabname(from);
9945   set_tabname(to, new_tabname);
9946 
9947   if (check_ndb_connection(thd))
9948     DBUG_RETURN(my_errno= HA_ERR_NO_CONNECTION);
9949 
9950   Thd_ndb *thd_ndb= thd_get_thd_ndb(thd);
9951   if (!thd_ndb->has_required_global_schema_lock("ha_ndbcluster::rename_table"))
9952     DBUG_RETURN(HA_ERR_NO_CONNECTION);
9953 
9954   Ndb *ndb= get_ndb(thd);
9955   ndb->setDatabaseName(old_dbname);
9956   dict= ndb->getDictionary();
9957   Ndb_table_guard ndbtab_g(dict, m_tabname);
9958   if (!(orig_tab= ndbtab_g.get_table()))
9959     ERR_RETURN(dict->getNdbError());
9960 
9961   if (my_strcasecmp(system_charset_info, new_dbname, old_dbname))
9962   {
9963     dict->listIndexes(index_list, *orig_tab);
9964     recreate_indexes= TRUE;
9965   }
9966   // Change current database to that of target table
9967   set_dbname(to);
9968   if (ndb->setDatabaseName(m_dbname))
9969   {
9970     ERR_RETURN(ndb->getNdbError());
9971   }
9972 
9973   int ndb_table_id= orig_tab->getObjectId();
9974   int ndb_table_version= orig_tab->getObjectVersion();
9975   /* ndb_share reference temporary */
9976   NDB_SHARE *share= get_share(from, 0, FALSE);
9977   int is_old_table_tmpfile= IS_TMP_PREFIX(m_tabname);
9978   int is_new_table_tmpfile= IS_TMP_PREFIX(new_tabname);
9979   if (!is_new_table_tmpfile && !is_old_table_tmpfile)
9980   {
9981     /*
9982       this is a "real" rename table, i.e. not tied to an offline alter table
9983       - send new name == "to" in query field
9984     */
9985     ndbcluster_log_schema_op(thd, to, strlen(to),
9986                              old_dbname, m_tabname,
9987                              ndb_table_id, ndb_table_version,
9988                              SOT_RENAME_TABLE_PREPARE,
9989                              m_dbname, new_tabname);
9990   }
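  /*
    The SOT_RENAME_TABLE_PREPARE above is the "prepare" half of the
    distributed rename; the matching SOT_RENAME_TABLE is logged further
    below, once the rename has succeeded inside NDB.
  */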
9991   if (share)
9992   {
9993     DBUG_PRINT("NDB_SHARE", ("%s temporary  use_count: %u",
9994                              share->key, share->use_count));
9995     ndbcluster_prepare_rename_share(share, to);
9996     int ret = ndbcluster_rename_share(thd, share);
9997     assert(ret == 0); NDB_IGNORE_VALUE(ret);
9998   }
9999 
10000   NdbDictionary::Table new_tab= *orig_tab;
10001   new_tab.setName(new_tabname);
10002   if (dict->alterTableGlobal(*orig_tab, new_tab) != 0)
10003   {
10004     NdbError ndb_error= dict->getNdbError();
10005     if (share)
10006     {
10007       int ret = ndbcluster_undo_rename_share(thd, share);
10008       assert(ret == 0); NDB_IGNORE_VALUE(ret);
10009       /* ndb_share reference temporary free */
10010       DBUG_PRINT("NDB_SHARE", ("%s temporary free  use_count: %u",
10011                                share->key, share->use_count));
10012       free_share(&share);
10013     }
10014     ERR_RETURN(ndb_error);
10015   }
10016 
10017   // Rename .ndb file
10018   if ((result= handler::rename_table(from, to)))
10019   {
10020     // ToDo in 4.1 should rollback alter table...
10021     if (share)
10022     {
10023       /* ndb_share reference temporary free */
10024       DBUG_PRINT("NDB_SHARE", ("%s temporary  use_count: %u",
10025                                share->key, share->use_count));
10026       free_share(&share);
10027     }
10028     DBUG_RETURN(result);
10029   }
10030 
10031   /* handle old table */
10032   if (!is_old_table_tmpfile)
10033   {
10034     ndbcluster_drop_event(thd, ndb, share, "rename table",
10035                           old_dbname, m_tabname);
10036   }
10037 
10038   if (!result && !is_new_table_tmpfile)
10039   {
10040     Ndb_table_guard ndbtab_g2(dict, new_tabname);
10041     const NDBTAB *ndbtab= ndbtab_g2.get_table();
10042 #ifdef HAVE_NDB_BINLOG
10043     if (share)
10044       ndbcluster_read_binlog_replication(thd, ndb, share, ndbtab,
10045                                          ::server_id, NULL, TRUE);
10046 #endif
10047     /* always create an event for the table */
10048     String event_name(INJECTOR_EVENT_LEN);
10049     ndb_rep_event_name(&event_name, new_dbname, new_tabname,
10050                        get_binlog_full(share));
10051 
10052     if (!Ndb_dist_priv_util::is_distributed_priv_table(new_dbname,
10053                                                        new_tabname) &&
10054         !ndbcluster_create_event(thd, ndb, ndbtab, event_name.c_ptr(), share,
10055                                  share && ndb_binlog_running ? 2 : 1/* push warning */))
10056     {
10057       if (opt_ndb_extra_logging)
10058         sql_print_information("NDB Binlog: RENAME Event: %s",
10059                               event_name.c_ptr());
10060       if (share && (share->op == 0) &&
10061           ndbcluster_create_event_ops(thd, share, ndbtab, event_name.c_ptr()))
10062       {
10063         sql_print_error("NDB Binlog: FAILED create event operations "
10064                         "during RENAME. Event %s", event_name.c_ptr());
10065         /* a warning has been issued to the client */
10066       }
10067     }
10068     /*
10069       warning has been issued if ndbcluster_create_event failed
10070       and (share && ndb_binlog_running)
10071     */
10072     if (!is_old_table_tmpfile)
10073     {
10074       /* "real" rename table */
10075       ndbcluster_log_schema_op(thd, thd->query(), thd->query_length(),
10076                                old_dbname, m_tabname,
10077                                ndb_table_id, ndb_table_version,
10078                                SOT_RENAME_TABLE,
10079                                m_dbname, new_tabname);
10080     }
10081     else
10082     {
10083       /* final phase of offline alter table */
10084       ndbcluster_log_schema_op(thd, thd->query(), thd->query_length(),
10085                                m_dbname, new_tabname,
10086                                ndb_table_id, ndb_table_version,
10087                                SOT_ALTER_TABLE_COMMIT,
10088                                NULL, NULL);
10089 
10090     }
10091   }
10092 
10093   // If we are moving tables between databases, we need to recreate
10094   // indexes
10095   if (recreate_indexes)
10096   {
10097     for (unsigned i = 0; i < index_list.count; i++)
10098     {
10099         NDBDICT::List::Element& index_el = index_list.elements[i];
10100 	// Recreate any indexes not stored in the system database
10101 	if (my_strcasecmp(system_charset_info,
10102 			  index_el.database, NDB_SYSTEM_DATABASE))
10103 	{
10104 	  set_dbname(from);
10105 	  ndb->setDatabaseName(m_dbname);
10106 	  const NDBINDEX * index= dict->getIndexGlobal(index_el.name,  new_tab);
10107 	  DBUG_PRINT("info", ("Creating index %s/%s",
10108 			      index_el.database, index->getName()));
10109 	  dict->createIndex(*index, new_tab);
10110 	  DBUG_PRINT("info", ("Dropping index %s/%s",
10111 			      index_el.database, index->getName()));
10112 	  set_dbname(from);
10113 	  ndb->setDatabaseName(m_dbname);
10114 	  dict->dropIndexGlobal(*index);
10115 	}
10116     }
10117   }
10118   if (share)
10119   {
10120     /* ndb_share reference temporary free */
10121     DBUG_PRINT("NDB_SHARE", ("%s temporary free  use_count: %u",
10122                              share->key, share->use_count));
10123     free_share(&share);
10124   }
10125 
10126   DBUG_RETURN(result);
10127 }
10128 
10129 
10130 /**
10131   Delete table from NDB Cluster.
10132 */
10133 
10134 static
10135 void
10136 delete_table_drop_share(NDB_SHARE* share, const char * path)
10137 {
10138   if (share)
10139   {
10140     pthread_mutex_lock(&ndbcluster_mutex);
10141 do_drop:
10142     if (share->state != NSS_DROPPED)
10143     {
10144       /*
10145         The share kept by the server has not been freed, free it
10146       */
10147       share->state= NSS_DROPPED;
10148       /* ndb_share reference create free */
10149       DBUG_PRINT("NDB_SHARE", ("%s create free  use_count: %u",
10150                                share->key, share->use_count));
10151       free_share(&share, TRUE);
10152     }
10153     /* ndb_share reference temporary free */
10154     DBUG_PRINT("NDB_SHARE", ("%s temporary free  use_count: %u",
10155                              share->key, share->use_count));
10156     free_share(&share, TRUE);
10157     pthread_mutex_unlock(&ndbcluster_mutex);
10158   }
10159   else if (path)
10160   {
10161     pthread_mutex_lock(&ndbcluster_mutex);
10162     share= get_share(path, 0, FALSE, TRUE);
10163     if (share)
10164     {
10165       goto do_drop;
10166     }
10167     pthread_mutex_unlock(&ndbcluster_mutex);
10168   }
10169 }
10170 
10171 /* static version which does not need a handler */
10172 
10173 int
10174 ha_ndbcluster::drop_table_impl(THD *thd, ha_ndbcluster *h, Ndb *ndb,
10175                                const char *path,
10176                                const char *db,
10177                                const char *table_name)
10178 {
10179   DBUG_ENTER("ha_ndbcluster::drop_table_impl");
10180   NDBDICT *dict= ndb->getDictionary();
10181   int ndb_table_id= 0;
10182   int ndb_table_version= 0;
10183   /*
10184     Don't allow drop table unless
10185     schema distribution table is setup
10186   */
10187   if (!ndb_schema_share)
10188   {
10189     DBUG_PRINT("info", ("Schema distribution table not setup"));
10190     DBUG_RETURN(HA_ERR_NO_CONNECTION);
10191   }
10192   /* ndb_share reference temporary */
10193   NDB_SHARE *share= get_share(path, 0, FALSE);
10194   if (share)
10195   {
10196     DBUG_PRINT("NDB_SHARE", ("%s temporary  use_count: %u",
10197                              share->key, share->use_count));
10198   }
10199 
10200   /* Drop the table from NDB */
10201 
10202   int res= 0;
10203   if (h && h->m_table)
10204   {
10205 retry_temporary_error1:
10206     if (dict->dropTableGlobal(*h->m_table) == 0)
10207     {
10208       ndb_table_id= h->m_table->getObjectId();
10209       ndb_table_version= h->m_table->getObjectVersion();
10210       DBUG_PRINT("info", ("success 1"));
10211     }
10212     else
10213     {
10214       switch (dict->getNdbError().status)
10215       {
10216         case NdbError::TemporaryError:
10217           if (!thd->killed)
10218             goto retry_temporary_error1; // retry indefinitely
10219           break;
10220         default:
10221           break;
10222       }
10223       res= ndb_to_mysql_error(&dict->getNdbError());
10224       DBUG_PRINT("info", ("error(1) %u", res));
10225     }
10226     h->release_metadata(thd, ndb);
10227   }
10228   else
10229   {
10230     ndb->setDatabaseName(db);
10231     while (1)
10232     {
10233       Ndb_table_guard ndbtab_g(dict, table_name);
10234       if (ndbtab_g.get_table())
10235       {
10236     retry_temporary_error2:
10237         if (dict->dropTableGlobal(*ndbtab_g.get_table()) == 0)
10238         {
10239           ndb_table_id= ndbtab_g.get_table()->getObjectId();
10240           ndb_table_version= ndbtab_g.get_table()->getObjectVersion();
10241           DBUG_PRINT("info", ("success 2"));
10242           break;
10243         }
10244         else
10245         {
10246           switch (dict->getNdbError().status)
10247           {
10248             case NdbError::TemporaryError:
10249               if (!thd->killed)
10250                 goto retry_temporary_error2; // retry indefinitely
10251               break;
10252             default:
10253               if (dict->getNdbError().code == NDB_INVALID_SCHEMA_OBJECT)
10254               {
10255                 ndbtab_g.invalidate();
10256                 continue;
10257               }
10258               break;
10259           }
10260         }
10261       }
10262       res= ndb_to_mysql_error(&dict->getNdbError());
10263       DBUG_PRINT("info", ("error(2) %u", res));
10264       break;
10265     }
10266   }
10267 
10268   if (res)
10269   {
10270     /* the drop table failed for some reason, drop the share anyway */
10271     delete_table_drop_share(share, 0);
10272     DBUG_RETURN(res);
10273   }
10274 
10275   /* stop the logging of the dropped table, and cleanup */
10276 
10277   /*
10278     Drop table is successful even if the table did not exist in ndb.
10279     In case the table was actually not dropped there is no need to
10280     force a gcp, and setting the event_name to null indicates that
10281     there is no event to be dropped.
10282   */
10283   int table_dropped= dict->getNdbError().code != 709;
10284 
10285   {
10286     if (table_dropped)
10287     {
10288       ndbcluster_handle_drop_table(thd, ndb, share, "delete table",
10289                                    db, table_name);
10290     }
10291     else
10292     {
10293       /**
10294        * Setting 0,0 will cause ndbcluster_drop_event *not* to be called
10295        */
10296       ndbcluster_handle_drop_table(thd, ndb, share, "delete table",
10297                                    0, 0);
10298     }
10299   }
10300 
10301   if (!IS_TMP_PREFIX(table_name) && share &&
10302       thd->lex->sql_command != SQLCOM_TRUNCATE)
10303   {
10304     ndbcluster_log_schema_op(thd,
10305                              thd->query(), thd->query_length(),
10306                              share->db, share->table_name,
10307                              ndb_table_id, ndb_table_version,
10308                              SOT_DROP_TABLE, NULL, NULL);
10309   }
10310 
10311   delete_table_drop_share(share, 0);
10312   DBUG_RETURN(0);
10313 }
10314 
10315 int ha_ndbcluster::delete_table(const char *name)
10316 {
10317   THD *thd= current_thd;
10318   Thd_ndb *thd_ndb= get_thd_ndb(thd);
10319   Ndb *ndb;
10320   int error= 0;
10321   DBUG_ENTER("ha_ndbcluster::delete_table");
10322   DBUG_PRINT("enter", ("name: %s", name));
10323 
10324   if ((thd == injector_thd) ||
10325       (thd_ndb->options & TNO_NO_NDB_DROP_TABLE))
10326   {
10327     /*
10328       The table was dropped remotely and is thus already
10329       dropped inside ndb.
10330       Just drop the local files.
10331     */
10332     delete_table_drop_share(0, name);
10333     DBUG_RETURN(handler::delete_table(name));
10334   }
10335 
10336   set_dbname(name);
10337   set_tabname(name);
10338 
10339   /*
10340     Don't allow drop table unless
10341     schema distribution table is setup
10342   */
10343   if (!ndb_schema_share)
10344   {
10345     DBUG_PRINT("info", ("Schema distribution table not setup"));
10346     error= HA_ERR_NO_CONNECTION;
10347     goto err;
10348   }
10349 
10350   if (check_ndb_connection(thd))
10351   {
10352     error= HA_ERR_NO_CONNECTION;
10353     goto err;
10354   }
10355 
10356   ndb= thd_ndb->ndb;
10357 
10358   if (!thd_ndb->has_required_global_schema_lock("ha_ndbcluster::delete_table"))
10359   {
10360     error= HA_ERR_NO_CONNECTION;
10361     goto err;
10362   }
10363 
10364   /*
10365     Drop table in ndb.
10366     If it was already gone it might have been dropped
10367     remotely, give a warning and then drop .ndb file.
10368    */
10369   if (!(error= drop_table_impl(thd, this, ndb, name,
10370                                m_dbname, m_tabname)) ||
10371       error == HA_ERR_NO_SUCH_TABLE)
10372   {
10373     /* Call ancestor function to delete .ndb file */
10374     int error1= handler::delete_table(name);
10375     if (!error)
10376       error= error1;
10377   }
10378 
10379 err:
10380   DBUG_RETURN(error);
10381 }
10382 
10383 
10384 void ha_ndbcluster::get_auto_increment(ulonglong offset, ulonglong increment,
10385                                        ulonglong nb_desired_values,
10386                                        ulonglong *first_value,
10387                                        ulonglong *nb_reserved_values)
10388 {
10389   Uint64 auto_value;
10390   THD *thd= current_thd;
10391   DBUG_ENTER("get_auto_increment");
10392   DBUG_PRINT("enter", ("m_tabname: %s", m_tabname));
10393   Ndb *ndb= get_ndb(table->in_use);
10394   uint retries= NDB_AUTO_INCREMENT_RETRIES;
10395   int retry_sleep= 30; /* 30 milliseconds, transaction */
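  /*
    The loop below retries temporary NDB errors up to
    NDB_AUTO_INCREMENT_RETRIES times, pausing retry_sleep ms between
    attempts; any other failure logs the error and returns ~0 in
    *first_value.
  */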
10396   for (;;)
10397   {
10398     Ndb_tuple_id_range_guard g(m_share);
10399     if ((m_skip_auto_increment &&
10400          ndb->readAutoIncrementValue(m_table, g.range, auto_value)) ||
10401         ndb->getAutoIncrementValue(m_table, g.range, auto_value,
10402                                    Uint32(m_autoincrement_prefetch),
10403                                    increment, offset))
10404     {
10405       if (--retries && !thd->killed &&
10406           ndb->getNdbError().status == NdbError::TemporaryError)
10407       {
10408         do_retry_sleep(retry_sleep);
10409         continue;
10410       }
10411       const NdbError err= ndb->getNdbError();
10412       sql_print_error("Error %lu in ::get_auto_increment(): %s",
10413                       (ulong) err.code, err.message);
10414       *first_value= ~(ulonglong) 0;
10415       DBUG_VOID_RETURN;
10416     }
10417     break;
10418   }
10419   *first_value= (longlong)auto_value;
10420   /* From the point of view of MySQL, NDB reserves one row at a time */
10421   *nb_reserved_values= 1;
10422   DBUG_VOID_RETURN;
10423 }
10424 
10425 
10426 /**
10427   Constructor for the NDB Cluster table handler.
10428 */
10429 
10430 ha_ndbcluster::ha_ndbcluster(handlerton *hton, TABLE_SHARE *table_arg):
10431   handler(hton, table_arg),
10432   m_thd_ndb(NULL),
10433   m_active_cursor(NULL),
10434   m_table(NULL),
10435   m_ndb_record(0),
10436   m_ndb_hidden_key_record(0),
10437   m_table_info(NULL),
10438   m_share(0),
10439   m_key_fields(NULL),
10440   m_part_info(NULL),
10441   m_user_defined_partitioning(FALSE),
10442   m_use_partition_pruning(FALSE),
10443   m_sorted(FALSE),
10444   m_use_write(FALSE),
10445   m_ignore_dup_key(FALSE),
10446   m_has_unique_index(FALSE),
10447   m_ignore_no_key(FALSE),
10448   m_read_before_write_removal_possible(FALSE),
10449   m_read_before_write_removal_used(FALSE),
10450   m_rows_updated(0),
10451   m_rows_deleted(0),
10452   m_rows_to_insert((ha_rows) 1),
10453   m_rows_inserted((ha_rows) 0),
10454   m_rows_changed((ha_rows) 0),
10455   m_delete_cannot_batch(FALSE),
10456   m_update_cannot_batch(FALSE),
10457   m_skip_auto_increment(TRUE),
10458   m_blobs_pending(0),
10459   m_is_bulk_delete(false),
10460   m_blobs_row_total_size(0),
10461   m_blobs_buffer(0),
10462   m_blobs_buffer_size(0),
10463   m_dupkey((uint) -1),
10464   m_autoincrement_prefetch(DEFAULT_AUTO_PREFETCH),
10465   m_pushed_join_member(NULL),
10466   m_pushed_join_operation(-1),
10467   m_disable_pushed_join(FALSE),
10468   m_active_query(NULL),
10469   m_pushed_operation(NULL),
10470   m_cond(NULL),
10471   m_multi_cursor(NULL)
10472 {
10473   int i;
10474 
10475   DBUG_ENTER("ha_ndbcluster");
10476 
10477   m_tabname[0]= '\0';
10478   m_dbname[0]= '\0';
10479 
10480   stats.records= ~(ha_rows)0; // uninitialized
10481   stats.block_size= 1024;
10482 
10483   for (i= 0; i < MAX_KEY; i++)
10484     ndb_init_index(m_index[i]);
10485 
10486   DBUG_VOID_RETURN;
10487 }
10488 
10489 
10490 /**
10491   Destructor for NDB Cluster table handler.
10492 */
10493 
10494 ha_ndbcluster::~ha_ndbcluster()
10495 {
10496   THD *thd= current_thd;
10497   Ndb *ndb= thd ? check_ndb_in_thd(thd) : g_ndb;
10498   DBUG_ENTER("~ha_ndbcluster");
10499 
10500   if (m_share)
10501   {
10502     /* ndb_share reference handler free */
10503     DBUG_PRINT("NDB_SHARE", ("%s handler free  use_count: %u",
10504                              m_share->key, m_share->use_count));
10505     free_share(&m_share);
10506   }
10507   release_metadata(thd, ndb);
10508   release_blobs_buffer();
10509 
10510   // Check for open cursor/transaction
10511   DBUG_ASSERT(m_thd_ndb == NULL);
10512 
10513   // Discard any generated condition
10514   DBUG_PRINT("info", ("Deleting generated condition"));
10515   if (m_cond)
10516   {
10517     delete m_cond;
10518     m_cond= NULL;
10519   }
10520   DBUG_PRINT("info", ("Deleting pushed joins"));
10521 #ifndef NDB_WITHOUT_JOIN_PUSHDOWN
10522   DBUG_ASSERT(m_active_query == NULL);
10523   DBUG_ASSERT(m_active_cursor == NULL);
10524   if (m_pushed_join_operation==PUSHED_ROOT)
10525   {
10526     delete m_pushed_join_member;             // Also delete QueryDef
10527   }
10528   m_pushed_join_member= NULL;
10529 #endif
10530   DBUG_VOID_RETURN;
10531 }
10532 
10533 
10534 /**
10535   Open a table for further use
10536   - fetch metadata for this table from NDB
10537   - check that table exists
10538 
10539   @retval
10540     0    ok
10541   @retval
10542     < 0  Table has changed
10543 */
10544 
10545 int ha_ndbcluster::open(const char *name, int mode, uint test_if_locked)
10546 {
10547   THD *thd= current_thd;
10548   int res;
10549   KEY *key;
10550   KEY_PART_INFO *key_part_info;
10551   uint key_parts, i, j;
10552   DBUG_ENTER("ha_ndbcluster::open");
10553   DBUG_PRINT("enter", ("name: %s  mode: %d  test_if_locked: %d",
10554                        name, mode, test_if_locked));
10555 
10556   if (table_share->primary_key != MAX_KEY)
10557   {
10558     /*
10559       Setup ref_length to make room for the whole
10560       primary key to be written in the ref variable
10561     */
10562     key= table->key_info+table_share->primary_key;
10563     ref_length= key->key_length;
10564   }
10565   else
10566   {
10567     if (m_user_defined_partitioning)
10568     {
10569       /* Add space for partid in ref */
10570       ref_length+= sizeof(m_part_id);
10571     }
10572   }
10573   DBUG_PRINT("info", ("ref_length: %d", ref_length));
10574 
10575   {
10576     char* bitmap_array;
10577     uint extra_hidden_keys= table_share->primary_key != MAX_KEY ? 0 : 1;
10578     uint n_keys= table_share->keys + extra_hidden_keys;
10579     uint ptr_size= sizeof(MY_BITMAP*) * (n_keys + 1 /* null termination */);
10580     uint map_size= sizeof(MY_BITMAP) * n_keys;
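    /*
      Single allocation: a NULL-terminated array of n_keys+1 MY_BITMAP
      pointers, followed by the n_keys MY_BITMAP structs those pointers
      will refer to (carved out via bitmap_array below).
    */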
10581     m_key_fields= (MY_BITMAP**)my_malloc(ptr_size + map_size,
10582                                          MYF(MY_WME + MY_ZEROFILL));
10583     if (!m_key_fields)
10584     {
10585       local_close(thd, FALSE);
10586       DBUG_RETURN(1);
10587     }
10588     bitmap_array= ((char*)m_key_fields) + ptr_size;
10589     for (i= 0; i < n_keys; i++)
10590     {
10591       my_bitmap_map *bitbuf= NULL;
10592       bool is_hidden_key= (i == table_share->keys);
10593       m_key_fields[i]= (MY_BITMAP*)bitmap_array;
10594       if (is_hidden_key || (i == table_share->primary_key))
10595       {
10596         m_pk_bitmap_p= m_key_fields[i];
10597         bitbuf= m_pk_bitmap_buf;
10598       }
10599       if (bitmap_init(m_key_fields[i], bitbuf,
10600                       table_share->fields, FALSE))
10601       {
10602         m_key_fields[i]= NULL;
10603         local_close(thd, FALSE);
10604         DBUG_RETURN(1);
10605       }
10606       if (!is_hidden_key)
10607       {
10608         key= table->key_info + i;
10609         key_part_info= key->key_part;
10610         key_parts= key->user_defined_key_parts;
10611         for (j= 0; j < key_parts; j++, key_part_info++)
10612           bitmap_set_bit(m_key_fields[i], key_part_info->fieldnr-1);
10613       }
10614       else
10615       {
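        /*
          The hidden primary key's bit is one past the
          table_share->fields bits set up by bitmap_init() above, so it
          is set directly in the buffer instead of via bitmap_set_bit().
        */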
10616         uint field_no= table_share->fields;
10617         ((uchar *)m_pk_bitmap_buf)[field_no>>3]|= (1 << (field_no & 7));
10618       }
10619       bitmap_array+= sizeof(MY_BITMAP);
10620     }
10621     m_key_fields[i]= NULL;
10622   }
10623 
10624   set_dbname(name);
10625   set_tabname(name);
10626 
10627   if ((res= check_ndb_connection(thd)) != 0)
10628   {
10629     local_close(thd, FALSE);
10630     DBUG_RETURN(res);
10631   }
10632 
10633   // Init table lock structure
10634   /* ndb_share reference handler */
10635   if ((m_share=get_share(name, table, FALSE)) == 0)
10636   {
10637     /**
10638      * No share present...we must create one
10639      */
10640     if (opt_ndb_extra_logging > 19)
10641     {
10642       sql_print_information("Calling ndbcluster_create_binlog_setup(%s) in ::open",
10643                             name);
10644     }
10645     Ndb* ndb= check_ndb_in_thd(thd);
10646     ndbcluster_create_binlog_setup(thd, ndb, name, strlen(name),
10647                                    m_dbname, m_tabname, table);
10648     if ((m_share=get_share(name, table, FALSE)) == 0)
10649     {
10650       local_close(thd, FALSE);
10651       DBUG_RETURN(1);
10652     }
10653   }
10654 
10655   DBUG_PRINT("NDB_SHARE", ("%s handler  use_count: %u",
10656                            m_share->key, m_share->use_count));
10657   thr_lock_data_init(&m_share->lock,&m_lock,(void*) 0);
10658 
10659   if ((res= get_metadata(thd, name)))
10660   {
10661     local_close(thd, FALSE);
10662     DBUG_RETURN(res);
10663   }
10664 
10665   if ((res= update_stats(thd, 1, true)) ||
10666       (res= info(HA_STATUS_CONST)))
10667   {
10668     local_close(thd, TRUE);
10669     DBUG_RETURN(res);
10670   }
10671   if (ndb_binlog_is_read_only())
10672   {
10673     table->db_stat|= HA_READ_ONLY;
10674     sql_print_information("table '%s' opened read only", name);
10675   }
10676   DBUG_RETURN(0);
10677 }
10678 
10679 /*
10680  * Support for OPTIMIZE TABLE
10681  * reclaims unused space of deleted rows
10682  * reclaims unused space from deleted rows
10683  */
10684 int ha_ndbcluster::optimize(THD* thd, HA_CHECK_OPT* check_opt)
10685 {
10686   ulong error, stats_error= 0;
10687   const uint delay= (uint)THDVAR(thd, optimization_delay);
10688 
10689   error= ndb_optimize_table(thd, delay);
10690   stats_error= update_stats(thd, 1);
10691   return (error) ? error : stats_error;
10692 }
10693 
10694 int ha_ndbcluster::ndb_optimize_table(THD* thd, uint delay)
10695 {
10696   Thd_ndb *thd_ndb= get_thd_ndb(thd);
10697   Ndb *ndb= thd_ndb->ndb;
10698   NDBDICT *dict= ndb->getDictionary();
10699   int result=0, error= 0;
10700   uint i;
10701   NdbDictionary::OptimizeTableHandle th;
10702   NdbDictionary::OptimizeIndexHandle ih;
10703 
10704   DBUG_ENTER("ndb_optimize_table");
10705   if ((error= dict->optimizeTable(*m_table, th)))
10706   {
10707     DBUG_PRINT("info",
10708                ("Optimize table %s returned %d", m_tabname, error));
10709     ERR_RETURN(ndb->getNdbError());
10710   }
10711   while((result= th.next()) == 1)
10712   {
10713     if (thd->killed)
10714       DBUG_RETURN(-1);
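    // my_sleep() takes microseconds; 'delay' is in milliseconds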
10715     my_sleep(1000*delay);
10716   }
10717   if (result == -1 || th.close() == -1)
10718   {
10719     DBUG_PRINT("info",
10720                ("Optimize table %s did not complete", m_tabname));
10721     ERR_RETURN(ndb->getNdbError());
10722   }
10723   for (i= 0; i < MAX_KEY; i++)
10724   {
10725     if (thd->killed)
10726       DBUG_RETURN(-1);
10727     if (m_index[i].status == ACTIVE)
10728     {
10729       const NdbDictionary::Index *index= m_index[i].index;
10730       const NdbDictionary::Index *unique_index= m_index[i].unique_index;
10731 
10732       if (index)
10733       {
10734         if ((error= dict->optimizeIndex(*index, ih)))
10735         {
10736           DBUG_PRINT("info",
10737                      ("Optimize index %s returned %d",
10738                       index->getName(), error));
10739           ERR_RETURN(ndb->getNdbError());
10740 
10741         }
10742         while((result= ih.next()) == 1)
10743         {
10744           if (thd->killed)
10745             DBUG_RETURN(-1);
10746           my_sleep(1000*delay);
10747         }
10748         if (result == -1 || ih.close() == -1)
10749         {
10750           DBUG_PRINT("info",
10751                      ("Optimize index %s did not complete", index->getName()));
10752           ERR_RETURN(ndb->getNdbError());
10753         }
10754       }
10755       if (unique_index)
10756       {
10757         if ((error= dict->optimizeIndex(*unique_index, ih)))
10758         {
10759           DBUG_PRINT("info",
10760                      ("Optimize unique index %s returned %d",
10761                       unique_index->getName(), error));
10762           ERR_RETURN(ndb->getNdbError());
10763         }
10764         while((result= ih.next()) == 1)
10765         {
10766           if (thd->killed)
10767             DBUG_RETURN(-1);
10768           my_sleep(1000*delay);
10769         }
10770         if (result == -1 || ih.close() == -1)
10771         {
10772           DBUG_PRINT("info",
10773                      ("Optimize unique index %s did not complete", unique_index->getName()));
10774           ERR_RETURN(ndb->getNdbError());
10775         }
10776       }
10777     }
10778   }
10779   DBUG_RETURN(0);
10780 }
10781 
10782 int ha_ndbcluster::analyze(THD* thd, HA_CHECK_OPT* check_opt)
10783 {
10784   int err;
10785   if ((err= update_stats(thd, 1)) != 0)
10786     return err;
10787   const bool index_stat_enable= THDVAR(NULL, index_stat_enable) &&
10788                                 THDVAR(thd, index_stat_enable);
10789   if (index_stat_enable)
10790   {
10791     if ((err= analyze_index(thd)) != 0)
10792       return err;
10793   }
10794   return 0;
10795 }
10796 
10797 int
10798 ha_ndbcluster::analyze_index(THD *thd)
10799 {
10800   DBUG_ENTER("ha_ndbcluster::analyze_index");
10801 
10802   Thd_ndb *thd_ndb= get_thd_ndb(thd);
10803   Ndb *ndb= thd_ndb->ndb;
10804 
10805   uint inx_list[MAX_INDEXES];
10806   uint inx_count= 0;
10807 
10808   uint inx;
10809   for (inx= 0; inx < table_share->keys; inx++)
10810   {
10811     NDB_INDEX_TYPE idx_type= get_index_type(inx);
10812 
10813     if ((idx_type == PRIMARY_KEY_ORDERED_INDEX ||
10814          idx_type == UNIQUE_ORDERED_INDEX ||
10815          idx_type == ORDERED_INDEX))
10816     {
10817       if (inx_count < MAX_INDEXES)
10818         inx_list[inx_count++]= inx;
10819     }
10820   }
10821 
10822   if (inx_count != 0)
10823   {
10824     int err= ndb_index_stat_analyze(ndb, inx_list, inx_count);
10825     if (err != 0)
10826       DBUG_RETURN(err);
10827   }
10828   DBUG_RETURN(0);
10829 }
10830 
10831 /*
10832   Set partition info
10833 
10834   SYNOPSIS
10835     set_part_info()
10836     part_info
10837 
10838   RETURN VALUE
10839     NONE
10840 
10841   DESCRIPTION
10842     Set up partition info when handler object created
10843 */
10844 
10845 void ha_ndbcluster::set_part_info(partition_info *part_info, bool early)
10846 {
10847   DBUG_ENTER("ha_ndbcluster::set_part_info");
10848   m_part_info= part_info;
10849   if (!early)
10850   {
10851     m_use_partition_pruning= FALSE;
10852     if (!(m_part_info->part_type == HASH_PARTITION &&
10853           m_part_info->list_of_part_fields &&
10854           !m_part_info->is_sub_partitioned()))
10855     {
10856       /*
10857         PARTITION BY HASH, RANGE and LIST plus all subpartitioning variants
10858         all use MySQL defined partitioning. PARTITION BY KEY uses NDB native
10859         partitioning scheme.
10860       */
10861       m_use_partition_pruning= TRUE;
10862       m_user_defined_partitioning= TRUE;
10863     }
10864     if (m_part_info->part_type == HASH_PARTITION &&
10865         m_part_info->list_of_part_fields &&
10866         partition_info_num_full_part_fields(m_part_info) == 0)
10867     {
10868       /*
10869         CREATE TABLE t (....) ENGINE NDB PARTITION BY KEY();
10870         where no primary key is defined uses a hidden key as partition field,
10871         which makes any partition pruning impossible. Partition pruning
10872         requires partitioning based on real fields; in addition, the lack of
10873         a primary key means that all accesses to the table are full table
10874         scans or index scans, and those scans can never be pruned since the
10875         hidden key is unknown. In write_row, update_row and delete_row the
10876         normal hidden key handling will fix things.
10877       */
10878       m_use_partition_pruning= FALSE;
10879     }
10880     DBUG_PRINT("info", ("m_use_partition_pruning = %d",
10881                          m_use_partition_pruning));
10882   }
10883   DBUG_VOID_RETURN;
10884 }
10885 
10886 /**
10887   Close the table
10888   - release resources setup by open()
10889  */
10890 
10891 void ha_ndbcluster::local_close(THD *thd, bool release_metadata_flag)
10892 {
10893   Ndb *ndb;
10894   DBUG_ENTER("ha_ndbcluster::local_close");
10895   if (m_key_fields)
10896   {
10897     MY_BITMAP **inx_bitmap;
10898     for (inx_bitmap= m_key_fields;
10899          (inx_bitmap != NULL) && ((*inx_bitmap) != NULL);
10900          inx_bitmap++)
10901       if ((*inx_bitmap)->bitmap != m_pk_bitmap_buf)
10902         bitmap_free(*inx_bitmap);
10903     my_free((char*)m_key_fields, MYF(0));
10904     m_key_fields= NULL;
10905   }
10906   if (m_share)
10907   {
10908     /* ndb_share reference handler free */
10909     DBUG_PRINT("NDB_SHARE", ("%s handler free  use_count: %u",
10910                              m_share->key, m_share->use_count));
10911     free_share(&m_share);
10912   }
10913   m_share= 0;
10914   if (release_metadata_flag)
10915   {
10916     ndb= thd ? check_ndb_in_thd(thd) : g_ndb;
10917     release_metadata(thd, ndb);
10918   }
10919   DBUG_VOID_RETURN;
10920 }
10921 
10922 int ha_ndbcluster::close(void)
10923 {
10924   DBUG_ENTER("close");
10925   THD *thd= table->in_use;
10926   local_close(thd, TRUE);
10927   DBUG_RETURN(0);
10928 }
10929 
10930 
10931 int ha_ndbcluster::check_ndb_connection(THD* thd)
10932 {
10933   Ndb *ndb;
10934   DBUG_ENTER("check_ndb_connection");
10935 
10936   if (!(ndb= check_ndb_in_thd(thd, true)))
10937     DBUG_RETURN(HA_ERR_NO_CONNECTION);
10938   if (ndb->setDatabaseName(m_dbname))
10939   {
10940     ERR_RETURN(ndb->getNdbError());
10941   }
10942   DBUG_RETURN(0);
10943 }
10944 
10945 
10946 static int ndbcluster_close_connection(handlerton *hton, THD *thd)
10947 {
10948   Thd_ndb *thd_ndb= get_thd_ndb(thd);
10949   DBUG_ENTER("ndbcluster_close_connection");
10950   if (thd_ndb)
10951   {
10952     Thd_ndb::release(thd_ndb);
10953     thd_set_thd_ndb(thd, NULL);
10954   }
10955   DBUG_RETURN(0);
10956 }
10957 
10958 
10959 /**
10960   Try to discover one table from NDB.
10961 */
10962 
10963 int ndbcluster_discover(handlerton *hton, THD* thd, const char *db,
10964                         const char *name,
10965                         uchar **frmblob,
10966                         size_t *frmlen)
10967 {
10968   int error= 0;
10969   NdbError ndb_error;
10970   size_t len;
10971   uchar* data= NULL;
10972   Ndb* ndb;
10973   char key[FN_REFLEN + 1];
10974   DBUG_ENTER("ndbcluster_discover");
10975   DBUG_PRINT("enter", ("db: %s, name: %s", db, name));
10976 
10977   if (!(ndb= check_ndb_in_thd(thd)))
10978     DBUG_RETURN(HA_ERR_NO_CONNECTION);
10979   if (ndb->setDatabaseName(db))
10980   {
10981     ERR_RETURN(ndb->getNdbError());
10982   }
10983   NDBDICT* dict= ndb->getDictionary();
10984   build_table_filename(key, sizeof(key) - 1, db, name, "", 0);
10985   /* ndb_share reference temporary */
10986   NDB_SHARE *share= get_share(key, 0, FALSE);
10987   if (share)
10988   {
10989     DBUG_PRINT("NDB_SHARE", ("%s temporary  use_count: %u",
10990                              share->key, share->use_count));
10991   }
10992   if (share && get_ndb_share_state(share) == NSS_ALTERED)
10993   {
10994     // Frm has been altered on disk, but not yet written to ndb
10995     if (readfrm(key, &data, &len))
10996     {
10997       DBUG_PRINT("error", ("Could not read frm"));
10998       error= 1;
10999       goto err;
11000     }
11001   }
11002   else
11003   {
11004     Ndb_table_guard ndbtab_g(dict, name);
11005     const NDBTAB *tab= ndbtab_g.get_table();
11006     if (!tab)
11007     {
11008       const NdbError err= dict->getNdbError();
11009       if (err.code == 709 || err.code == 723)
11010       {
11011         error= -1;
11012         DBUG_PRINT("info", ("ndb_error.code: %u", err.code));
11013       }
11014       else
11015       {
11016         error= -1;
11017         ndb_error= err;
11018         DBUG_PRINT("info", ("ndb_error.code: %u", ndb_error.code));
11019       }
11020       goto err;
11021     }
11022     DBUG_PRINT("info", ("Found table %s", tab->getName()));
11023 
11024     len= tab->getFrmLength();
11025     if (len == 0 || tab->getFrmData() == NULL)
11026     {
11027       DBUG_PRINT("error", ("No frm data found."));
11028       error= 1;
11029       goto err;
11030     }
11031 
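    /*
      The frm image is stored packed (compressed) in the NDB dictionary;
      unpackfrm() inflates it into a newly allocated buffer that is
      handed back to the server via *frmblob / *frmlen.
    */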
11032     if (unpackfrm(&data, &len, (uchar*) tab->getFrmData()))
11033     {
11034       DBUG_PRINT("error", ("Could not unpack table"));
11035       error= 1;
11036       goto err;
11037     }
11038   }
11039 #ifdef HAVE_NDB_BINLOG
11040   if (ndbcluster_check_if_local_table(db, name) &&
11041       !Ndb_dist_priv_util::is_distributed_priv_table(db, name))
11042   {
11043     DBUG_PRINT("info", ("ndbcluster_discover: Skipping locally defined table '%s.%s'",
11044                         db, name));
11045     sql_print_error("ndbcluster_discover: Skipping locally defined table '%s.%s'",
11046                     db, name);
11047     error= 1;
11048     goto err;
11049   }
11050 #endif
11051   *frmlen= len;
11052   *frmblob= data;
11053 
11054   if (share)
11055   {
11056     /* ndb_share reference temporary free */
11057     DBUG_PRINT("NDB_SHARE", ("%s temporary free  use_count: %u",
11058                              share->key, share->use_count));
11059     free_share(&share);
11060   }
11061 
11062   DBUG_RETURN(0);
11063 err:
11064   my_free((char*)data, MYF(MY_ALLOW_ZERO_PTR));
11065   if (share)
11066   {
11067     /* ndb_share reference temporary free */
11068     DBUG_PRINT("NDB_SHARE", ("%s temporary free  use_count: %u",
11069                              share->key, share->use_count));
11070     free_share(&share);
11071   }
11072 
11073   if (ndb_error.code)
11074   {
11075     ERR_RETURN(ndb_error);
11076   }
11077   DBUG_RETURN(error);
11078 }
11079 
11080 /**
11081   Check if a table exists in NDB.
11082 */
11083 
11084 int ndbcluster_table_exists_in_engine(handlerton *hton, THD* thd,
11085                                       const char *db,
11086                                       const char *name)
11087 {
11088   Ndb* ndb;
11089   DBUG_ENTER("ndbcluster_table_exists_in_engine");
11090   DBUG_PRINT("enter", ("db: %s  name: %s", db, name));
11091 
11092   if (!(ndb= check_ndb_in_thd(thd)))
11093     DBUG_RETURN(HA_ERR_NO_CONNECTION);
11094   NDBDICT* dict= ndb->getDictionary();
11095   NdbDictionary::Dictionary::List list;
11096   if (dict->listObjects(list, NdbDictionary::Object::UserTable) != 0)
11097   {
11098     ERR_RETURN(dict->getNdbError());
11099   }
11100   for (uint i= 0 ; i < list.count ; i++)
11101   {
11102     NdbDictionary::Dictionary::List::Element& elmt= list.elements[i];
11103     if (my_strcasecmp(table_alias_charset, elmt.database, db))
11104       continue;
11105     if (my_strcasecmp(table_alias_charset, elmt.name, name))
11106       continue;
11107     DBUG_PRINT("info", ("Found table"));
11108     DBUG_RETURN(HA_ERR_TABLE_EXIST);
11109   }
11110   DBUG_RETURN(HA_ERR_NO_SUCH_TABLE);
11111 }
11112 
11113 
11114 
11115 extern "C" uchar* tables_get_key(const char *entry, size_t *length,
11116                                 my_bool not_used MY_ATTRIBUTE((unused)))
11117 {
11118   *length= strlen(entry);
11119   return (uchar*) entry;
11120 }
11121 
11122 
11123 /**
11124   Drop a database in NDB Cluster
11125 
11126   @note
11127     add a dummy void function, since stupid handlerton is returning void instead of int...
11128 */
11129 int ndbcluster_drop_database_impl(THD *thd, const char *path)
11130 {
11131   DBUG_ENTER("ndbcluster_drop_database_impl");
11132   char dbname[FN_HEADLEN];
11133   Ndb* ndb;
11134   NdbDictionary::Dictionary::List list;
11135   uint i;
11136   char *tabname;
11137   List<char> drop_list;
11138   int ret= 0;
11139   ha_ndbcluster::set_dbname(path, (char *)&dbname);
11140   DBUG_PRINT("enter", ("db: %s", dbname));
11141 
11142   if (!(ndb= check_ndb_in_thd(thd)))
11143     DBUG_RETURN(-1);
11144 
11145   // List tables in NDB
11146   NDBDICT *dict= ndb->getDictionary();
11147   if (dict->listObjects(list,
11148                         NdbDictionary::Object::UserTable) != 0)
11149     DBUG_RETURN(-1);
11150   for (i= 0 ; i < list.count ; i++)
11151   {
11152     NdbDictionary::Dictionary::List::Element& elmt= list.elements[i];
11153     DBUG_PRINT("info", ("Found %s/%s in NDB", elmt.database, elmt.name));
11154 
11155     // Add only tables that belong to db
11156     // Ignore Blob part tables - they are deleted when their table
11157     // is deleted.
11158     if (my_strcasecmp(system_charset_info, elmt.database, dbname) ||
11159         IS_NDB_BLOB_PREFIX(elmt.name))
11160       continue;
11161     DBUG_PRINT("info", ("%s must be dropped", elmt.name));
11162     drop_list.push_back(thd->strdup(elmt.name));
11163   }
11164   // Drop any tables belonging to database
11165   char full_path[FN_REFLEN + 1];
11166   char *tmp= full_path +
11167     build_table_filename(full_path, sizeof(full_path) - 1, dbname, "", "", 0);
11168   if (ndb->setDatabaseName(dbname))
11169   {
11170     ERR_RETURN(ndb->getNdbError());
11171   }
11172   List_iterator_fast<char> it(drop_list);
11173   while ((tabname=it++))
11174   {
11175     tablename_to_filename(tabname, tmp, FN_REFLEN - (tmp - full_path)-1);
11176     if (ha_ndbcluster::drop_table_impl(thd, 0, ndb, full_path, dbname, tabname))
11177     {
11178       const NdbError err= dict->getNdbError();
11179       if (err.code != 709 && err.code != 723)
11180       {
11181         ret= ndb_to_mysql_error(&err);
11182       }
11183     }
11184   }
11185 
11186   dict->invalidateDbGlobal(dbname);
11187   DBUG_RETURN(ret);
11188 }
11189 
11190 static void ndbcluster_drop_database(handlerton *hton, char *path)
11191 {
11192   THD *thd= current_thd;
11193   DBUG_ENTER("ndbcluster_drop_database");
11194   /*
11195     Don't allow drop database unless
11196     schema distribution table is setup
11197   */
11198   if (!ndb_schema_share)
11199   {
11200     DBUG_PRINT("info", ("Schema distribution table not setup"));
11201     DBUG_VOID_RETURN;
11202   }
11203   ndbcluster_drop_database_impl(thd, path);
11204   char db[FN_REFLEN];
11205   ha_ndbcluster::set_dbname(path, db);
11206   uint32 table_id= 0, table_version= 0;
11207   /*
11208     Since databases aren't real ndb schema objects
11209     they don't have any id/version.
11210 
11211     But since that id/version is used to make sure that events on SCHEMA_TABLE
11212     are correct, we set random numbers.
11213   */
11214   table_id = (uint32)rand();
11215   table_version = (uint32)rand();
11216   ndbcluster_log_schema_op(thd,
11217                            thd->query(), thd->query_length(),
11218                            db, "", table_id, table_version,
11219                            SOT_DROP_DB, NULL, NULL);
11220   DBUG_VOID_RETURN;
11221 }
11222 
11223 int ndb_create_table_from_engine(THD *thd, const char *db,
11224                                  const char *table_name)
11225 {
11226   // Copy db and table_name to stack buffers since functions used by
11227   // ha_create_table_from_engine may convert to lowercase on some platforms
11228   char db_buf[FN_REFLEN + 1];
11229   char table_name_buf[FN_REFLEN + 1];
11230   strnmov(db_buf, db, sizeof(db_buf));
11231   strnmov(table_name_buf, table_name, sizeof(table_name_buf));
11232 
11233   LEX *old_lex= thd->lex, newlex;
11234   thd->lex= &newlex;
11235   newlex.current_select= NULL;
11236   lex_start(thd);
11237   int res= ha_create_table_from_engine(thd, db_buf, table_name_buf);
11238   thd->lex= old_lex;
11239   return res;
11240 }
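/*
  Sketch of typical use (mirroring the calls further down in this file):
  discovery is attempted when a table is listed in NDB but has no matching
  .frm on local disk, e.g. because another mysqld created it:

    if (ndb_create_table_from_engine(thd, "db", "t1"))
      ;  // discovery failed, caller pushes a warning or retries
*/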
11241 
11242 /*
11243   find all tables in ndb and discover those needed
11244 */
11245 int ndbcluster_find_all_files(THD *thd)
11246 {
11247   Ndb* ndb;
11248   char key[FN_REFLEN + 1];
11249   NDBDICT *dict;
11250   int unhandled, retries= 5, skipped;
11251   DBUG_ENTER("ndbcluster_find_all_files");
11252 
11253   if (!(ndb= check_ndb_in_thd(thd)))
11254     DBUG_RETURN(HA_ERR_NO_CONNECTION);
11255 
11256   dict= ndb->getDictionary();
11257 
11258   LINT_INIT(unhandled);
11259   LINT_INIT(skipped);
11260   do
11261   {
11262     NdbDictionary::Dictionary::List list;
11263     if (dict->listObjects(list, NdbDictionary::Object::UserTable) != 0)
11264       ERR_RETURN(dict->getNdbError());
11265     unhandled= 0;
11266     skipped= 0;
11267     retries--;
11268     for (uint i= 0 ; i < list.count ; i++)
11269     {
11270       NDBDICT::List::Element& elmt= list.elements[i];
11271       if (IS_TMP_PREFIX(elmt.name) || IS_NDB_BLOB_PREFIX(elmt.name))
11272       {
11273         DBUG_PRINT("info", ("Skipping %s.%s in NDB", elmt.database, elmt.name));
11274         continue;
11275       }
11276       DBUG_PRINT("info", ("Found %s.%s in NDB", elmt.database, elmt.name));
11277       if (elmt.state != NDBOBJ::StateOnline &&
11278           elmt.state != NDBOBJ::StateBackup &&
11279           elmt.state != NDBOBJ::StateBuilding)
11280       {
11281         sql_print_information("NDB: skipping setup table %s.%s, in state %d",
11282                               elmt.database, elmt.name, elmt.state);
11283         skipped++;
11284         continue;
11285       }
11286 
11287       ndb->setDatabaseName(elmt.database);
11288       Ndb_table_guard ndbtab_g(dict, elmt.name);
11289       const NDBTAB *ndbtab= ndbtab_g.get_table();
11290       if (!ndbtab)
11291       {
11292         if (retries == 0)
11293           sql_print_error("NDB: failed to setup table %s.%s, error: %d, %s",
11294                           elmt.database, elmt.name,
11295                           dict->getNdbError().code,
11296                           dict->getNdbError().message);
11297         unhandled++;
11298         continue;
11299       }
11300 
11301       if (ndbtab->getFrmLength() == 0)
11302         continue;
11303 
11304       /* check if database exists */
11305       char *end= key +
11306         build_table_filename(key, sizeof(key) - 1, elmt.database, "", "", 0);
11307       if (my_access(key, F_OK))
11308       {
11309         /* no such database defined, skip table */
11310         continue;
11311       }
11312       /* finalize construction of path */
11313       end+= tablename_to_filename(elmt.name, end,
11314                                   sizeof(key)-(end-key));
11315       uchar *data= 0, *pack_data= 0;
11316       size_t length, pack_length;
11317       int discover= 0;
11318       if (readfrm(key, &data, &length) ||
11319           packfrm(data, length, &pack_data, &pack_length))
11320       {
11321         discover= 1;
11322         sql_print_information("NDB: missing frm for %s.%s, discovering...",
11323                               elmt.database, elmt.name);
11324       }
11325       else if (cmp_frm(ndbtab, pack_data, pack_length))
11326       {
11327         /* ndb_share reference temporary */
11328         NDB_SHARE *share= get_share(key, 0, FALSE);
11329         if (share)
11330         {
11331           DBUG_PRINT("NDB_SHARE", ("%s temporary  use_count: %u",
11332                                    share->key, share->use_count));
11333         }
11334         if (!share || get_ndb_share_state(share) != NSS_ALTERED)
11335         {
11336           discover= 1;
11337           sql_print_information("NDB: mismatch in frm for %s.%s, discovering...",
11338                                 elmt.database, elmt.name);
11339         }
11340         if (share)
11341         {
11342           /* ndb_share reference temporary free */
11343           DBUG_PRINT("NDB_SHARE", ("%s temporary free  use_count: %u",
11344                                    share->key, share->use_count));
11345           free_share(&share);
11346         }
11347       }
11348       my_free((char*) data, MYF(MY_ALLOW_ZERO_PTR));
11349       my_free((char*) pack_data, MYF(MY_ALLOW_ZERO_PTR));
11350 
11351       if (discover)
11352       {
11353         /* ToDo 4.1 database needs to be created if missing */
11354         if (ndb_create_table_from_engine(thd, elmt.database, elmt.name))
11355         {
11356           /* ToDo 4.1 handle error */
11357         }
11358       }
11359       else
11360       {
11361         /* set up replication for this table */
11362         ndbcluster_create_binlog_setup(thd, ndb, key, end-key,
11363                                        elmt.database, elmt.name,
11364                                        0);
11365       }
11366     }
11367   }
11368   while (unhandled && retries);
11369 
11370   DBUG_RETURN(-(skipped + unhandled));
11371 }
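/*
  Return value: 0 means every listed table was handled; otherwise the
  negated sum of tables skipped (unexpected state) and tables still
  unhandled when the retries above were exhausted, i.e. -(skipped + unhandled).
*/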
11372 
11373 
11374 static int
11375 ndbcluster_find_files(handlerton *hton, THD *thd,
11376                       const char *db, const char *path,
11377                       const char *wild, bool dir, List<LEX_STRING> *files)
11378 {
11379   DBUG_ENTER("ndbcluster_find_files");
11380   DBUG_PRINT("enter", ("db: %s", db));
11381   { // extra bracket to avoid gcc 2.95.3 warning
11382   uint i;
11383   Thd_ndb *thd_ndb;
11384   Ndb* ndb;
11385   char name[FN_REFLEN + 1];
11386   HASH ndb_tables, ok_tables;
11387   NDBDICT::List list;
11388 
11389   if (!(ndb= check_ndb_in_thd(thd)))
11390     DBUG_RETURN(HA_ERR_NO_CONNECTION);
11391   thd_ndb= get_thd_ndb(thd);
11392 
11393   if (dir)
11394     DBUG_RETURN(0); // Discovery of databases is not yet supported
11395 
11396   Ndb_global_schema_lock_guard ndb_global_schema_lock_guard(thd);
11397   if (ndb_global_schema_lock_guard.lock())
11398     DBUG_RETURN(HA_ERR_NO_CONNECTION);
11399 
11400   // List tables in NDB
11401   NDBDICT *dict= ndb->getDictionary();
11402   if (dict->listObjects(list,
11403                         NdbDictionary::Object::UserTable) != 0)
11404     ERR_RETURN(dict->getNdbError());
11405 
11406   if (my_hash_init(&ndb_tables, table_alias_charset,list.count,0,0,
11407                    (my_hash_get_key)tables_get_key,0,0))
11408   {
11409     DBUG_PRINT("error", ("Failed to init HASH ndb_tables"));
11410     DBUG_RETURN(-1);
11411   }
11412 
11413   if (my_hash_init(&ok_tables, system_charset_info,32,0,0,
11414                    (my_hash_get_key)tables_get_key,0,0))
11415   {
11416     DBUG_PRINT("error", ("Failed to init HASH ok_tables"));
11417     my_hash_free(&ndb_tables);
11418     DBUG_RETURN(-1);
11419   }
11420 
11421   for (i= 0 ; i < list.count ; i++)
11422   {
11423     NDBDICT::List::Element& elmt= list.elements[i];
11424     if (IS_TMP_PREFIX(elmt.name) || IS_NDB_BLOB_PREFIX(elmt.name))
11425     {
11426       DBUG_PRINT("info", ("Skipping %s.%s in NDB", elmt.database, elmt.name));
11427       continue;
11428     }
11429     DBUG_PRINT("info", ("Found %s/%s in NDB", elmt.database, elmt.name));
11430 
11431     // Add only tables that belong to db
11432     if (my_strcasecmp(system_charset_info, elmt.database, db))
11433       continue;
11434 
11435     // Apply wildcard to list of tables in NDB
11436     if (wild)
11437     {
11438       if (lower_case_table_names)
11439       {
11440         if (wild_case_compare(files_charset_info, elmt.name, wild))
11441           continue;
11442       }
11443       else if (wild_compare(elmt.name,wild,0))
11444         continue;
11445     }
11446     DBUG_PRINT("info", ("Inserting %s into ndb_tables hash", elmt.name));
11447     my_hash_insert(&ndb_tables, (uchar*)thd->strdup(elmt.name));
11448   }
11449 
11450   LEX_STRING *file_name;
11451   List_iterator<LEX_STRING> it(*files);
11452   List<char> delete_list;
11453   char *file_name_str;
11454   while ((file_name=it++))
11455   {
11456     bool file_on_disk= FALSE;
11457     DBUG_PRINT("info", ("%s", file_name->str));
11458     if (my_hash_search(&ndb_tables,
11459                        (const uchar*)file_name->str, file_name->length))
11460     {
11461       build_table_filename(name, sizeof(name) - 1, db,
11462                            file_name->str, reg_ext, 0);
11463       if (my_access(name, F_OK))
11464       {
11465         DBUG_PRINT("info", ("Table %s listed and needs discovery",
11466                             file_name->str));
11467         if (ndb_create_table_from_engine(thd, db, file_name->str))
11468         {
11469           push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN,
11470                               ER_TABLE_EXISTS_ERROR,
11471                               "Discover of table %s.%s failed",
11472                               db, file_name->str);
11473           continue;
11474         }
11475       }
11476       DBUG_PRINT("info", ("%s existed in NDB _and_ on disk ", file_name->str));
11477       file_on_disk= TRUE;
11478     }
11479 
11480     // Check for .ndb file with this name
11481     build_table_filename(name, sizeof(name) - 1, db,
11482                          file_name->str, ha_ndb_ext, 0);
11483     DBUG_PRINT("info", ("Check access for %s", name));
11484     if (my_access(name, F_OK))
11485     {
11486       DBUG_PRINT("info", ("%s did not exist on disk", name));
11487       // .ndb file did not exist on disk, another table type
11488       if (file_on_disk)
11489       {
11490 	// Ignore this ndb table
11491  	uchar *record= my_hash_search(&ndb_tables,
11492                                       (const uchar*) file_name->str,
11493                                       file_name->length);
11494 	DBUG_ASSERT(record);
11495 	my_hash_delete(&ndb_tables, record);
11496 	push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN,
11497 			    ER_TABLE_EXISTS_ERROR,
11498 			    "Local table %s.%s shadows ndb table",
11499 			    db, file_name->str);
11500       }
11501       continue;
11502     }
11503     if (file_on_disk)
11504     {
11505       // File existed in NDB and as frm file, put in ok_tables list
11506       my_hash_insert(&ok_tables, (uchar*) file_name->str);
11507       continue;
11508     }
11509     DBUG_PRINT("info", ("%s existed on disk", name));
11510     // The .ndb file exists on disk, but it's not in list of tables in ndb
11511     // Verify that handler agrees table is gone.
11512     if (ndbcluster_table_exists_in_engine(hton, thd, db, file_name->str) ==
11513         HA_ERR_NO_SUCH_TABLE)
11514     {
11515       DBUG_PRINT("info", ("NDB says %s does not exist", file_name->str));
11516       it.remove();
11517       // Put in list of tables to remove from disk
11518       delete_list.push_back(thd->strdup(file_name->str));
11519     }
11520   }
11521 
11522   /* setup logging to binlog for all discovered tables */
11523   {
11524     char *end, *end1= name +
11525       build_table_filename(name, sizeof(name) - 1, db, "", "", 0);
11526     for (i= 0; i < ok_tables.records; i++)
11527     {
11528       file_name_str= (char*)my_hash_element(&ok_tables, i);
11529       end= end1 +
11530         tablename_to_filename(file_name_str, end1, sizeof(name) - (end1 - name));
11531       ndbcluster_create_binlog_setup(thd, ndb, name, end-name,
11532                                      db, file_name_str, 0);
11533     }
11534   }
11535 
11536   // Check for new files to discover
11537   DBUG_PRINT("info", ("Checking for new files to discover"));
11538   List<char> create_list;
11539   for (i= 0 ; i < ndb_tables.records ; i++)
11540   {
11541     file_name_str= (char*) my_hash_element(&ndb_tables, i);
11542     if (!my_hash_search(&ok_tables,
11543                         (const uchar*) file_name_str, strlen(file_name_str)))
11544     {
11545       build_table_filename(name, sizeof(name) - 1,
11546                            db, file_name_str, reg_ext, 0);
11547       if (my_access(name, F_OK))
11548       {
11549         DBUG_PRINT("info", ("%s must be discovered", file_name_str));
11550         // File is in list of ndb tables and not in ok_tables
11551         // This table needs to be created
11552         create_list.push_back(thd->strdup(file_name_str));
11553       }
11554     }
11555   }
11556 
11557 #ifndef NDB_NO_MYSQL_RM_TABLE_PART2
11558   /*
11559     Delete old files
11560 
11561     ndbcluster_find_files() may be called from I_S code and ndbcluster_binlog
11562     thread in situations when some tables are already open. This means that
11563     code below will try to obtain exclusive metadata lock on some table
11564     while holding shared metadata locks on other tables. This might lead to a
11565     deadlock but such a deadlock should be detected by MDL deadlock detector.
11566   */
11567   List_iterator_fast<char> it3(delete_list);
11568   while ((file_name_str= it3++))
11569   {
11570     DBUG_PRINT("info", ("Removing table %s/%s", db, file_name_str));
11571     // Delete the table and all related files
11572     TABLE_LIST table_list;
11573     table_list.init_one_table(db, strlen(db),
11574                               file_name_str, strlen(file_name_str),
11575                               file_name_str,
11576                               TL_WRITE);
11577     table_list.mdl_request.set_type(MDL_EXCLUSIVE);
11578     /*
11579       Set the TNO_NO_NDB_DROP_TABLE flag to not drop the ndb table.
11580       It should not exist anyway.
11581     */
11582     thd_ndb->options|= TNO_NO_NDB_DROP_TABLE;
11583     (void)mysql_rm_table_part2(thd, &table_list,
11584                                false,   /* if_exists */
11585                                false,   /* drop_temporary */
11586                                false,   /* drop_view */
11587                                true     /* dont_log_query*/);
11588     thd_ndb->options&= ~TNO_NO_NDB_DROP_TABLE;
11589     trans_commit_implicit(thd); /* Safety, should be unnecessary. */
11590     thd->mdl_context.release_transactional_locks();
11591     /* Clear error message that is returned when table is deleted */
11592     thd->clear_error();
11593   }
11594 #endif
11595 
11596   // Create new files
11597   List_iterator_fast<char> it2(create_list);
11598   while ((file_name_str=it2++))
11599   {
11600     DBUG_PRINT("info", ("Table %s needs discovery", file_name_str));
11601     if (ndb_create_table_from_engine(thd, db, file_name_str) == 0)
11602     {
11603       LEX_STRING *tmp_file_name= 0;
11604       tmp_file_name= thd->make_lex_string(tmp_file_name, file_name_str,
11605                                           strlen(file_name_str), TRUE);
11606       files->push_back(tmp_file_name);
11607     }
11608   }
11609 
11610   my_hash_free(&ok_tables);
11611   my_hash_free(&ndb_tables);
11612 
11613   // Remove the schema distribution table from the list of files
11614   if (!strcmp(db, NDB_REP_DB))
11615   {
11616     uint count = 0;
11617     while (count++ < files->elements)
11618     {
11619       file_name = (LEX_STRING *)files->pop();
11620       if (!strcmp(file_name->str, NDB_SCHEMA_TABLE))
11621       {
11622         DBUG_PRINT("info", ("skip %s.%s table, it should be hidden from the user",
11623                    NDB_REP_DB, NDB_SCHEMA_TABLE));
11624         continue;
11625       }
11626       files->push_back(file_name);
11627     }
11628   }
11629   } // extra bracket to avoid gcc 2.95.3 warning
11630   DBUG_RETURN(0);
11631 }
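/*
  In summary, ndbcluster_find_files() reconciles three views of the table
  namespace: tables listed in the NDB dictionary (ndb_tables), tables known
  both to NDB and locally (ok_tables), and the local file list passed in by
  the server. Local leftovers are deleted, NDB-only tables are discovered,
  and tables present in both get binlog setup.
*/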
11632 
11633 
11634 /*
11635   Initialise all global variables before creating
11636   an NDB Cluster table handler
11637  */
11638 
11639 /* Call back after cluster connect */
11640 static int connect_callback()
11641 {
11642   pthread_mutex_lock(&LOCK_ndb_util_thread);
11643   update_status_variables(NULL, &g_ndb_status,
11644                           g_ndb_cluster_connection);
11645 
11646   uint node_id, i= 0;
11647   Ndb_cluster_connection_node_iter node_iter;
11648   memset((void *)g_node_id_map, 0xFFFF, sizeof(g_node_id_map));
11649   while ((node_id= g_ndb_cluster_connection->get_next_node(node_iter)))
11650     g_node_id_map[node_id]= i++;
11651 
11652   pthread_cond_signal(&COND_ndb_util_thread);
11653   pthread_mutex_unlock(&LOCK_ndb_util_thread);
11654   return 0;
11655 }
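/*
  After this callback g_node_id_map translates a cluster node id into a
  dense 0-based index; entries for node ids not returned by get_next_node()
  keep the 0xFFFF fill pattern from the memset above. For example, assuming
  data nodes 3 and 4 are connected, the map would typically hold (in
  iteration order) g_node_id_map[3] == 0 and g_node_id_map[4] == 1.
*/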
11656 
11657 #ifndef NDB_NO_WAIT_SETUP
11658 static int ndb_wait_setup_func_impl(ulong max_wait)
11659 {
11660   DBUG_ENTER("ndb_wait_setup_func_impl");
11661 
11662   pthread_mutex_lock(&ndbcluster_mutex);
11663 
11664   struct timespec abstime;
11665   set_timespec(abstime, 1);
11666 
11667   while (!ndb_setup_complete && max_wait)
11668   {
11669     int rc= pthread_cond_timedwait(&COND_ndb_setup_complete,
11670                                    &ndbcluster_mutex,
11671                                    &abstime);
11672     if (rc)
11673     {
11674       if (rc == ETIMEDOUT)
11675       {
11676         DBUG_PRINT("info", ("1s elapsed waiting"));
11677         max_wait--;
11678         set_timespec(abstime, 1); /* 1 second from now*/
11679       }
11680       else
11681       {
11682         DBUG_PRINT("info", ("Bad pthread_cond_timedwait rc : %u",
11683                             rc));
11684         assert(false);
11685         break;
11686       }
11687     }
11688   }
11689 
11690   pthread_mutex_unlock(&ndbcluster_mutex);
11691 
11692   DBUG_RETURN((ndb_setup_complete == 1)? 0 : 1);
11693 }
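/*
  The loop above decrements max_wait once per ETIMEDOUT wakeup, so max_wait
  is effectively a timeout in whole seconds. A wakeup caused by
  COND_ndb_setup_complete being signalled (rc == 0) re-checks the predicate
  without consuming any of the time budget.
*/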
11694 
11695 int(*ndb_wait_setup_func)(ulong) = 0;
11696 #endif
11697 extern int ndb_dictionary_is_mysqld;
11698 
11699 static int ndbcluster_init(void *p)
11700 {
11701   DBUG_ENTER("ndbcluster_init");
11702 
11703   if (ndbcluster_inited)
11704     DBUG_RETURN(FALSE);
11705 
11706   pthread_mutex_init(&ndbcluster_mutex,MY_MUTEX_INIT_FAST);
11707   pthread_mutex_init(&LOCK_ndb_util_thread, MY_MUTEX_INIT_FAST);
11708   pthread_cond_init(&COND_ndb_util_thread, NULL);
11709   pthread_cond_init(&COND_ndb_util_ready, NULL);
11710   pthread_cond_init(&COND_ndb_setup_complete, NULL);
11711   ndb_util_thread_running= -1;
11712   pthread_mutex_init(&LOCK_ndb_index_stat_thread, MY_MUTEX_INIT_FAST);
11713   pthread_cond_init(&COND_ndb_index_stat_thread, NULL);
11714   pthread_cond_init(&COND_ndb_index_stat_ready, NULL);
11715   pthread_mutex_init(&ndb_index_stat_list_mutex, MY_MUTEX_INIT_FAST);
11716   pthread_mutex_init(&ndb_index_stat_stat_mutex, MY_MUTEX_INIT_FAST);
11717   pthread_cond_init(&ndb_index_stat_stat_cond, NULL);
11718   ndb_index_stat_thread_running= -1;
11719   ndbcluster_terminating= 0;
11720   ndb_dictionary_is_mysqld= 1;
11721   ndb_setup_complete= 0;
11722   ndbcluster_hton= (handlerton *)p;
11723   ndbcluster_global_schema_lock_init(ndbcluster_hton);
11724 
11725   {
11726     handlerton *h= ndbcluster_hton;
11727     h->state=            SHOW_OPTION_YES;
11728     h->db_type=          DB_TYPE_NDBCLUSTER;
11729     h->close_connection= ndbcluster_close_connection;
11730     h->commit=           ndbcluster_commit;
11731     h->rollback=         ndbcluster_rollback;
11732     h->create=           ndbcluster_create_handler; /* Create a new handler */
11733     h->drop_database=    ndbcluster_drop_database;  /* Drop a database */
11734     h->panic=            ndbcluster_end;            /* Panic call */
11735     h->show_status=      ndbcluster_show_status;    /* Show status */
11736     h->alter_tablespace= ndbcluster_alter_tablespace;    /* Alter tablespace */
11737     h->partition_flags=  ndbcluster_partition_flags; /* Partition flags */
11738     h->alter_table_flags=
11739       ndbcluster_alter_table_flags;                 /* Alter table flags */
11740 #if MYSQL_VERSION_ID >= 50501
11741     h->fill_is_table=    ndbcluster_fill_is_table;
11742 #else
11743     h->fill_files_table= ndbcluster_fill_files_table;
11744 #endif
11745     ndbcluster_binlog_init_handlerton();
11746     h->flags=            HTON_CAN_RECREATE | HTON_TEMPORARY_NOT_SUPPORTED;
11747     h->discover=         ndbcluster_discover;
11748     h->find_files=       ndbcluster_find_files;
11749     h->table_exists_in_engine= ndbcluster_table_exists_in_engine;
11750 #ifndef NDB_WITHOUT_JOIN_PUSHDOWN
11751     h->make_pushed_join= ndbcluster_make_pushed_join;
11752 #endif
11753   }
11754 
11755   // Initialize ndb interface
11756   ndb_init_internal();
11757 
11758   /* allocate connection resources and connect to cluster */
11759   const uint global_opti_node_select= THDVAR(NULL, optimized_node_selection);
11760   if (ndbcluster_connect(connect_callback, opt_ndb_wait_connected,
11761                          opt_ndb_cluster_connection_pool,
11762                          (global_opti_node_select & 1),
11763                          opt_ndb_connectstring,
11764                          opt_ndb_nodeid))
11765   {
11766     DBUG_PRINT("error", ("Could not initiate connection to cluster"));
11767     goto ndbcluster_init_error;
11768   }
11769 
11770   (void) my_hash_init(&ndbcluster_open_tables,table_alias_charset,32,0,0,
11771                       (my_hash_get_key) ndbcluster_get_key,0,0);
11772   /* start the ndb injector thread */
11773   if (ndbcluster_binlog_start())
11774   {
11775     DBUG_PRINT("error", ("Could not start the injector thread"));
11776     goto ndbcluster_init_error;
11777   }
11778 
11779   // Create utility thread
11780   pthread_t tmp;
11781   if (pthread_create(&tmp, &connection_attrib, ndb_util_thread_func, 0))
11782   {
11783     DBUG_PRINT("error", ("Could not create ndb utility thread"));
11784     my_hash_free(&ndbcluster_open_tables);
11785     pthread_mutex_destroy(&ndbcluster_mutex);
11786     pthread_mutex_destroy(&LOCK_ndb_util_thread);
11787     pthread_cond_destroy(&COND_ndb_util_thread);
11788     pthread_cond_destroy(&COND_ndb_util_ready);
11789     pthread_cond_destroy(&COND_ndb_setup_complete);
11790     goto ndbcluster_init_error;
11791   }
11792 
11793   /* Wait for the util thread to start */
11794   pthread_mutex_lock(&LOCK_ndb_util_thread);
11795   while (ndb_util_thread_running < 0)
11796     pthread_cond_wait(&COND_ndb_util_ready, &LOCK_ndb_util_thread);
11797   pthread_mutex_unlock(&LOCK_ndb_util_thread);
11798 
11799   if (!ndb_util_thread_running)
11800   {
11801     DBUG_PRINT("error", ("ndb utility thread exited prematurely"));
11802     my_hash_free(&ndbcluster_open_tables);
11803     pthread_mutex_destroy(&ndbcluster_mutex);
11804     pthread_mutex_destroy(&LOCK_ndb_util_thread);
11805     pthread_cond_destroy(&COND_ndb_util_thread);
11806     pthread_cond_destroy(&COND_ndb_util_ready);
11807     pthread_cond_destroy(&COND_ndb_setup_complete);
11808     goto ndbcluster_init_error;
11809   }
11810 
11811   // Create index statistics thread
11812   pthread_t tmp2;
11813   if (pthread_create(&tmp2, &connection_attrib, ndb_index_stat_thread_func, 0))
11814   {
11815     DBUG_PRINT("error", ("Could not create ndb index statistics thread"));
11816     my_hash_free(&ndbcluster_open_tables);
11817     pthread_mutex_destroy(&ndbcluster_mutex);
11818     pthread_mutex_destroy(&LOCK_ndb_index_stat_thread);
11819     pthread_cond_destroy(&COND_ndb_index_stat_thread);
11820     pthread_cond_destroy(&COND_ndb_index_stat_ready);
11821     pthread_mutex_destroy(&ndb_index_stat_list_mutex);
11822     pthread_mutex_destroy(&ndb_index_stat_stat_mutex);
11823     pthread_cond_destroy(&ndb_index_stat_stat_cond);
11824     goto ndbcluster_init_error;
11825   }
11826 
11827   /* Wait for the index statistics thread to start */
11828   pthread_mutex_lock(&LOCK_ndb_index_stat_thread);
11829   while (ndb_index_stat_thread_running < 0)
11830     pthread_cond_wait(&COND_ndb_index_stat_ready, &LOCK_ndb_index_stat_thread);
11831   pthread_mutex_unlock(&LOCK_ndb_index_stat_thread);
11832 
11833   if (!ndb_index_stat_thread_running)
11834   {
11835     DBUG_PRINT("error", ("ndb index statistics thread exited prematurely"));
11836     my_hash_free(&ndbcluster_open_tables);
11837     pthread_mutex_destroy(&ndbcluster_mutex);
11838     pthread_mutex_destroy(&LOCK_ndb_index_stat_thread);
11839     pthread_cond_destroy(&COND_ndb_index_stat_thread);
11840     pthread_cond_destroy(&COND_ndb_index_stat_ready);
11841     pthread_mutex_destroy(&ndb_index_stat_list_mutex);
11842     pthread_mutex_destroy(&ndb_index_stat_stat_mutex);
11843     pthread_cond_destroy(&ndb_index_stat_stat_cond);
11844     goto ndbcluster_init_error;
11845   }
11846 
11847 #ifndef NDB_NO_WAIT_SETUP
11848   ndb_wait_setup_func= ndb_wait_setup_func_impl;
11849 #endif
11850 
11851   memset(&g_slave_api_client_stats, 0, sizeof(g_slave_api_client_stats));
11852 
11853   ndbcluster_inited= 1;
11854   DBUG_RETURN(FALSE);
11855 
11856 ndbcluster_init_error:
11857   /* disconnect from cluster and free connection resources */
11858   ndbcluster_disconnect();
11859   ndbcluster_hton->state= SHOW_OPTION_DISABLED;               // If we couldn't use handler
11860 
11861   ndbcluster_global_schema_lock_deinit();
11862 
11863   DBUG_RETURN(TRUE);
11864 }
11865 
11866 #ifndef DBUG_OFF
11867 static
11868 const char*
11869 get_share_state_string(NDB_SHARE_STATE s)
11870 {
11871   switch(s) {
11872   case NSS_INITIAL:
11873     return "NSS_INITIAL";
11874   case NSS_ALTERED:
11875     return "NSS_ALTERED";
11876   case NSS_DROPPED:
11877     return "NSS_DROPPED";
11878   }
11879   assert(false);
11880   return "<unknown>";
11881 }
11882 #endif
11883 
11884 int ndbcluster_binlog_end(THD *thd);
11885 
11886 static int ndbcluster_end(handlerton *hton, ha_panic_function type)
11887 {
11888   DBUG_ENTER("ndbcluster_end");
11889 
11890   if (!ndbcluster_inited)
11891     DBUG_RETURN(0);
11892   ndbcluster_inited= 0;
11893 
11894   /* wait for index stat thread to finish */
11895   sql_print_information("Stopping Cluster Index Statistics thread");
11896   pthread_mutex_lock(&LOCK_ndb_index_stat_thread);
11897   ndbcluster_terminating= 1;
11898   pthread_cond_signal(&COND_ndb_index_stat_thread);
11899   while (ndb_index_stat_thread_running > 0)
11900     pthread_cond_wait(&COND_ndb_index_stat_ready, &LOCK_ndb_index_stat_thread);
11901   pthread_mutex_unlock(&LOCK_ndb_index_stat_thread);
11902 
11903   /* wait for util and binlog thread to finish */
11904   ndbcluster_binlog_end(NULL);
11905 
11906   {
11907     pthread_mutex_lock(&ndbcluster_mutex);
11908     uint save = ndbcluster_open_tables.records; (void)save;
11909     while (ndbcluster_open_tables.records)
11910     {
11911       NDB_SHARE *share=
11912         (NDB_SHARE*) my_hash_element(&ndbcluster_open_tables, 0);
11913 #ifndef DBUG_OFF
11914       fprintf(stderr,
11915               "NDB: table share %s with use_count %d state: %s(%u) not freed\n",
11916               share->key, share->use_count,
11917               get_share_state_string(share->state),
11918               (uint)share->state);
11919 #endif
11920       ndbcluster_real_free_share(&share);
11921     }
11922     pthread_mutex_unlock(&ndbcluster_mutex);
11923     DBUG_ASSERT(save == 0);
11924   }
11925   my_hash_free(&ndbcluster_open_tables);
11926 
11927   ndb_index_stat_end();
11928   ndbcluster_disconnect();
11929 
11930   ndbcluster_global_schema_lock_deinit();
11931 
11932   // cleanup ndb interface
11933   ndb_end_internal();
11934 
11935   pthread_mutex_destroy(&ndbcluster_mutex);
11936   pthread_mutex_destroy(&LOCK_ndb_util_thread);
11937   pthread_cond_destroy(&COND_ndb_util_thread);
11938   pthread_cond_destroy(&COND_ndb_util_ready);
11939   pthread_cond_destroy(&COND_ndb_setup_complete);
11940   pthread_mutex_destroy(&LOCK_ndb_index_stat_thread);
11941   pthread_cond_destroy(&COND_ndb_index_stat_thread);
11942   pthread_cond_destroy(&COND_ndb_index_stat_ready);
11943 
11944   DBUG_RETURN(0);
11945 }
11946 
11947 void ha_ndbcluster::print_error(int error, myf errflag)
11948 {
11949   DBUG_ENTER("ha_ndbcluster::print_error");
11950   DBUG_PRINT("enter", ("error: %d", error));
11951 
11952   if (error == HA_ERR_NO_PARTITION_FOUND)
11953     m_part_info->print_no_partition_found(table);
11954   else
11955   {
11956     if (error == HA_ERR_FOUND_DUPP_KEY &&
11957         (table == NULL || table->file == NULL))
11958     {
11959       /*
11960         This is a side effect of 'ndbcluster_print_error' (called from
11961         'ndbcluster_commit' and 'ndbcluster_rollback') which realises
11962         that it "knows nothing" and creates a brand new ha_ndbcluster
11963         in order to be able to call the print_error() function.
11964         Unfortunately the new ha_ndbcluster hasn't been open()ed
11965         and thus table pointer etc. is not set. Since handler::print_error()
11966         will use that pointer without checking for NULL (it naturally
11967         assumes an error can only be returned when the handler is open)
11968         this would crash the mysqld unless it's handled here.
11969       */
11970       my_error(ER_DUP_KEY, errflag, table_share->table_name.str, error);
11971       DBUG_VOID_RETURN;
11972     }
11973 
11974     handler::print_error(error, errflag);
11975   }
11976   DBUG_VOID_RETURN;
11977 }
11978 
11979 
11980 /**
11981   Static error print function called from static handler method
11982   ndbcluster_commit and ndbcluster_rollback.
11983 */
11984 
11985 void ndbcluster_print_error(int error, const NdbOperation *error_op)
11986 {
11987   DBUG_ENTER("ndbcluster_print_error");
11988   TABLE_SHARE share;
11989   const char *tab_name= (error_op) ? error_op->getTableName() : "";
11990   if (tab_name == NULL)
11991   {
11992     DBUG_ASSERT(tab_name != NULL);
11993     tab_name= "";
11994   }
11995   share.db.str= (char*) "";
11996   share.db.length= 0;
11997   share.table_name.str= (char *) tab_name;
11998   share.table_name.length= strlen(tab_name);
11999   ha_ndbcluster error_handler(ndbcluster_hton, &share);
12000   error_handler.print_error(error, MYF(0));
12001   DBUG_VOID_RETURN;
12002 }
12003 
12004 /**
12005   Extract the database name from a table's full pathname into 'dbname'.
12006 */
12007 
12008 void ha_ndbcluster::set_dbname(const char *path_name, char *dbname)
12009 {
12010   char *end, *ptr, *tmp_name;
12011   char tmp_buff[FN_REFLEN + 1];
12012 
12013   tmp_name= tmp_buff;
12014   /* Scan name from the end */
12015   ptr= strend(path_name)-1;
12016   while (ptr >= path_name && *ptr != '\\' && *ptr != '/') {
12017     ptr--;
12018   }
12019   ptr--;
12020   end= ptr;
12021   while (ptr >= path_name && *ptr != '\\' && *ptr != '/') {
12022     ptr--;
12023   }
12024   uint name_len= end - ptr;
12025   memcpy(tmp_name, ptr + 1, name_len);
12026   tmp_name[name_len]= '\0';
12027   filename_to_tablename(tmp_name, dbname, sizeof(tmp_buff) - 1);
12028 }
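/*
  Example, assuming a typical extension-less table path: for
  "/data/mysql/testdb/t1" the first loop above skips the file name
  component ("t1"), the second isolates the directory component, and
  dbname becomes "testdb" after filename_to_tablename() has decoded any
  @-encoded characters.
*/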
12029 
12030 /**
12031   Set m_dbname from full pathname to table file.
12032 */
12033 
12034 void ha_ndbcluster::set_dbname(const char *path_name)
12035 {
12036   set_dbname(path_name, m_dbname);
12037 }
12038 
12039 /**
12040   Extract the table name from a table's full pathname into 'tabname'.
12041 */
12042 
12043 void
12044 ha_ndbcluster::set_tabname(const char *path_name, char * tabname)
12045 {
12046   char *end, *ptr, *tmp_name;
12047   char tmp_buff[FN_REFLEN + 1];
12048 
12049   tmp_name= tmp_buff;
12050   /* Scan name from the end */
12051   end= strend(path_name)-1;
12052   ptr= end;
12053   while (ptr >= path_name && *ptr != '\\' && *ptr != '/') {
12054     ptr--;
12055   }
12056   uint name_len= end - ptr;
12057   memcpy(tmp_name, ptr + 1, end - ptr);
12058   tmp_name[name_len]= '\0';
12059   filename_to_tablename(tmp_name, tabname, sizeof(tmp_buff) - 1);
12060 }
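/*
  Example, assuming the same path layout as above: "/data/mysql/testdb/t1"
  yields tabname "t1", again run through filename_to_tablename() to decode
  @-encoded characters.
*/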
12061 
12062 /**
12063   Set m_tabname from full pathname to table file.
12064 */
12065 
12066 void ha_ndbcluster::set_tabname(const char *path_name)
12067 {
12068   set_tabname(path_name, m_tabname);
12069 }
12070 
12071 
12072 /*
12073   Controls whether, when there are no stored stats, we do a tree-dive
12074   on all db nodes.  The result is fairly good but does mean a round-trip.
12075  */
12076 static const bool g_ndb_records_in_range_tree_dive= false;
12077 
12078 /* Determine roughly how many records are in the range specified */
12079 ha_rows
12080 ha_ndbcluster::records_in_range(uint inx, key_range *min_key,
12081                                 key_range *max_key)
12082 {
12083   KEY *key_info= table->key_info + inx;
12084   uint key_length= key_info->key_length;
12085   NDB_INDEX_TYPE idx_type= get_index_type(inx);
12086 
12087   DBUG_ENTER("records_in_range");
12088   // Prevent partial read of hash indexes by returning HA_POS_ERROR
12089   if ((idx_type == UNIQUE_INDEX || idx_type == PRIMARY_KEY_INDEX) &&
12090       ((min_key && min_key->length < key_length) ||
12091        (max_key && max_key->length < key_length)))
12092     DBUG_RETURN(HA_POS_ERROR);
12093 
12094   // Read from hash index with full key
12095   // This is a "const" table which returns only one record!
12096   if ((idx_type != ORDERED_INDEX) &&
12097       ((min_key && min_key->length == key_length) &&
12098        (max_key && max_key->length == key_length) &&
12099        (min_key->key==max_key->key ||
12100         memcmp(min_key->key, max_key->key, key_length)==0)))
12101     DBUG_RETURN(1);
12102 
12103   // XXX why this if
12104   if ((idx_type == PRIMARY_KEY_ORDERED_INDEX ||
12105        idx_type == UNIQUE_ORDERED_INDEX ||
12106        idx_type == ORDERED_INDEX))
12107   {
12108     THD *thd= current_thd;
12109     const bool index_stat_enable= THDVAR(NULL, index_stat_enable) &&
12110                                   THDVAR(thd, index_stat_enable);
12111 
12112     if (index_stat_enable)
12113     {
12114       ha_rows rows= HA_POS_ERROR;
12115       int err= ndb_index_stat_get_rir(inx, min_key, max_key, &rows);
12116       if (err == 0)
12117       {
12118         /**
12119          * optimizer thinks that all values < 2 are exact... but
12120          * we don't provide exact statistics
12121          */
12122         if (rows < 2)
12123           rows = 2;
12124         DBUG_RETURN(rows);
12125       }
12126       if (err != 0 &&
12127           /* no stats is not unexpected error */
12128           err != NdbIndexStat::NoIndexStats &&
12129           /* warning was printed at first error */
12130           err != Ndb_index_stat_error_HAS_ERROR)
12131       {
12132         push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN,
12133                             ER_CANT_GET_STAT, /* pun? */
12134                             "index stats (RIR) for key %s:"
12135                             " unexpected error %d",
12136                             key_info->name, err);
12137       }
12138       /*fall through*/
12139     }
12140 
12141     if (g_ndb_records_in_range_tree_dive)
12142     {
12143       NDB_INDEX_DATA& d=m_index[inx];
12144       const NDBINDEX* index= d.index;
12145       Ndb *ndb= get_ndb(thd);
12146       NdbTransaction* active_trans= m_thd_ndb ? m_thd_ndb->trans : 0;
12147       NdbTransaction* trans=NULL;
12148       int res=0;
12149       Uint64 rows= 0;
12150 
12151       do
12152       {
12153         if ((trans=active_trans) == NULL ||
12154             trans->commitStatus() != NdbTransaction::Started)
12155         {
12156           DBUG_PRINT("info", ("no active trans"));
12157           if (! (trans=ndb->startTransaction()))
12158             ERR_BREAK(ndb->getNdbError(), res);
12159         }
12160 
12161         /* Create an IndexBound struct for the keys */
12162         NdbIndexScanOperation::IndexBound ib;
12163         compute_index_bounds(ib,
12164                              key_info,
12165                              min_key,
12166                              max_key,
12167                              0);
12168 
12169         ib.range_no= 0;
12170 
12171         NdbIndexStat is;
12172         if (is.records_in_range(index,
12173                                 trans,
12174                                 d.ndb_record_key,
12175                                 m_ndb_record,
12176                                 &ib,
12177                                 0,
12178                                 &rows,
12179                                 0) == -1)
12180           ERR_BREAK(is.getNdbError(), res);
12181       } while (0);
12182 
12183       if (trans != active_trans && rows == 0)
12184         rows = 1;
12185       if (trans != active_trans && trans != NULL)
12186         ndb->closeTransaction(trans);
12187       if (res == 0)
12188         DBUG_RETURN(rows);
12189       /*fall through*/
12190     }
12191   }
12192 
12193   /* Use simple heuristics to estimate the fraction
12194      of 'stats.records' returned from the range.
12195   */
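  /*
    Rough selectivity model used below (a heuristic, not exact statistics):
      no bounds at all             -> all rows
      only one bound (low or high) -> 10% of the rows
      range without an eq-prefix   -> 5% of the rows
      leading equality prefix      -> table_rows^(1 - eq_fraction), capped
                                      at 2% and halved once for each side
                                      that extends past the eq-prefix
  */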
12196   do
12197   {
12198     if (stats.records == ~(ha_rows)0 || stats.records == 0)
12199     {
12200       /* Refresh statistics, only read from datanodes if 'use_exact_count' */
12201       THD *thd= current_thd;
12202       if (update_stats(thd, THDVAR(thd, use_exact_count)))
12203         break;
12204     }
12205 
12206     Uint64 rows;
12207     Uint64 table_rows= stats.records;
12208     size_t eq_bound_len= 0;
12209     size_t min_key_length= (min_key) ? min_key->length : 0;
12210     size_t max_key_length= (max_key) ? max_key->length : 0;
12211 
12212     // Might have a closed/open range bound:
12213     // Low range open
12214     if (!min_key_length)
12215     {
12216       rows= (!max_key_length)
12217            ? table_rows             // No range was specified
12218            : table_rows/10;         // -oo .. <high range> -> 10% selectivity
12219     }
12220     // High range open
12221     else if (!max_key_length)
12222     {
12223       rows= table_rows/10;          // <low range>..oo -> 10% selectivity
12224     }
12225     else
12226     {
12227       size_t bounds_len= MIN(min_key_length,max_key_length);
12228       eq_bound_len= 0;
12229       uint eq_bound_offs= 0;
12230 
12231       KEY_PART_INFO* key_part= key_info->key_part;
12232       KEY_PART_INFO* end= key_part+key_info->user_defined_key_parts;
12233       for (; key_part != end; key_part++)
12234       {
12235         uint part_length= key_part->store_length;
12236         if (eq_bound_offs+part_length > bounds_len ||
12237             memcmp(&min_key->key[eq_bound_offs],
12238                    &max_key->key[eq_bound_offs],
12239                    part_length))
12240         {
12241           break;
12242         }
12243         eq_bound_len+= key_part->length;
12244         eq_bound_offs+= part_length;
12245       }
12246 
12247       if (!eq_bound_len)
12248       {
12249         rows= table_rows/20;        // <low range>..<high range> -> 5%
12250       }
12251       else
12252       {
12253         // Has an equality range on a leading part of 'key_length':
12254         // - Assume reduced selectivity for non-unique indexes
12255         //   by decreasing 'eq_fraction' by 20%
12256         // - Assume equal selectivity for all eq_parts in key.
12257 
12258         double eq_fraction = (double)(eq_bound_len) / key_length;
12259         if (idx_type == ORDERED_INDEX) // Non-unique index -> less selectivity
12260           eq_fraction/= 1.20;
12261         if (eq_fraction >= 1.0)        // Exact match -> 1 row
12262           DBUG_RETURN(1);
12263 
12264         rows = (Uint64)((double)table_rows / pow((double)table_rows, eq_fraction));
12265         if (rows > (table_rows/50))    // EQ-range: Max 2% of rows
12266           rows= (table_rows/50);
12267 
12268         if (min_key_length > eq_bound_offs)
12269           rows/= 2;
12270         if (max_key_length > eq_bound_offs)
12271           rows/= 2;
12272       }
12273     }
12274 
12275     // Make sure that EQ is preferred even if row-count is low
12276     if (eq_bound_len && rows < 2)      // At least 2 rows as not exact
12277       rows= 2;
12278     else if (rows < 3)
12279       rows= 3;
12280     DBUG_RETURN(MIN(rows,table_rows));
12281   } while (0);
12282 
12283   DBUG_RETURN(10); /* Poor guess when you don't know anything */
12284 }
12285 
12286 ulonglong ha_ndbcluster::table_flags(void) const
12287 {
12288   THD *thd= current_thd;
12289   ulonglong f=
12290     HA_REC_NOT_IN_SEQ |
12291     HA_NULL_IN_KEY |
12292     HA_AUTO_PART_KEY |
12293     HA_NO_PREFIX_CHAR_KEYS |
12294 #ifndef NDB_WITH_NEW_MRR_INTERFACE
12295     HA_NEED_READ_RANGE_BUFFER |
12296 #endif
12297     HA_CAN_GEOMETRY |
12298     HA_CAN_BIT_FIELD |
12299     HA_PRIMARY_KEY_REQUIRED_FOR_POSITION |
12300     HA_PRIMARY_KEY_REQUIRED_FOR_DELETE |
12301     HA_PARTIAL_COLUMN_READ |
12302     HA_HAS_OWN_BINLOGGING |
12303     HA_BINLOG_ROW_CAPABLE |
12304     HA_HAS_RECORDS |
12305 #ifndef NDB_WITHOUT_ONLINE_ALTER
12306     HA_ONLINE_ALTER |
12307 #endif
12308     0;
12309 
12310   /*
12311     To allow for logging of ndb tables during stmt based logging:
12312     flag capability, but also turn off flag for OWN_BINLOGGING
12313   */
12314   if (thd->variables.binlog_format == BINLOG_FORMAT_STMT)
12315     f= (f | HA_BINLOG_STMT_CAPABLE) & ~HA_HAS_OWN_BINLOGGING;
12316 
12317   /**
12318    * To maximize join pushability we want const-table
12319    * optimization blocked if 'ndb_join_pushdown= on'
12320    */
12321   if (THDVAR(thd, join_pushdown))
12322     f= f | HA_BLOCK_CONST_TABLE;
12323 
12324   return f;
12325 }
12326 
12327 const char * ha_ndbcluster::table_type() const
12328 {
12329   return("NDBCLUSTER");
12330 }
12331 uint ha_ndbcluster::max_supported_record_length() const
12332 {
12333   return NDB_MAX_TUPLE_SIZE;
12334 }
12335 uint ha_ndbcluster::max_supported_keys() const
12336 {
12337   return MAX_KEY;
12338 }
12339 uint ha_ndbcluster::max_supported_key_parts() const
12340 {
12341   return NDB_MAX_NO_OF_ATTRIBUTES_IN_KEY;
12342 }
12343 uint ha_ndbcluster::max_supported_key_length() const
12344 {
12345   return NDB_MAX_KEY_SIZE;
12346 }
12347 uint ha_ndbcluster::max_supported_key_part_length() const
12348 {
12349   return NDB_MAX_KEY_SIZE;
12350 }
12351 bool ha_ndbcluster::low_byte_first() const
12352 {
12353 #ifdef WORDS_BIGENDIAN
12354   return FALSE;
12355 #else
12356   return TRUE;
12357 #endif
12358 }
12359 const char* ha_ndbcluster::index_type(uint key_number)
12360 {
12361   switch (get_index_type(key_number)) {
12362   case ORDERED_INDEX:
12363   case UNIQUE_ORDERED_INDEX:
12364   case PRIMARY_KEY_ORDERED_INDEX:
12365     return "BTREE";
12366   case UNIQUE_INDEX:
12367   case PRIMARY_KEY_INDEX:
12368   default:
12369     return "HASH";
12370   }
12371 }
12372 
12373 uint8 ha_ndbcluster::table_cache_type()
12374 {
12375   DBUG_ENTER("ha_ndbcluster::table_cache_type=HA_CACHE_TBL_ASKTRANSACT");
12376   DBUG_RETURN(HA_CACHE_TBL_ASKTRANSACT);
12377 }
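/*
  HA_CACHE_TBL_ASKTRANSACT tells the query cache to consult the handler
  before caching or reusing a result; for ndbcluster that handshake is
  implemented by register_query_cache_table() and
  ndbcluster_cache_retrieval_allowed() below.
*/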
12378 
12379 /**
12380    Retrieve the commit count for the table object.
12381 
12382    @param thd              Thread context.
12383    @param norm_name        Normalized path to the table.
12384    @param[out] commit_count Commit count for the table.
12385 
12386    @return 0 on success.
12387    @return 1 if an error occured.
12388 */
12389 
12390 uint ndb_get_commitcount(THD *thd, char *norm_name,
12391                          Uint64 *commit_count)
12392 {
12393   char dbname[NAME_LEN + 1];
12394   NDB_SHARE *share;
12395   DBUG_ENTER("ndb_get_commitcount");
12396 
12397   DBUG_PRINT("enter", ("name: %s", norm_name));
12398   pthread_mutex_lock(&ndbcluster_mutex);
12399   if (!(share=(NDB_SHARE*) my_hash_search(&ndbcluster_open_tables,
12400                                           (const uchar*) norm_name,
12401                                           strlen(norm_name))))
12402   {
12403     pthread_mutex_unlock(&ndbcluster_mutex);
12404     DBUG_PRINT("info", ("Table %s not found in ndbcluster_open_tables",
12405                          norm_name));
12406     DBUG_RETURN(1);
12407   }
12408   /* ndb_share reference temporary, free below */
12409   share->use_count++;
12410   DBUG_PRINT("NDB_SHARE", ("%s temporary  use_count: %u",
12411                            share->key, share->use_count));
12412   pthread_mutex_unlock(&ndbcluster_mutex);
12413 
12414   pthread_mutex_lock(&share->mutex);
12415   if (opt_ndb_cache_check_time > 0)
12416   {
12417     if (share->commit_count != 0)
12418     {
12419       *commit_count= share->commit_count;
12420 #ifndef DBUG_OFF
12421       char buff[22];
12422 #endif
12423       DBUG_PRINT("info", ("Getting commit_count: %s from share",
12424                           llstr(share->commit_count, buff)));
12425       pthread_mutex_unlock(&share->mutex);
12426       /* ndb_share reference temporary free */
12427       DBUG_PRINT("NDB_SHARE", ("%s temporary free  use_count: %u",
12428                                share->key, share->use_count));
12429       free_share(&share);
12430       DBUG_RETURN(0);
12431     }
12432   }
12433   DBUG_PRINT("info", ("Get commit_count from NDB"));
12434   Ndb *ndb;
12435   if (!(ndb= check_ndb_in_thd(thd)))
12436     DBUG_RETURN(1);
12437 
12438   ha_ndbcluster::set_dbname(norm_name, dbname);
12439   if (ndb->setDatabaseName(dbname))
12440   {
12441     ERR_RETURN(ndb->getNdbError());
12442   }
12443   uint lock= share->commit_count_lock;
12444   pthread_mutex_unlock(&share->mutex);
12445 
12446   struct Ndb_statistics stat;
12447   {
12448     char tblname[NAME_LEN + 1];
12449     ha_ndbcluster::set_tabname(norm_name, tblname);
12450     Ndb_table_guard ndbtab_g(ndb->getDictionary(), tblname);
12451     if (ndbtab_g.get_table() == 0
12452         || ndb_get_table_statistics(thd, NULL,
12453                                     FALSE,
12454                                     ndb,
12455                                     ndbtab_g.get_table()->getDefaultRecord(),
12456                                     &stat))
12457     {
12458       /* ndb_share reference temporary free */
12459       DBUG_PRINT("NDB_SHARE", ("%s temporary free  use_count: %u",
12460                                share->key, share->use_count));
12461       free_share(&share);
12462       DBUG_RETURN(1);
12463     }
12464   }
12465 
12466   pthread_mutex_lock(&share->mutex);
12467   if (share->commit_count_lock == lock)
12468   {
12469 #ifndef DBUG_OFF
12470     char buff[22];
12471 #endif
12472     DBUG_PRINT("info", ("Setting commit_count to %s",
12473                         llstr(stat.commit_count, buff)));
12474     share->commit_count= stat.commit_count;
12475     *commit_count= stat.commit_count;
12476   }
12477   else
12478   {
12479     DBUG_PRINT("info", ("Discarding commit_count, commit_count_lock changed"));
12480     *commit_count= 0;
12481   }
12482   pthread_mutex_unlock(&share->mutex);
12483   /* ndb_share reference temporary free */
12484   DBUG_PRINT("NDB_SHARE", ("%s temporary free  use_count: %u",
12485                            share->key, share->use_count));
12486   free_share(&share);
12487   DBUG_RETURN(0);
12488 }
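/*
  Hypothetical usage, assuming the usual "./db/table" normalized path
  format used for the share keys above:

    Uint64 commit_count;
    if (ndb_get_commitcount(thd, (char*)"./db/t1", &commit_count) == 0)
      ;  // commit_count now holds the table's current commit counter
*/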
12489 
12490 
12491 /**
12492   Check if a cached query can be used.
12493 
12494   This is done by comparing the supplied engine_data to commit_count of
12495   the table.
12496 
12497   The commit_count is normally retrieved from the share for the table, where
12498   it has been cached by the util thread. If the util thread is not running,
12499   NDB has to be contacted to retrieve the commit_count, which introduces
12500   a small delay while waiting for NDB to answer.
12501 
12502 
12503   @param thd            thread handle
12504   @param full_name      normalized path to the table in the canonical
12505                         format.
12506   @param full_name_len  length of the normalized path to the table.
12507   @param engine_data    parameter retrieved when query was first inserted into
12508                         the cache. If the value of engine_data is changed,
12509                         all queries for this table should be invalidated.
12510 
12511   @retval
12512     TRUE  Yes, use the query from cache
12513   @retval
12514     FALSE No, don't use the cached query, and if engine_data
12515           has changed, all queries for this table should be invalidated
12516 
12517 */
12518 
12519 static my_bool
12520 ndbcluster_cache_retrieval_allowed(THD *thd,
12521                                    char *full_name, uint full_name_len,
12522                                    ulonglong *engine_data)
12523 {
12524   Uint64 commit_count;
12525   char dbname[NAME_LEN + 1];
12526   char tabname[NAME_LEN + 1];
12527 #ifndef DBUG_OFF
12528   char buff[22], buff2[22];
12529 #endif
12530 
12531   ha_ndbcluster::set_dbname(full_name, dbname);
12532   ha_ndbcluster::set_tabname(full_name, tabname);
12533 
12534   DBUG_ENTER("ndbcluster_cache_retrieval_allowed");
12535   DBUG_PRINT("enter", ("dbname: %s, tabname: %s",
12536                        dbname, tabname));
12537 
12538   if (thd_options(thd) & (OPTION_NOT_AUTOCOMMIT | OPTION_BEGIN))
12539   {
12540     /* Don't allow qc to be used if table has been previously
12541        modified in transaction */
12542     if (!check_ndb_in_thd(thd))
12543       DBUG_RETURN(FALSE);
12544     Thd_ndb *thd_ndb= get_thd_ndb(thd);
12545     if (!thd_ndb->changed_tables.is_empty())
12546     {
12547       NDB_SHARE* share;
12548       List_iterator_fast<NDB_SHARE> it(thd_ndb->changed_tables);
12549       while ((share= it++))
12550       {
12551         if (strcmp(share->table_name, tabname) == 0 &&
12552             strcmp(share->db, dbname) == 0)
12553         {
12554           DBUG_PRINT("exit", ("No, transaction has changed table"));
12555           DBUG_RETURN(FALSE);
12556         }
12557       }
12558     }
12559   }
12560 
12561   if (ndb_get_commitcount(thd, full_name, &commit_count))
12562   {
12563     *engine_data= 0; /* invalidate */
12564     DBUG_PRINT("exit", ("No, could not retrieve commit_count"));
12565     DBUG_RETURN(FALSE);
12566   }
12567   DBUG_PRINT("info", ("*engine_data: %s, commit_count: %s",
12568                       llstr(*engine_data, buff), llstr(commit_count, buff2)));
12569   if (commit_count == 0)
12570   {
12571     *engine_data= 0; /* invalidate */
12572     DBUG_PRINT("exit", ("No, local commit has been performed"));
12573     DBUG_RETURN(FALSE);
12574   }
12575   else if (*engine_data != commit_count)
12576   {
12577     *engine_data= commit_count; /* invalidate */
12578     DBUG_PRINT("exit", ("No, commit_count has changed"));
12579     DBUG_RETURN(FALSE);
12580   }
12581 
12582   DBUG_PRINT("exit", ("OK to use cache, engine_data: %s",
12583                       llstr(*engine_data, buff)));
12584   DBUG_RETURN(TRUE);
12585 }
12586 
12587 
12588 /**
12589   Register a table for use in the query cache.
12590 
12591   Fetch the commit_count for the table and return it in engine_data,
12592   this will later be used to check if the table has changed, before
12593   the cached query is reused.
12594 
12595   @param thd            thread handle
12596   @param full_name      normalized path to the table in the
12597                         canonical format.
12598   @param full_name_len  length of the normalized path to the table.
12599   @param engine_callback  function to be called before using cache on
12600                           this table
12601   @param[out] engine_data    commit_count for this table
12602 
12603   @retval
12604     TRUE  Yes, it's ok to cache this query
12605   @retval
12606     FALSE No, don't cache the query
12607 */
12608 
12609 my_bool
12610 ha_ndbcluster::register_query_cache_table(THD *thd,
12611                                           char *full_name, uint full_name_len,
12612                                           qc_engine_callback *engine_callback,
12613                                           ulonglong *engine_data)
12614 {
12615   Uint64 commit_count;
12616 #ifndef DBUG_OFF
12617   char buff[22];
12618 #endif
12619   DBUG_ENTER("ha_ndbcluster::register_query_cache_table");
12620   DBUG_PRINT("enter",("dbname: %s, tabname: %s",
12621 		      m_dbname, m_tabname));
12622 
12623   if (thd_options(thd) & (OPTION_NOT_AUTOCOMMIT | OPTION_BEGIN))
12624   {
12625     /* Don't allow qc to be used if table has been previously
12626        modified in transaction */
12627     Thd_ndb *thd_ndb= get_thd_ndb(thd);
12628     if (!thd_ndb->changed_tables.is_empty())
12629     {
12630       DBUG_ASSERT(m_share);
12631       NDB_SHARE* share;
12632       List_iterator_fast<NDB_SHARE> it(thd_ndb->changed_tables);
12633       while ((share= it++))
12634       {
12635         if (m_share == share)
12636         {
12637           DBUG_PRINT("exit", ("No, transaction has changed table"));
12638           DBUG_RETURN(FALSE);
12639         }
12640       }
12641     }
12642   }
12643 
12644   if (ndb_get_commitcount(thd, full_name, &commit_count))
12645   {
12646     *engine_data= 0;
12647     DBUG_PRINT("exit", ("Error, could not get commitcount"));
12648     DBUG_RETURN(FALSE);
12649   }
12650   *engine_data= commit_count;
12651   *engine_callback= ndbcluster_cache_retrieval_allowed;
12652   DBUG_PRINT("exit", ("commit_count: %s", llstr(commit_count, buff)));
12653   DBUG_RETURN(commit_count > 0);
12654 }
12655 
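/*
  Illustrative sketch (not part of the original source) of the
  query-cache handshake implemented by the two functions above.  The
  server calls register_query_cache_table() when it caches a result;
  before reusing that result, it invokes the registered callback with
  the saved engine_data.  'cached_cb' and 'cached_data' are
  hypothetical locals standing in for the query cache's own storage.

    qc_engine_callback cached_cb;
    ulonglong cached_data;
    if (file->register_query_cache_table(thd, full_name, full_name_len,
                                         &cached_cb, &cached_data))
    {
      // result cached together with cached_cb/cached_data ...
      // later, before reuse:
      if (!cached_cb(thd, full_name, full_name_len, &cached_data))
      {
        // commit_count has changed: discard the cached result
      }
    }
*/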
12656 
12657 /**
12658   Handling the shared NDB_SHARE structure that is needed to
12659   provide table locking.
12660 
12661   It's also used for sharing data with other NDB handlers
12662   in the same MySQL Server. There is currently not much
12663   data we want to or can share.
12664 */
12665 
12666 static uchar *ndbcluster_get_key(NDB_SHARE *share, size_t *length,
12667                                 my_bool not_used MY_ATTRIBUTE((unused)))
12668 {
12669   *length= share->key_length;
12670   return (uchar*) share->key;
12671 }
12672 
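/*
  Illustrative note (not from the original source): ndbcluster_get_key()
  above is the key-extraction callback for the 'ndbcluster_open_tables'
  hash.  It is registered at hash initialization with a my_hash_init()
  call of roughly this shape (exact arguments are an assumption):

    (void) my_hash_init(&ndbcluster_open_tables, table_alias_charset, 32,
                        0, 0, (my_hash_get_key) ndbcluster_get_key, 0, 0);

  my_hash_search(), my_hash_insert() and my_hash_delete() then use the
  callback to obtain each NDB_SHARE element's key and key length.
*/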
12673 
12674 #ifndef DBUG_OFF
12675 
12676 static void print_share(const char* where, NDB_SHARE* share)
12677 {
12678   fprintf(DBUG_FILE,
12679           "%s %s.%s: use_count: %u, commit_count: %lu\n",
12680           where, share->db, share->table_name, share->use_count,
12681           (ulong) share->commit_count);
12682   fprintf(DBUG_FILE,
12683           "  - key: %s, key_length: %d\n",
12684           share->key, share->key_length);
12685 
12686   Ndb_event_data *event_data= 0;
12687   if (share->event_data)
12688     event_data= share->event_data;
12689   else if (share->op)
12690     event_data= (Ndb_event_data *) share->op->getCustomData();
12691   if (event_data)
12692   {
12693     fprintf(DBUG_FILE,
12694             "  - event_data->shadow_table: %p %s.%s\n",
12695             event_data->shadow_table, event_data->shadow_table->s->db.str,
12696             event_data->shadow_table->s->table_name.str);
12697   }
12698 }
12699 
12700 
12701 static void print_ndbcluster_open_tables()
12702 {
12703   DBUG_LOCK_FILE;
12704   fprintf(DBUG_FILE, ">ndbcluster_open_tables\n");
12705   for (uint i= 0; i < ndbcluster_open_tables.records; i++)
12706     print_share("",
12707                 (NDB_SHARE*)my_hash_element(&ndbcluster_open_tables, i));
12708   fprintf(DBUG_FILE, "<ndbcluster_open_tables\n");
12709   DBUG_UNLOCK_FILE;
12710 }
12711 
12712 #endif
12713 
12714 
12715 #define dbug_print_open_tables()                \
12716   DBUG_EXECUTE("info",                          \
12717                print_ndbcluster_open_tables(););
12718 
12719 #define dbug_print_share(t, s)                  \
12720   DBUG_LOCK_FILE;                               \
12721   DBUG_EXECUTE("info",                          \
12722                print_share((t), (s)););         \
12723   DBUG_UNLOCK_FILE;
12724 
12725 
12726 /*
12727   For some reason a share is still around; try to salvage the situation
12728   by closing all cached tables. If the share still exists after that,
12729   there is an error somewhere, but report it only to the error log.
12730   Keep this "trailing share" but rename it, since there are still
12731   references to it, to avoid segmentation faults. There is a risk that
12732   the memory for this trailing share leaks.
12733 
12734   Must be called with previous pthread_mutex_lock(&ndbcluster_mutex)
12735 */
12736 int handle_trailing_share(THD *thd, NDB_SHARE *share)
12737 {
12738   static ulong trailing_share_id= 0;
12739   DBUG_ENTER("handle_trailing_share");
12740 
12741   /* ndb_share reference temporary, free below */
12742   ++share->use_count;
12743   if (opt_ndb_extra_logging > 9)
12744     sql_print_information ("handle_trailing_share: %s use_count: %u", share->key, share->use_count);
12745   DBUG_PRINT("NDB_SHARE", ("%s temporary  use_count: %u",
12746                            share->key, share->use_count));
12747   pthread_mutex_unlock(&ndbcluster_mutex);
12748 
12749   TABLE_LIST table_list;
12750   memset(&table_list, 0, sizeof(table_list));
12751   table_list.db= share->db;
12752   table_list.alias= table_list.table_name= share->table_name;
12753   close_cached_tables(thd, &table_list, TRUE, FALSE, FALSE);
12754 
12755   pthread_mutex_lock(&ndbcluster_mutex);
12756   /* ndb_share reference temporary free */
12757   DBUG_PRINT("NDB_SHARE", ("%s temporary free  use_count: %u",
12758                            share->key, share->use_count));
12759   if (!--share->use_count)
12760   {
12761     if (opt_ndb_extra_logging > 9)
12762       sql_print_information ("handle_trailing_share: %s use_count: %u", share->key, share->use_count);
12763     if (opt_ndb_extra_logging)
12764       sql_print_information("NDB_SHARE: trailing share "
12765                             "%s(connect_count: %u) "
12766                             "released by close_cached_tables at "
12767                             "connect_count: %u",
12768                             share->key,
12769                             share->connect_count,
12770                             g_ndb_cluster_connection->get_connect_count());
12771     ndbcluster_real_free_share(&share);
12772     DBUG_RETURN(0);
12773   }
12774   if (opt_ndb_extra_logging > 9)
12775     sql_print_information ("handle_trailing_share: %s use_count: %u", share->key, share->use_count);
12776 
12777   /*
12778     share still exists; if the share has not been dropped by the
12779     server, release that reference now
12780   */
12781   if (share->state != NSS_DROPPED)
12782   {
12783     share->state= NSS_DROPPED;
12784     /* ndb_share reference create free */
12785     DBUG_PRINT("NDB_SHARE", ("%s create free  use_count: %u",
12786                              share->key, share->use_count));
12787     --share->use_count;
12788     if (opt_ndb_extra_logging > 9)
12789       sql_print_information ("handle_trailing_share: %s use_count: %u", share->key, share->use_count);
12790 
12791     if (share->use_count == 0)
12792     {
12793       if (opt_ndb_extra_logging)
12794         sql_print_information("NDB_SHARE: trailing share "
12795                               "%s(connect_count: %u) "
12796                               "released after NSS_DROPPED check "
12797                               "at connect_count: %u",
12798                               share->key,
12799                               share->connect_count,
12800                               g_ndb_cluster_connection->get_connect_count());
12801       ndbcluster_real_free_share(&share);
12802       DBUG_RETURN(0);
12803     }
12804   }
12805 
12806   DBUG_PRINT("info", ("NDB_SHARE: %s already exists use_count=%d, op=0x%lx.",
12807                       share->key, share->use_count, (long) share->op));
12808   /*
12809      Ignore table shares only opened by util thread
12810    */
12811   if (!((share->use_count == 1) && share->util_thread))
12812   {
12813 #ifdef NDB_LOG_TRAILING_SHARE_ERRORS
12814     sql_print_warning("NDB_SHARE: %s already exists use_count=%d."
12815                       " Moving away for safety, but possible memleak.",
12816                       share->key, share->use_count);
12817 #endif
12818   }
12819   dbug_print_open_tables();
12820 
12821   /*
12822     The Ndb share has not been released as it should have been
12823   */
12824 #ifdef NOT_YET
12825   DBUG_ASSERT(FALSE);
12826 #endif
12827 
12828   /*
12829     This is probably an error.  We can however save the situation,
12830     at the cost of a possible memory leak, by "renaming" the share.
12831     - First remove it from the hash
12832   */
12833   my_hash_delete(&ndbcluster_open_tables, (uchar*) share);
12834 
12835   /*
12836     now give it a new name, just a running number (e.g. "#leak0",
12837     "#leak1", ...); if there is not enough space, allocate some more
12838   */
12839   {
12840     const uint min_key_length= 10;
12841     if (share->key_length < min_key_length)
12842     {
12843       share->key= (char*) alloc_root(&share->mem_root, min_key_length + 1);
12844       share->key_length= min_key_length;
12845     }
12846     share->key_length=
12847       my_snprintf(share->key, min_key_length + 1, "#leak%lu",
12848                   trailing_share_id++);
12849   }
12850   /* Keep it for a possible future trailing free */
12851   my_hash_insert(&ndbcluster_open_tables, (uchar*) share);
12852 
12853   DBUG_RETURN(0);
12854 }
12855 
12856 /*
12857   Rename share is used during rename table.
12858 */
12859 int ndbcluster_prepare_rename_share(NDB_SHARE *share, const char *new_key)
12860 {
12861   /*
12862     allocate and set the new key; the buffer has enough
12863     space for the key, db, and table_name
12864   */
12865   uint new_length= (uint) strlen(new_key);
12866   share->new_key= (char*) alloc_root(&share->mem_root, 2 * (new_length + 1));
12867   strmov(share->new_key, new_key);
12868   return 0;
12869 }
12870 
12871 int ndbcluster_undo_rename_share(THD *thd, NDB_SHARE *share)
12872 {
12873   share->new_key= share->old_names;
12874   ndbcluster_rename_share(thd, share);
12875   return 0;
12876 }
12877 
12878 int ndbcluster_rename_share(THD *thd, NDB_SHARE *share)
12879 {
12880   NDB_SHARE *tmp;
12881   pthread_mutex_lock(&ndbcluster_mutex);
12882   uint new_length= (uint) strlen(share->new_key);
12883   DBUG_PRINT("ndbcluster_rename_share", ("old_key: %s  old_length: %d",
12884                               share->key, share->key_length));
12885   if ((tmp= (NDB_SHARE*) my_hash_search(&ndbcluster_open_tables,
12886                                         (const uchar*) share->new_key,
12887                                         new_length)))
12888     handle_trailing_share(thd, tmp);
12889 
12890   /* remove the share from hash */
12891   my_hash_delete(&ndbcluster_open_tables, (uchar*) share);
12892   dbug_print_open_tables();
12893 
12894   /* save the old key in case the insert fails */
12895   uint old_length= share->key_length;
12896   char *old_key= share->key;
12897 
12898   share->key= share->new_key;
12899   share->key_length= new_length;
12900 
12901   if (my_hash_insert(&ndbcluster_open_tables, (uchar*) share))
12902   {
12903     // ToDo free the allocated stuff above?
12904     DBUG_PRINT("error", ("ndbcluster_rename_share: my_hash_insert %s failed",
12905                          share->key));
12906     share->key= old_key;
12907     share->key_length= old_length;
12908     if (my_hash_insert(&ndbcluster_open_tables, (uchar*) share))
12909     {
12910       sql_print_error("ndbcluster_rename_share: failed to recover %s", share->key);
12911       DBUG_PRINT("error", ("ndbcluster_rename_share: my_hash_insert %s failed",
12912                            share->key));
12913     }
12914     dbug_print_open_tables();
12915     pthread_mutex_unlock(&ndbcluster_mutex);
12916     return -1;
12917   }
12918   dbug_print_open_tables();
12919 
12920   share->db= share->key + new_length + 1;
12921   ha_ndbcluster::set_dbname(share->new_key, share->db);
12922   share->table_name= share->db + strlen(share->db) + 1;
12923   ha_ndbcluster::set_tabname(share->new_key, share->table_name);
12924 
12925   dbug_print_share("ndbcluster_rename_share:", share);
12926   Ndb_event_data *event_data= 0;
12927   if (share->event_data)
12928     event_data= share->event_data;
12929   else if (share->op)
12930     event_data= (Ndb_event_data *) share->op->getCustomData();
12931   if (event_data && event_data->shadow_table)
12932   {
12933     if (!IS_TMP_PREFIX(share->table_name))
12934     {
12935       event_data->shadow_table->s->db.str= share->db;
12936       event_data->shadow_table->s->db.length= strlen(share->db);
12937       event_data->shadow_table->s->table_name.str= share->table_name;
12938       event_data->shadow_table->s->table_name.length= strlen(share->table_name);
12939     }
12940     else
12941     {
12942       /**
12943        * we don't rename table->s here, since it is used by
12944        *   the injector and we don't know whether all events
12945        *   have been processed yet.
12946        * The table will be dropped anyway
12947        */
12948     }
12949   }
12950   /* else rename will be handled when the ALTER event comes */
12951   share->old_names= old_key;
12952   // ToDo free old_names after ALTER EVENT
12953 
12954   if (opt_ndb_extra_logging > 9)
12955     sql_print_information ("ndbcluster_rename_share: %s-%s use_count: %u", old_key, share->key, share->use_count);
12956 
12957   pthread_mutex_unlock(&ndbcluster_mutex);
12958   return 0;
12959 }
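
/*
  Illustrative example (not from the original source) of the packed key
  buffer that ndbcluster_rename_share() relies on.  For a hypothetical
  new_key "./mydb/t1" (new_length == 9), the single allocation of
  2 * (new_length + 1) bytes ends up holding:

      "./mydb/t1\0mydb\0t1\0"
       ^key       ^db   ^table_name

  share->db points at key + new_length + 1, share->table_name at
  db + strlen(db) + 1; set_dbname()/set_tabname() fill in the two
  trailing strings by parsing the path-formed key.
*/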
12960 
12961 /*
12962   Increase refcount on existing share.
12963   Always returns share and cannot fail.
12964 */
12965 NDB_SHARE *ndbcluster_get_share(NDB_SHARE *share)
12966 {
12967   pthread_mutex_lock(&ndbcluster_mutex);
12968   share->use_count++;
12969 
12970   dbug_print_open_tables();
12971   dbug_print_share("ndbcluster_get_share:", share);
12972   if (opt_ndb_extra_logging > 9)
12973     sql_print_information ("ndbcluster_get_share: %s use_count: %u", share->key, share->use_count);
12974   pthread_mutex_unlock(&ndbcluster_mutex);
12975   return share;
12976 }
12977 
12978 
12979 /*
12980   Get a share object for key
12981 
12982   Returns share for key, and increases the refcount on the share.
12983 
12984   create_if_not_exists == TRUE:
12985     creates the share if it does not already exist
12986     returns 0 only due to out of memory, and then sets my_error
12987 
12988   create_if_not_exists == FALSE:
12989     returns 0 if share does not exist
12990 
12991   have_lock == TRUE, pthread_mutex_lock(&ndbcluster_mutex) already taken
12992 */
12993 
12994 NDB_SHARE *ndbcluster_get_share(const char *key, TABLE *table,
12995                                 bool create_if_not_exists,
12996                                 bool have_lock)
12997 {
12998   NDB_SHARE *share;
12999   uint length= (uint) strlen(key);
13000   DBUG_ENTER("ndbcluster_get_share");
13001   DBUG_PRINT("enter", ("key: '%s'", key));
13002 
13003   if (!have_lock)
13004     pthread_mutex_lock(&ndbcluster_mutex);
13005   if (!(share= (NDB_SHARE*) my_hash_search(&ndbcluster_open_tables,
13006                                            (const uchar*) key,
13007                                            length)))
13008   {
13009     if (!create_if_not_exists)
13010     {
13011       DBUG_PRINT("error", ("get_share: %s does not exist", key));
13012       if (!have_lock)
13013         pthread_mutex_unlock(&ndbcluster_mutex);
13014       DBUG_RETURN(0);
13015     }
13016     if ((share= (NDB_SHARE*) my_malloc(sizeof(*share),
13017                                        MYF(MY_WME | MY_ZEROFILL))))
13018     {
13019       MEM_ROOT **root_ptr=
13020         my_pthread_getspecific_ptr(MEM_ROOT**, THR_MALLOC);
13021       MEM_ROOT *old_root= *root_ptr;
13022       init_sql_alloc(&share->mem_root, 1024, 0);
13023       *root_ptr= &share->mem_root; // remember to reset before return
13024       share->flags= 0;
13025       share->state= NSS_INITIAL;
13026       /* enough space for key, db, and table_name */
13027       share->key= (char*) alloc_root(*root_ptr, 2 * (length + 1));
13028       share->key_length= length;
13029       strmov(share->key, key);
13030       if (my_hash_insert(&ndbcluster_open_tables, (uchar*) share))
13031       {
13032         free_root(&share->mem_root, MYF(0));
13033         my_free((uchar*) share, MYF(0));
13034         *root_ptr= old_root;
13035         if (!have_lock)
13036           pthread_mutex_unlock(&ndbcluster_mutex);
13037         DBUG_RETURN(0);
13038       }
13039       thr_lock_init(&share->lock);
13040       pthread_mutex_init(&share->mutex, MY_MUTEX_INIT_FAST);
13041       share->commit_count= 0;
13042       share->commit_count_lock= 0;
13043       share->db= share->key + length + 1;
13044       ha_ndbcluster::set_dbname(key, share->db);
13045       share->table_name= share->db + strlen(share->db) + 1;
13046       ha_ndbcluster::set_tabname(key, share->table_name);
13047       if (ndbcluster_binlog_init_share(current_thd, share, table))
13048       {
13049         DBUG_PRINT("error", ("get_share: %s could not init share", key));
13050         ndbcluster_real_free_share(&share);
13051         *root_ptr= old_root;
13052         if (!have_lock)
13053           pthread_mutex_unlock(&ndbcluster_mutex);
13054         DBUG_RETURN(0);
13055       }
13056       *root_ptr= old_root;
13057     }
13058     else
13059     {
13060       DBUG_PRINT("error", ("get_share: failed to alloc share"));
13061       if (!have_lock)
13062         pthread_mutex_unlock(&ndbcluster_mutex);
13063       my_error(ER_OUTOFMEMORY, MYF(0), static_cast<int>(sizeof(*share)));
13064       DBUG_RETURN(0);
13065     }
13066   }
13067   share->use_count++;
13068   if (opt_ndb_extra_logging > 9)
13069     sql_print_information ("ndbcluster_get_share: %s use_count: %u", share->key, share->use_count);
13070 
13071   dbug_print_open_tables();
13072   dbug_print_share("ndbcluster_get_share:", share);
13073   if (!have_lock)
13074     pthread_mutex_unlock(&ndbcluster_mutex);
13075   DBUG_RETURN(share);
13076 }
13077 
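/*
  Illustrative usage sketch (not from the original source): every
  successful ndbcluster_get_share() must be paired with exactly one
  ndbcluster_free_share(), which frees the share when the last
  reference is dropped.  'key' is a hypothetical "./db/table" style
  key string.

    NDB_SHARE *share=
      ndbcluster_get_share(key, NULL, FALSE, FALSE); // don't create
    if (share)
    {
      // ... use share->db, share->table_name, share->stat, ...
      ndbcluster_free_share(&share, FALSE);
    }
*/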
13078 
13079 void ndbcluster_real_free_share(NDB_SHARE **share)
13080 {
13081   DBUG_ENTER("ndbcluster_real_free_share");
13082   dbug_print_share("ndbcluster_real_free_share:", *share);
13083 
13084   if (opt_ndb_extra_logging > 9)
13085     sql_print_information ("ndbcluster_real_free_share: %s use_count: %u", (*share)->key, (*share)->use_count);
13086 
13087   ndb_index_stat_free(*share);
13088 
13089   my_hash_delete(&ndbcluster_open_tables, (uchar*) *share);
13090   thr_lock_delete(&(*share)->lock);
13091   pthread_mutex_destroy(&(*share)->mutex);
13092 
13093 #ifdef HAVE_NDB_BINLOG
13094   if ((*share)->m_cfn_share && (*share)->m_cfn_share->m_ex_tab && g_ndb)
13095   {
13096     NDBDICT *dict= g_ndb->getDictionary();
13097     dict->removeTableGlobal(*(*share)->m_cfn_share->m_ex_tab, 0);
13098     (*share)->m_cfn_share->m_ex_tab= 0;
13099   }
13100 #endif
13101   (*share)->new_op= 0;
13102   if ((*share)->event_data)
13103   {
13104     delete (*share)->event_data;
13105     (*share)->event_data= 0;
13106   }
13107   free_root(&(*share)->mem_root, MYF(0));
13108   my_free((uchar*) *share, MYF(0));
13109   *share= 0;
13110 
13111   dbug_print_open_tables();
13112   DBUG_VOID_RETURN;
13113 }
13114 
13115 
13116 void ndbcluster_free_share(NDB_SHARE **share, bool have_lock)
13117 {
13118   if (!have_lock)
13119     pthread_mutex_lock(&ndbcluster_mutex);
13120   if (!--(*share)->use_count)
13121   {
13122     if (opt_ndb_extra_logging > 9)
13123       sql_print_information ("ndbcluster_free_share: %s use_count: %u", (*share)->key, (*share)->use_count);
13124     ndbcluster_real_free_share(share);
13125   }
13126   else
13127   {
13128     if (opt_ndb_extra_logging > 9)
13129       sql_print_information ("ndbcluster_free_share: %s use_count: %u", (*share)->key, (*share)->use_count);
13130     dbug_print_open_tables();
13131     dbug_print_share("ndbcluster_free_share:", *share);
13132   }
13133   if (!have_lock)
13134     pthread_mutex_unlock(&ndbcluster_mutex);
13135 }
13136 
13137 
13138 struct ndb_table_statistics_row {
13139   Uint64 rows;
13140   Uint64 commits;
13141   Uint32 size;
13142   Uint64 fixed_mem;
13143   Uint64 var_mem;
13144 };
13145 
13146 int ha_ndbcluster::update_stats(THD *thd,
13147                                 bool do_read_stat,
13148                                 bool have_lock,
13149                                 uint part_id)
13150 {
13151   struct Ndb_statistics stat;
13152   Thd_ndb *thd_ndb= get_thd_ndb(thd);
13153   DBUG_ENTER("ha_ndbcluster::update_stats");
13154   do
13155   {
13156     if (m_share && !do_read_stat)
13157     {
13158       pthread_mutex_lock(&m_share->mutex);
13159       stat= m_share->stat;
13160       pthread_mutex_unlock(&m_share->mutex);
13161 
13162       DBUG_ASSERT(stat.row_count != ~(ha_rows)0); // should never be invalid
13163 
13164       /* Accept shared cached statistics if row_count is valid. */
13165       if (stat.row_count != ~(ha_rows)0)
13166         break;
13167     }
13168 
13169     /* Request statistics from datanodes */
13170     Ndb *ndb= thd_ndb->ndb;
13171     if (ndb->setDatabaseName(m_dbname))
13172     {
13173       DBUG_RETURN(my_errno= HA_ERR_OUT_OF_MEM);
13174     }
13175     if (int err= ndb_get_table_statistics(thd, this, TRUE, ndb,
13176                                           m_ndb_record, &stat,
13177                                           have_lock, part_id))
13178     {
13179       DBUG_RETURN(err);
13180     }
13181 
13182     /* Update shared statistics with fresh data */
13183     if (m_share)
13184     {
13185       pthread_mutex_lock(&m_share->mutex);
13186       m_share->stat= stat;
13187       pthread_mutex_unlock(&m_share->mutex);
13188     }
13189     break;
13190   }
13191   while(0);
13192 
13193   int no_uncommitted_rows_count= 0;
13194   if (m_table_info && !thd_ndb->m_error)
13195   {
13196     m_table_info->records= stat.row_count;
13197     m_table_info->last_count= thd_ndb->count;
13198     no_uncommitted_rows_count= m_table_info->no_uncommitted_rows_count;
13199   }
13200   stats.mean_rec_length= stat.row_size;
13201   stats.data_file_length= stat.fragment_memory;
13202   stats.records= stat.row_count + no_uncommitted_rows_count;
13203   stats.max_data_file_length= stat.fragment_extent_space;
13204   stats.delete_length= stat.fragment_extent_free_space;
13205 
13206   DBUG_PRINT("exit", ("stats.records: %d  "
13207                       "stat->row_count: %d  "
13208                       "no_uncommitted_rows_count: %d  "
13209                       "stat->fragment_extent_space: %u  "
13210                       "stat->fragment_extent_free_space: %u",
13211                       (int)stats.records,
13212                       (int)stat.row_count,
13213                       (int)no_uncommitted_rows_count,
13214                       (uint)stat.fragment_extent_space,
13215                       (uint)stat.fragment_extent_free_space));
13216   DBUG_RETURN(0);
13217 }
13218 
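/*
  Worked example (not from the original source) of the records estimate
  computed above: if the data nodes report stat.row_count == 100 and
  the ongoing transaction has inserted 5 rows that are not yet
  committed (no_uncommitted_rows_count == 5), the handler reports
  stats.records == 105.  A transaction that deleted rows contributes a
  negative count in the same way.
*/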
13219 /**
13220   Update 'row_count' in the shared table statistics if any rows were
13221   inserted/deleted by the local transaction related to the specified
13222   'local_stat'.
13223   Should be called when the transaction has successfully committed its changes.
13224 */
13225 static
13226 void modify_shared_stats(NDB_SHARE *share,
13227                          Ndb_local_table_statistics *local_stat)
13228 {
13229   if (local_stat->no_uncommitted_rows_count)
13230   {
13231     pthread_mutex_lock(&share->mutex);
13232     DBUG_ASSERT(share->stat.row_count != ~(ha_rows)0);// should never be invalid
13233     if (share->stat.row_count != ~(ha_rows)0)
13234     {
13235       DBUG_PRINT("info", ("Update row_count for %s, row_count: %lu, with:%d",
13236                           share->table_name, (ulong) share->stat.row_count,
13237                           local_stat->no_uncommitted_rows_count));
13238       share->stat.row_count=
13239         ((Int64)share->stat.row_count+local_stat->no_uncommitted_rows_count > 0)
13240          ? share->stat.row_count+local_stat->no_uncommitted_rows_count
13241          : 0;
13242     }
13243     pthread_mutex_unlock(&share->mutex);
13244     local_stat->no_uncommitted_rows_count= 0;
13245   }
13246 }
13247 
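/*
  Worked example (not from the original source) of the clamping above:
  with share->stat.row_count == 10 and a committed transaction whose
  local_stat->no_uncommitted_rows_count == -15, the signed sum is -5,
  so row_count is clamped to 0 instead of wrapping around to a huge
  unsigned value.
*/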
13248 /* If part_id contains a legal partition id, ndbstat returns the
13249    partition-statistics pertaining to that partition only.
13250    Otherwise, it returns the table-statistics,
13251    which is an aggregate over all partitions of that table.
13252  */
13253 static
13254 int
13255 ndb_get_table_statistics(THD *thd, ha_ndbcluster* file, bool report_error, Ndb* ndb,
13256                          const NdbRecord *record,
13257                          struct Ndb_statistics * ndbstat,
13258                          bool have_lock,
13259                          uint part_id)
13260 {
13261   Thd_ndb *thd_ndb= get_thd_ndb(current_thd);
13262   NdbTransaction* pTrans;
13263   NdbError error;
13264   int retries= 100;
13265   int reterr= 0;
13266   int retry_sleep= 30; /* 30 milliseconds */
13267   const char *dummyRowPtr;
13268   NdbOperation::GetValueSpec extraGets[8];
13269   Uint64 rows, commits, fixed_mem, var_mem, ext_space, free_ext_space;
13270   Uint32 size, fragid;
13271 #ifndef DBUG_OFF
13272   char buff[22], buff2[22], buff3[22], buff4[22], buff5[22], buff6[22];
13273 #endif
13274   DBUG_ENTER("ndb_get_table_statistics");
13275 
13276   DBUG_ASSERT(record != 0);
13277 
13278   /* We use the passed-in NdbRecord just to get access to the
13279      table; we mask out any/all columns it may have and add
13280      our reads as extraGets.  This is necessary as they are
13281      all pseudo-columns.
13282   */
13283   extraGets[0].column= NdbDictionary::Column::ROW_COUNT;
13284   extraGets[0].appStorage= &rows;
13285   extraGets[1].column= NdbDictionary::Column::COMMIT_COUNT;
13286   extraGets[1].appStorage= &commits;
13287   extraGets[2].column= NdbDictionary::Column::ROW_SIZE;
13288   extraGets[2].appStorage= &size;
13289   extraGets[3].column= NdbDictionary::Column::FRAGMENT_FIXED_MEMORY;
13290   extraGets[3].appStorage= &fixed_mem;
13291   extraGets[4].column= NdbDictionary::Column::FRAGMENT_VARSIZED_MEMORY;
13292   extraGets[4].appStorage= &var_mem;
13293   extraGets[5].column= NdbDictionary::Column::FRAGMENT_EXTENT_SPACE;
13294   extraGets[5].appStorage= &ext_space;
13295   extraGets[6].column= NdbDictionary::Column::FRAGMENT_FREE_EXTENT_SPACE;
13296   extraGets[6].appStorage= &free_ext_space;
13297   extraGets[7].column= NdbDictionary::Column::FRAGMENT;
13298   extraGets[7].appStorage= &fragid;
13299 
13300   const Uint32 codeWords= 1;
13301   Uint32 codeSpace[ codeWords ];
13302   NdbInterpretedCode code(NULL, // Table is irrelevant
13303                           &codeSpace[0],
13304                           codeWords);
13305   if ((code.interpret_exit_last_row() != 0) ||
13306       (code.finalise() != 0))
13307   {
13308     reterr= code.getNdbError().code;
13309     DBUG_PRINT("exit", ("failed, reterr: %u, NdbError %u(%s)", reterr,
13310                         error.code, error.message));
13311     DBUG_RETURN(reterr);
13312   }
13313 
13314   do
13315   {
13316     Uint32 count= 0;
13317     Uint64 sum_rows= 0;
13318     Uint64 sum_commits= 0;
13319     Uint64 sum_row_size= 0;
13320     Uint64 sum_mem= 0;
13321     Uint64 sum_ext_space= 0;
13322     Uint64 sum_free_ext_space= 0;
13323     NdbScanOperation* pOp;
13324     int check;
13325 
13326     if ((pTrans= ndb->startTransaction()) == NULL)
13327     {
13328       error= ndb->getNdbError();
13329       goto retry;
13330     }
13331 
13332     NdbScanOperation::ScanOptions options;
13333     options.optionsPresent= NdbScanOperation::ScanOptions::SO_BATCH |
13334                             NdbScanOperation::ScanOptions::SO_GETVALUE |
13335                             NdbScanOperation::ScanOptions::SO_INTERPRETED;
13336     /* Set batch_size=1, as we need only one row per fragment. */
13337     options.batch= 1;
13338     options.extraGetValues= &extraGets[0];
13339     options.numExtraGetValues= sizeof(extraGets)/sizeof(extraGets[0]);
13340     options.interpretedCode= &code;
13341 
13342     if ((pOp= pTrans->scanTable(record, NdbOperation::LM_CommittedRead,
13343                                 empty_mask,
13344                                 &options,
13345                                 sizeof(NdbScanOperation::ScanOptions))) == NULL)
13346     {
13347       error= pTrans->getNdbError();
13348       goto retry;
13349     }
13350     thd_ndb->m_scan_count++;
13351     thd_ndb->m_pruned_scan_count += (pOp->getPruned()? 1 : 0);
13352 
13353     thd_ndb->m_execute_count++;
13354     DBUG_PRINT("info", ("execute_count: %u", thd_ndb->m_execute_count));
13355     if (pTrans->execute(NdbTransaction::NoCommit,
13356                         NdbOperation::AbortOnError,
13357                         TRUE) == -1)
13358     {
13359       error= pTrans->getNdbError();
13360       goto retry;
13361     }
13362 
13363     while ((check= pOp->nextResult(&dummyRowPtr, TRUE, TRUE)) == 0)
13364     {
13365       DBUG_PRINT("info", ("nextResult rows: %d  commits: %d "
13366                           "fixed_mem_size %d var_mem_size %d "
13367                           "fragmentid %d extent_space %d free_extent_space %d",
13368                           (int)rows, (int)commits, (int)fixed_mem,
13369                           (int)var_mem, (int)fragid, (int)ext_space,
13370                           (int)free_ext_space));
13371 
13372       if ((part_id != ~(uint)0) && fragid != part_id)
13373       {
13374         continue;
13375       }
13376 
13377       sum_rows+= rows;
13378       sum_commits+= commits;
13379       if (sum_row_size < size)
13380         sum_row_size= size;
13381       sum_mem+= fixed_mem + var_mem;
13382       count++;
13383       sum_ext_space += ext_space;
13384       sum_free_ext_space += free_ext_space;
13385 
13386       if ((part_id != ~(uint)0) && fragid == part_id)
13387       {
13388         break;
13389       }
13390     }
13391 
13392     if (check == -1)
13393     {
13394       error= pOp->getNdbError();
13395       goto retry;
13396     }
13397 
13398     pOp->close(TRUE);
13399 
13400     ndb->closeTransaction(pTrans);
13401 
13402     ndbstat->row_count= sum_rows;
13403     ndbstat->commit_count= sum_commits;
13404     ndbstat->row_size= (ulong)sum_row_size;
13405     ndbstat->fragment_memory= sum_mem;
13406     ndbstat->fragment_extent_space= sum_ext_space;
13407     ndbstat->fragment_extent_free_space= sum_free_ext_space;
13408 
13409     DBUG_PRINT("exit", ("records: %s  commits: %s "
13410                         "row_size: %s  mem: %s "
13411                         "allocated: %s  free: %s "
13412                         "count: %u",
13413 			llstr(sum_rows, buff),
13414                         llstr(sum_commits, buff2),
13415                         llstr(sum_row_size, buff3),
13416                         llstr(sum_mem, buff4),
13417                         llstr(sum_ext_space, buff5),
13418                         llstr(sum_free_ext_space, buff6),
13419                         count));
13420 
13421     DBUG_RETURN(0);
13422 retry:
13423     if(report_error)
13424     {
13425       if (file && pTrans)
13426       {
13427         reterr= file->ndb_err(pTrans, have_lock);
13428       }
13429       else
13430       {
13431         const NdbError& tmp= error;
13432         ERR_PRINT(tmp);
13433         reterr= ndb_to_mysql_error(&tmp);
13434       }
13435     }
13436     else
13437       reterr= error.code;
13438 
13439     if (pTrans)
13440     {
13441       ndb->closeTransaction(pTrans);
13442       pTrans= NULL;
13443     }
13444     if (error.status == NdbError::TemporaryError &&
13445         retries-- && !thd->killed)
13446     {
13447       do_retry_sleep(retry_sleep);
13448       continue;
13449     }
13450     break;
13451   } while(1);
13452   DBUG_PRINT("exit", ("failed, reterr: %u, NdbError %u(%s)", reterr,
13453                       error.code, error.message));
13454   DBUG_RETURN(reterr);
13455 }
13456 
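/*
  Worked example (not from the original source) of the per-fragment
  aggregation above: two fragments reporting (rows=50, commits=7,
  size=32) and (rows=60, commits=9, size=40) yield row_count == 110,
  commit_count == 16 and row_size == 40 (the maximum row size, not the
  sum).  If part_id holds a valid partition id, only the matching
  fragment contributes to the totals.
*/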
13457 /**
13458   Create a .ndb file to serve as a placeholder indicating
13459   that the table with this name is a ndb table.
13460 */
13461 
13462 int ha_ndbcluster::write_ndb_file(const char *name)
13463 {
13464   File file;
13465   bool error=1;
13466   char path[FN_REFLEN];
13467 
13468   DBUG_ENTER("write_ndb_file");
13469   DBUG_PRINT("enter", ("name: %s", name));
13470 
13471 #ifndef EMBEDDED_LIBRARY
13472   (void)strxnmov(path, FN_REFLEN-1,
13473                  mysql_data_home,"/",name,ha_ndb_ext,NullS);
13474 #else
13475   (void)strxnmov(path, FN_REFLEN-1, name,ha_ndb_ext, NullS);
13476 #endif
13477 
13478   if ((file=my_create(path, CREATE_MODE,O_RDWR | O_TRUNC,MYF(MY_WME))) >= 0)
13479   {
13480     // It's an empty file
13481     error=0;
13482     my_close(file,MYF(0));
13483   }
13484   DBUG_RETURN(error);
13485 }
13486 
13487 #ifndef NDB_WITH_NEW_MRR_INTERFACE
13488 bool
13489 ha_ndbcluster::null_value_index_search(KEY_MULTI_RANGE *ranges,
13490 				       KEY_MULTI_RANGE *end_range,
13491 				       HANDLER_BUFFER *buffer)
13492 {
13493   DBUG_ENTER("null_value_index_search");
13494   KEY* key_info= table->key_info + active_index;
13495   KEY_MULTI_RANGE *range= ranges;
13496   ulong reclength= table->s->reclength;
13497   uchar *curr= (uchar*)buffer->buffer;
13498   uchar *end_of_buffer= (uchar*)buffer->buffer_end;
13499 
13500   /* All passed ranges whose results could fit into the
13501    * buffer are examined, although some may later be
13502    * marked for skipping, wasting buffer space.
13503    */
13504   assert(!(range->range_flag & SKIP_RANGE));
13505 
13506   for (; range<end_range && curr+reclength <= end_of_buffer;
13507        range++)
13508   {
13509     const uchar *key= range->start_key.key;
13510     uint key_len= range->start_key.length;
13511     if (check_null_in_key(key_info, key, key_len))
13512       DBUG_RETURN(TRUE);
13513     curr += reclength;
13514   }
13515   DBUG_RETURN(FALSE);
13516 }
13517 #endif
13518 
13519 void ha_ndbcluster::check_read_before_write_removal()
13520 {
13521   DBUG_ENTER("check_read_before_write_removal");
13522 
13523   /* Must have determined that rbwr is possible */
13524   assert(m_read_before_write_removal_possible);
13525   m_read_before_write_removal_used= true;
13526 
13527   /* Can't use on table with hidden primary key */
13528   assert(table_share->primary_key != MAX_KEY);
13529 
13530   /* Index must be unique */
13531   DBUG_PRINT("info", ("using index %d", active_index));
13532   const KEY *key= table->key_info + active_index;
13533   assert((key->flags & HA_NOSAME)); NDB_IGNORE_VALUE(key);
13534 
13535   DBUG_VOID_RETURN;
13536 }
13537 
13538 #ifndef NDB_WITH_NEW_MRR_INTERFACE
13539 /*
13540   This is used to check if an ordered index scan is needed for a range in
13541   a multi range read.
13542   If a scan is not needed, we use a faster primary/unique key operation
13543   instead.
13544 */
13545 static my_bool
13546 read_multi_needs_scan(NDB_INDEX_TYPE cur_index_type, const KEY *key_info,
13547                       const KEY_MULTI_RANGE *r)
13548 {
13549   if (cur_index_type == ORDERED_INDEX)
13550     return TRUE;
13551   if (cur_index_type == PRIMARY_KEY_INDEX ||
13552       cur_index_type == UNIQUE_INDEX)
13553     return FALSE;
13554   DBUG_ASSERT(cur_index_type == PRIMARY_KEY_ORDERED_INDEX ||
13555               cur_index_type == UNIQUE_ORDERED_INDEX);
13556   if (r->start_key.length != key_info->key_length ||
13557       r->start_key.flag != HA_READ_KEY_EXACT)
13558     return TRUE;                                // Not exact match, need scan
13559   if (cur_index_type == UNIQUE_ORDERED_INDEX &&
13560       check_null_in_key(key_info, r->start_key.key,r->start_key.length))
13561     return TRUE;                                // Can't use for NULL values
13562   return FALSE;
13563 }
13564 
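/*
  Summary (not from the original source) of the decision implemented in
  read_multi_needs_scan() above:

    index type                  exact EQ key?  NULL in key?  result
    --------------------------  -------------  ------------  -------
    ORDERED_INDEX               -              -             scan
    PRIMARY_KEY_INDEX           -              -             key op
    UNIQUE_INDEX                -              -             key op
    PRIMARY_KEY_ORDERED_INDEX   yes            -             key op
    PRIMARY_KEY_ORDERED_INDEX   no             -             scan
    UNIQUE_ORDERED_INDEX        yes            no            key op
    UNIQUE_ORDERED_INDEX        yes            yes           scan
*/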
13565 int
13566 ha_ndbcluster::read_multi_range_first(KEY_MULTI_RANGE **found_range_p,
13567                                       KEY_MULTI_RANGE *ranges,
13568                                       uint range_count,
13569                                       bool sorted,
13570                                       HANDLER_BUFFER *buffer)
13571 {
13572   KEY* key_info= table->key_info + active_index;
13573   NDB_INDEX_TYPE cur_index_type= get_index_type(active_index);
13574   ulong reclength= table_share->reclength;
13575   NdbTransaction *trans= m_thd_ndb->trans;
13576   int error;
13577 
13578   DBUG_ENTER("ha_ndbcluster::read_multi_range_first");
13579   DBUG_PRINT("info", ("blob fields=%d read_set=0x%x", table_share->blob_fields, table->read_set->bitmap[0]));
13580 
13581   /**
13582    * Blobs and unique hash index with NULL can't be batched currently.
13583    * Neither are pushed lookup joins batchable.
13584    */
13585   if (uses_blob_value(table->read_set) ||
13586       (cur_index_type ==  UNIQUE_INDEX &&
13587        has_null_in_unique_index(active_index) &&
13588        null_value_index_search(ranges, ranges+range_count, buffer))
13589       || (m_pushed_join_operation==PUSHED_ROOT &&
13590          !m_disable_pushed_join &&
13591          !m_pushed_join_member->get_query_def().isScanQuery())
13592       || m_delete_cannot_batch || m_update_cannot_batch)
13593   {
13594     DBUG_PRINT("info", ("read_multi_range not possible, falling back to default handler implementation"));
13595     m_disable_multi_read= TRUE;
13596     DBUG_RETURN(handler::read_multi_range_first(found_range_p,
13597                                                 ranges,
13598                                                 range_count,
13599                                                 sorted,
13600                                                 buffer));
13601   }
13602 
13603   /**
13604    * There may still be an open m_multi_cursor from the previous mrr access on this handler.
13605    * Close it now to free up resources for this NdbScanOperation.
13606    */
13607   if (unlikely((error= close_scan())))
13608     DBUG_RETURN(error);
13609 
13610   m_disable_multi_read= FALSE;
13611 
13612   /*
13613    * Copy arguments into member variables
13614    */
13615   m_multi_ranges= ranges;
13616   multi_range_curr= ranges;
13617   multi_range_end= ranges+range_count;
13618   multi_range_sorted= sorted;
13619   multi_range_buffer= buffer;
13620 
13621   /*
13622    * read multi range will read ranges as follows (if not ordered)
13623    *
13624    * input    read order
13625    * ======   ==========
13626    * pk-op 1  pk-op 1
13627    * pk-op 2  pk-op 2
13628    * range 3  range (3,5) NOTE result rows will be intermixed
13629    * pk-op 4  pk-op 4
13630    * range 5
13631    * pk-op 6  pk-op 6
13632    */
13633 
13634   /*
13635     We first loop over all ranges, converting into primary/unique key
13636     operations if possible, and counting ranges that require an
13637     ordered index scan. If the supplied HANDLER_BUFFER is too small, we
13638     may also need to do only part of the multi read at once.
13639 
13640     Afterwards, we create the ordered index scan cursor (if needed).
13641   */
13642 
13643   DBUG_ASSERT(cur_index_type != UNDEFINED_INDEX);
13644   DBUG_ASSERT(m_multi_cursor==NULL);
13645   DBUG_ASSERT(m_active_query==NULL);
13646 
13647   const NdbOperation* lastOp= trans ? trans->getLastDefinedOperation() : 0;
13648   const NdbOperation::LockMode lm = get_ndb_lock_mode(m_lock.type);
13649   uchar *row_buf= (uchar *)buffer->buffer;
13650   const uchar *end_of_buffer= buffer->buffer_end;
13651   uint num_scan_ranges= 0;
13652   uint i;
13653   bool any_real_read= FALSE;
13654 
13655   if (m_read_before_write_removal_possible)
13656     check_read_before_write_removal();
13657   for (i= 0; i < range_count; i++)
13658   {
13659     KEY_MULTI_RANGE *r= &ranges[i];
13660 
13661     part_id_range part_spec;
13662     if (m_use_partition_pruning)
13663     {
13664       get_partition_set(table, table->record[0], active_index, &r->start_key,
13665                         &part_spec);
13666       DBUG_PRINT("info", ("part_spec.start_part: %u  part_spec.end_part: %u",
13667                           part_spec.start_part, part_spec.end_part));
13668       /*
13669         If partition pruning has found no partition in set
13670         we can skip this scan
13671       */
13672       if (part_spec.start_part > part_spec.end_part)
13673       {
13674         /*
13675           We can skip this partition since the key won't fit into any
13676           partition
13677         */
13678         r->range_flag|= SKIP_RANGE;
13679         row_buf += reclength;
13680         continue;
13681       }
13682       if (!trans &&
13683           (part_spec.start_part == part_spec.end_part))
13684         if (unlikely(!(trans= start_transaction_part_id(part_spec.start_part,
13685                                                         error))))
13686           DBUG_RETURN(error);
13687     }
13688     r->range_flag&= ~(uint)SKIP_RANGE;
13689 
13690     if ((m_pushed_join_operation==PUSHED_ROOT &&
13691          m_pushed_join_member->get_query_def().isScanQuery()) || // Pushed joins restricted to ordered range scan in mrr
13692         read_multi_needs_scan(cur_index_type, key_info, r))
13693     {
13694       if (!trans)
13695       {
13696         // ToDo see if we can use start_transaction_key here instead
13697         if (!m_use_partition_pruning)
13698         {
13699           get_partition_set(table, table->record[0], active_index, &r->start_key,
13700                             &part_spec);
13701           if (part_spec.start_part == part_spec.end_part)
13702           {
13703             if (unlikely(!(trans= start_transaction_part_id(part_spec.start_part,
13704                                                             error))))
13705               DBUG_RETURN(error);
13706           }
13707           else if (unlikely(!(trans= start_transaction(error))))
13708             DBUG_RETURN(error);
13709         }
13710         else if (unlikely(!(trans= start_transaction(error))))
13711           DBUG_RETURN(error);
13712       }
13713 
13714       any_real_read= TRUE;
13715       DBUG_PRINT("info", ("any_real_read= TRUE"));
13716 
13717       /*
13718         If we reach the limit of ranges allowed in a single scan: stop
13719         here, send what we have so far, and continue when done with that.
13720       */
13721       if (i > NdbIndexScanOperation::MaxRangeNo)
13722       {
13723         DBUG_PRINT("info", ("Reached the limit of ranges allowed in a single"
13724                             "scan"));
13725         break;
13726       }
13727 
13728 #ifndef NDB_WITHOUT_JOIN_PUSHDOWN
13729       /* Create the scan operation for the first scan range. */
13730       if (check_if_pushable(NdbQueryOperationDef::OrderedIndexScan,
13731                             active_index,
13732                             !m_active_query && sorted))
13733       {
13734         if (!m_active_query)
13735         {
13736           const int error= create_pushed_join();
13737           if (unlikely(error))
13738             DBUG_RETURN(error);
13739 
13740           NdbQuery* const query= m_active_query;
13741           if (sorted &&
13742               query->getQueryOperation((uint)PUSHED_ROOT)->setOrdering(NdbQueryOptions::ScanOrdering_ascending))
13743             ERR_RETURN(query->getNdbError());
13744         }
13745       }
13746       else
13747 #endif
13748       if (!m_multi_cursor)
13749       {
13750         if (m_pushed_join_operation == PUSHED_ROOT)
13751         {
13752           m_thd_ndb->m_pushed_queries_dropped++;
13753         }
13754         /* Do a multi-range index scan for ranges not done by primary/unique key. */
13755         NdbScanOperation::ScanOptions options;
13756         NdbInterpretedCode code(m_table);
13757 
13758         options.optionsPresent=
13759           NdbScanOperation::ScanOptions::SO_SCANFLAGS |
13760           NdbScanOperation::ScanOptions::SO_PARALLEL;
13761 
13762         options.scan_flags=
13763           NdbScanOperation::SF_ReadRangeNo |
13764           NdbScanOperation::SF_MultiRange;
13765 
13766         if (lm == NdbOperation::LM_Read)
13767           options.scan_flags|= NdbScanOperation::SF_KeyInfo;
13768         if (sorted)
13769           options.scan_flags|= NdbScanOperation::SF_OrderByFull;
13770 
13771         options.parallel= DEFAULT_PARALLELISM;
13772 
13773         NdbOperation::GetValueSpec gets[2];
13774         if (table_share->primary_key == MAX_KEY)
13775           get_hidden_fields_scan(&options, gets);
13776 
13777         if (m_cond && m_cond->generate_scan_filter(&code, &options))
13778           ERR_RETURN(code.getNdbError());
13779 
13780         /* Define scan */
13781         NdbIndexScanOperation *scanOp= trans->scanIndex
13782           (m_index[active_index].ndb_record_key,
13783            m_ndb_record,
13784            lm,
13785            (uchar *)(table->read_set->bitmap),
13786            NULL, /* All bounds specified below */
13787            &options,
13788            sizeof(NdbScanOperation::ScanOptions));
13789 
13790         if (!scanOp)
13791           ERR_RETURN(trans->getNdbError());
13792 
13793         m_multi_cursor= scanOp;
13794 
13795         /*
13796           We do not get_blob_values() here, as when using blobs we always
13797           fallback to non-batched multi range read (see if statement at
13798           top of this function).
13799         */
13800 
13801         /* We set m_next_row=0 to say that no row was fetched from the scan yet. */
13802         m_next_row= 0;
13803       }
13804 
13805       Ndb::PartitionSpec ndbPartitionSpec;
13806       const Ndb::PartitionSpec* ndbPartSpecPtr= NULL;
13807 
13808       /* If this table uses user-defined partitioning, use MySQLD provided
13809        * partition info as pruning info
13810        * Otherwise, scan range pruning is performed automatically by
13811        * NDBAPI based on distribution key values.
13812        */
13813       if (m_use_partition_pruning &&
13814           m_user_defined_partitioning &&
13815           (part_spec.start_part == part_spec.end_part))
13816       {
13817         DBUG_PRINT("info", ("Range on user-def-partitioned table can be pruned to part %u",
13818                             part_spec.start_part));
13819         ndbPartitionSpec.type= Ndb::PartitionSpec::PS_USER_DEFINED;
13820         ndbPartitionSpec.UserDefined.partitionId= part_spec.start_part;
13821         ndbPartSpecPtr= &ndbPartitionSpec;
13822       }
13823 
13824       /* Include this range in the ordered index scan. */
13825       NdbIndexScanOperation::IndexBound bound;
13826       compute_index_bounds(bound, key_info, &r->start_key, &r->end_key, 0);
13827       bound.range_no= i;
13828 
13829       const NdbRecord *key_rec= m_index[active_index].ndb_record_key;
13830       if (m_active_query)
13831       {
13832         DBUG_PRINT("info", ("setBound:%d, for pushed join", bound.range_no));
13833         if (m_active_query->setBound(key_rec, &bound))
13834         {
13835           ERR_RETURN(trans->getNdbError());
13836         }
13837       }
13838       else
13839       {
13840         if (m_multi_cursor->setBound(key_rec,
13841                                      bound,
13842                                      ndbPartSpecPtr, // Only for user-def tables
13843                                      sizeof(Ndb::PartitionSpec)))
13844         {
13845           ERR_RETURN(trans->getNdbError());
13846         }
13847       }
13848 
13849       r->range_flag&= ~(uint)UNIQUE_RANGE;
13850       num_scan_ranges++;
13851     }
13852     else // if ((...PUSHED_ROOT && m_pushed_join->get_query_def().isScanQuery()) ||...
13853     {
13854       if (m_pushed_join_operation == PUSHED_ROOT)
13855       {
13856         m_thd_ndb->m_pushed_queries_dropped++;
13857       }
13858       if (!trans)
13859       {
13860         DBUG_ASSERT(active_index != MAX_KEY);
13861         if (unlikely(!(trans= start_transaction_key(active_index,
13862                                                     r->start_key.key,
13863                                                     error))))
13864           DBUG_RETURN(error);
13865       }
13866       /*
13867         Convert to primary/unique key operation.
13868 
13869         If there is not enough buffer for reading the row: stop here, send
13870         what we have so far, and continue when done with that.
13871       */
13872       if (row_buf + reclength > end_of_buffer)
13873         break;
13874 
13875       if (m_read_before_write_removal_used)
13876       {
13877         r->range_flag|= READ_KEY_FROM_RANGE;
13878         continue;
13879       }
13880       else
13881       {
13882         any_real_read= TRUE;
13883         DBUG_PRINT("info", ("m_read_before_write_removal_used == FALSE, "
13884                             "any_real_read= TRUE"));
13885       }
13886       r->range_flag|= UNIQUE_RANGE;
13887 
13888       Uint32 partitionId;
13889       Uint32* ppartitionId = NULL;
13890 
13891       if (m_user_defined_partitioning &&
13892           (cur_index_type == PRIMARY_KEY_ORDERED_INDEX ||
13893            cur_index_type == PRIMARY_KEY_INDEX))
13894       {
13895         partitionId=part_spec.start_part;
13896         ppartitionId=&partitionId;
13897       }
13898 
13899       DBUG_PRINT("info", ("Generating Pk/Unique key read for range %u", i));
13900 
13901       // 'Pushable codepath' is incomplete and expected not
13902       // to be produced as make_join_pushed() handles
13903       // AT_MULTI_UNIQUE_KEY as non-pushable
13904       if (m_pushed_join_operation==PUSHED_ROOT &&
13905           !m_disable_pushed_join &&
13906           !m_pushed_join_member->get_query_def().isScanQuery())
13907       {
13908         DBUG_ASSERT(false);  // Incomplete code, should not be executed
13909         DBUG_ASSERT(lm == NdbOperation::LM_CommittedRead);
13910         const int error= pk_unique_index_read_key_pushed(active_index,
13911                                                          r->start_key.key,
13912                                                          ppartitionId);
13913         if (unlikely(error))
13914           DBUG_RETURN(error);
13915       }
13916       else
13917       {
13918         if (m_pushed_join_operation == PUSHED_ROOT)
13919         {
13920           DBUG_PRINT("info", ("Cannot push join due to incomplete implementation."));
13921           m_thd_ndb->m_pushed_queries_dropped++;
13922         }
13923         const NdbOperation* op;
13924         if (!(op= pk_unique_index_read_key(active_index,
13925                                            r->start_key.key,
13926                                            row_buf, lm,
13927                                            ppartitionId)))
13928           ERR_RETURN(trans->getNdbError());
13929       }
13930       row_buf+= reclength;
13931     }
13932   }
13933   DBUG_ASSERT(i > 0 || i == range_count);       // Require progress
13934   m_multi_range_defined_end= ranges + i;
13935 
13936   buffer->end_of_used_area= row_buf;
13937 
13938   if (m_active_query != NULL &&
13939       m_pushed_join_member->get_query_def().isScanQuery())
13940   {
13941     m_thd_ndb->m_scan_count++;
13942     if (sorted)
13943     {
13944       m_thd_ndb->m_sorted_scan_count++;
13945     }
13946 
13947     bool prunable = false;
13948     if (unlikely(m_active_query->isPrunable(prunable) != 0))
13949       ERR_RETURN(m_active_query->getNdbError());
13950     if (prunable)
13951       m_thd_ndb->m_pruned_scan_count++;
13952 
13953     DBUG_PRINT("info", ("Is MRR scan-query pruned to 1 partition? :%u", prunable));
13954     DBUG_ASSERT(!m_multi_cursor);
13955   }
13956   if (m_multi_cursor)
13957   {
13958     DBUG_PRINT("info", ("Is MRR scan pruned to 1 partition? :%u",
13959                         m_multi_cursor->getPruned()));
13960     m_thd_ndb->m_scan_count++;
13961     m_thd_ndb->m_pruned_scan_count += (m_multi_cursor->getPruned()? 1 : 0);
13962     if (sorted)
13963     {
13964       m_thd_ndb->m_sorted_scan_count++;
13965     }
13966   }
13967 
13968   if (any_real_read)
13969   {
13970     /* Get pointer to first range key operation (not scans) */
13971     const NdbOperation* rangeOp= lastOp ? lastOp->next() :
13972       trans->getFirstDefinedOperation();
13973 
13974     DBUG_PRINT("info", ("Executing reads"));
13975 
13976     if (execute_no_commit_ie(m_thd_ndb, trans) == 0)
13977     {
13978       m_multi_range_result_ptr= buffer->buffer;
13979 
13980       /* We must check the result of any primary or unique key
13981        * ranges now, as these operations may be invalidated by
13982        * further execute+releaseOperations calls on this transaction by
13983        * different handler objects.
13984        */
13985       KEY_MULTI_RANGE* rangeInfo= multi_range_curr;
13986 
13987       for (;rangeInfo < m_multi_range_defined_end; rangeInfo++)
13988       {
13989         DBUG_PRINT("info", ("range flag is %u", rangeInfo->range_flag));
13990         if (rangeInfo->range_flag & SKIP_RANGE)
13991           continue;
13992 
13993         if ((rangeInfo->range_flag & UNIQUE_RANGE) &&
13994             (!(rangeInfo->range_flag & READ_KEY_FROM_RANGE)))
13995         {
13996           assert(rangeOp != NULL);
13997           if (rangeOp->getNdbError().code == 0)
13998           {
13999             /* Successful read, results are in buffer.
14000              */
14001             rangeInfo->range_flag &= ~(uint)EMPTY_RANGE;
14002 
14003             DBUG_PRINT("info", ("Unique range op has result"));
14004           }
14005           else
14006           {
14007             NdbError err= rangeOp->getNdbError();
14008 
14009             if (err.classification !=
14010                 NdbError::NoDataFound)
14011               DBUG_RETURN(ndb_err(trans));
14012 
14013             DBUG_PRINT("info", ("Unique range op has no result"));
14014             /* Indicate to read_multi_range_next that this
14015              * result is empty
14016              */
14017             rangeInfo->range_flag |= EMPTY_RANGE;
14018           }
14019 
14020           /* Move to next completed operation */
14021           rangeOp= trans->getNextCompletedOperation(rangeOp);
14022         }
14023 
14024         /* For scan ranges, do nothing here */
14025       }
14026     }
14027     else
14028       ERR_RETURN(trans->getNdbError());
14029   }
14030 
14031   DBUG_RETURN(read_multi_range_next(found_range_p));
14032 }
14033 
14034 int
14035 ha_ndbcluster::read_multi_range_next(KEY_MULTI_RANGE ** multi_range_found_p)
14036 {
14037   DBUG_ENTER("ha_ndbcluster::read_multi_range_next");
14038   if (m_disable_multi_read)
14039   {
14040     DBUG_RETURN(handler::read_multi_range_next(multi_range_found_p));
14041   }
14042 
14043   const ulong reclength= table_share->reclength;
14044 
14045   while (multi_range_curr < m_multi_range_defined_end)
14046   {
14047     if (multi_range_curr->range_flag & SKIP_RANGE)
14048     {
14049       /* Nothing in this range, move to next one, skipping a buffer
14050        'slot'
14051       */
14052       m_multi_range_result_ptr += reclength;
14053       multi_range_curr++;
14054     }
14055     else if (multi_range_curr->range_flag & READ_KEY_FROM_RANGE)
14056     {
14057       DBUG_PRINT("info", ("using read before write removal optimisation"));
14058       KEY* key_info= table->key_info + active_index;
14059       key_restore(table->record[0], (uchar*)multi_range_curr->start_key.key,
14060                   key_info, key_info->key_length);
14061       table->status= 0;
14062       multi_range_curr++;
14063       DBUG_RETURN(0);
14064     }
14065     else if (multi_range_curr->range_flag & UNIQUE_RANGE)
14066     {
14067       /*
14068         Move to next range; we can have at most one record from a unique range.
14069       */
14070       KEY_MULTI_RANGE *old_multi_range_curr= multi_range_curr;
14071       multi_range_curr= old_multi_range_curr + 1;
14072       /*
14073         Clear m_active_cursor; it is used as a flag in update_row() /
14074         delete_row() to know whether the current tuple is from a scan
14075         or pk operation.
14076       */
14077       m_active_cursor= NULL;
14078       const uchar *src_row= m_multi_range_result_ptr;
14079       m_multi_range_result_ptr= src_row + table_share->reclength;
14080 
14081       if (!(old_multi_range_curr->range_flag & EMPTY_RANGE))
14082       {
14083         *multi_range_found_p= old_multi_range_curr;
14084         memcpy(table->record[0], src_row, table_share->reclength);
14085         DBUG_RETURN(0);
14086       }
14087 
14088       /* No row found, so fall through to try the next range. */
14089     }
14090     else
14091     {
14092       /* An index scan range. */
14093       {
14094         int res;
14095         if ((res= read_multi_range_fetch_next()) != 0)
14096           DBUG_RETURN(res);
14097       }
14098       if (!m_next_row)
14099       {
14100         /*
14101           The whole scan is done, and the cursor has been closed.
14102           So nothing more for this range. Move to next.
14103         */
14104         multi_range_curr++;
14105       }
14106       else
14107       {
14108         int current_range_no= m_current_range_no;
14109         int expected_range_no;
14110         /*
14111           For a sorted index scan, we will receive rows in increasing range_no
14112           order, so we can return ranges in order, pausing when range_no
14113           indicates that the currently processed range (multi_range_curr) is
14114           done.
14115 
14116           But for an unsorted scan, we may receive a high range_no from one
14117           fragment followed by a low range_no from another fragment. So we
14118           need to process all index scan ranges together.
14119         */
14120         if (!multi_range_sorted ||
14121             (expected_range_no= multi_range_curr - m_multi_ranges)
14122                 == current_range_no)
14123         {
14124           *multi_range_found_p= m_multi_ranges + current_range_no;
14125           /* Copy out data from the new row. */
14126           unpack_record(table->record[0], m_next_row);
14127           table->status= 0;
14128           /*
14129             Mark that we have used this row, so we need to fetch a new
14130             one on the next call.
14131           */
14132           m_next_row= 0;
14133           /*
14134             Set m_active_cursor; it is used as a flag in update_row() /
14135             delete_row() to know whether the current tuple is from a scan or
14136             pk operation.
14137           */
14138           m_active_cursor= m_multi_cursor;
14139 
14140           DBUG_RETURN(0);
14141         }
14142         else if (current_range_no > expected_range_no)
14143         {
14144           /* Nothing more in scan for this range. Move to next. */
14145           multi_range_curr++;
14146         }
14147         else
14148         {
14149           /*
14150             Should not happen. Ranges should be returned from NDB API in
14151             the order we requested them.
14152           */
14153           DBUG_ASSERT(0);
14154           multi_range_curr++;                     // Attempt to carry on
14155         }
14156       }
14157     }
14158   }
14159 
14160   if (multi_range_curr == multi_range_end)
14161   {
14162     DBUG_RETURN(HA_ERR_END_OF_FILE);
14163   }
14164 
14165   /*
14166     Read remaining ranges
14167   */
14168   DBUG_RETURN(read_multi_range_first(multi_range_found_p,
14169                                      multi_range_curr,
14170                                      multi_range_end - multi_range_curr,
14171                                      multi_range_sorted,
14172                                      multi_range_buffer));
14173 }
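
/*
  Illustrative sketch (not compiled in): how a caller is expected to drive
  the multi-range read interface implemented above. The handler 'h', the
  'ranges' array, 'n_ranges' and 'buffer' are assumed to have been prepared
  by the server; this is a minimal usage outline, not the server's actual
  MRR driver code.
*/
#if 0
  KEY_MULTI_RANGE *found_range;
  int res= h->read_multi_range_first(&found_range, ranges, n_ranges,
                                     true /* sorted */, buffer);
  while (res == 0)
  {
    /* table->record[0] now holds a row belonging to 'found_range' */
    res= h->read_multi_range_next(&found_range);
  }
  /* res == HA_ERR_END_OF_FILE once all ranges are exhausted */
#endif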
14174 
14175 /*
14176   Fetch next row from the ordered index cursor in multi range scan.
14177 
14178   We keep the next row in m_next_row, and the range_no of the
14179   next row in m_current_range_no. This is used in sorted index scan
14180   to correctly interleave rows from primary/unique key operations with
14181   rows from the scan.
14182 */
14183 int
14184 ha_ndbcluster::read_multi_range_fetch_next()
14185 {
14186   DBUG_ENTER("read_multi_range_fetch_next");
14187 
14188   if (m_active_query)
14189   {
14190     DBUG_PRINT("info", ("read_multi_range_fetch_next from pushed join, m_next_row:%p", m_next_row));
14191     if (!m_next_row)
14192     {
14193       int res= fetch_next_pushed();
14194       if (res == NdbQuery::NextResult_gotRow)
14195       {
14196         m_current_range_no= 0;
14197 //      m_current_range_no= cursor->get_range_no();  // FIXME SPJ, need rangeNo from index scan
14198       }
14199       else if (res == NdbQuery::NextResult_scanComplete)
14200       {
14201         /* We have fetched the last row from the scan. */
14202         m_active_query->close(FALSE);
14203         m_active_query= 0;
14204         m_next_row= 0;
14205         DBUG_RETURN(0);
14206       }
14207       else
14208       {
14209         /* An error. */
14210         DBUG_RETURN(res);
14211       }
14212     }
14213   }
14214   else if (m_multi_cursor)
14215   {
14216     if (!m_next_row)
14217     {
14218       NdbIndexScanOperation *cursor= (NdbIndexScanOperation *)m_multi_cursor;
14219       int res= fetch_next(cursor);
14220       if (res == 0)
14221       {
14222         m_current_range_no= cursor->get_range_no();
14223       }
14224       else if (res == 1)
14225       {
14226         /* We have fetched the last row from the scan. */
14227         cursor->close(FALSE, TRUE);
14228         m_active_cursor= 0;
14229         m_multi_cursor= 0;
14230         m_next_row= 0;
14231         DBUG_RETURN(0);
14232       }
14233       else
14234       {
14235         /* An error. */
14236         DBUG_RETURN(res);
14237       }
14238     }
14239   }
14240   DBUG_RETURN(0);
14241 }
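
/*
  Illustrative sketch (not compiled in): the single-row look-ahead protocol
  maintained by read_multi_range_fetch_next(). 'm_next_row' acts as a
  one-row cache, so read_multi_range_next() can inspect the range_no of the
  next scan row before deciding whether it belongs to the range currently
  being returned; 'expected_range_no' stands for that current range.
*/
#if 0
  int res= read_multi_range_fetch_next();
  if (res != 0)
    return res;                       /* fetch failed */
  if (m_next_row == NULL)
  {
    /* scan completed and cursor closed; move to the next range */
  }
  else if (m_current_range_no == expected_range_no)
  {
    unpack_record(table->record[0], m_next_row);
    m_next_row= 0;                    /* consumed; re-fetch on next call */
  }
  /* otherwise keep the row cached until its range becomes current */
#endif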
14242 #endif
14243 
14244 #ifndef NDB_WITHOUT_JOIN_PUSHDOWN
14245 
14246 /**
14247  * Try to find pushable subsets of a join plan.
14248  * @param hton unused (maybe useful for other engines).
14249  * @param thd Thread.
14250  * @param plan The join plan to examine.
14251  * @return Possible error code.
14252  */
14253 static
14254 int ndbcluster_make_pushed_join(handlerton *hton,
14255                                 THD* thd,
14256                                 AQP::Join_plan* plan)
14257 {
14258   DBUG_ENTER("ndbcluster_make_pushed_join");
14259   (void)ha_ndb_ext; // prevents compiler warning.
14260 
14261   if (THDVAR(thd, join_pushdown))
14262   {
14263     ndb_pushed_builder_ctx pushed_builder(*plan);
14264 
14265     for (uint i= 0; i < plan->get_access_count()-1; i++)
14266     {
14267       const AQP::Table_access* const join_root= plan->get_table_access(i);
14268       const ndb_pushed_join* pushed_join= NULL;
14269 
14270       // Try to build a ndb_pushed_join starting from 'join_root'
14271       int error= pushed_builder.make_pushed_join(join_root, pushed_join);
14272       if (unlikely(error))
14273       {
14274         if (error < 0)  // getNdbError() gives us the error code
14275         {
14276           ERR_SET(pushed_builder.getNdbError(),error);
14277         }
14278         join_root->get_table()->file->print_error(error, MYF(0));
14279         DBUG_RETURN(error);
14280       }
14281 
14282       // Assign any produced pushed_join definitions to
14283       // the ha_ndbcluster instance representing its root.
14284       if (pushed_join != NULL)
14285       {
14286         ha_ndbcluster* const handler=
14287           static_cast<ha_ndbcluster*>(join_root->get_table()->file);
14288 
14289         error= handler->assign_pushed_join(pushed_join);
14290         if (unlikely(error))
14291         {
14292           delete pushed_join;
14293           handler->print_error(error, MYF(0));
14294           DBUG_RETURN(error);
14295         }
14296       }
14297     }
14298   }
14299   DBUG_RETURN(0);
14300 }
14301 #endif
14302 
14303 
14304 /**
14305  * When a pushed join having the table for this handler as its root
14306  * has been produced, ::assign_pushed_join() is responsible for setting
14307  * up this ha_ndbcluster instance such that the prepared NdbQuery
14308  * can be instantiated at execution time.
14309  */
14310 int
14311 ha_ndbcluster::assign_pushed_join(const ndb_pushed_join* pushed_join)
14312 {
14313   DBUG_ENTER("assign_pushed_join");
14314   m_thd_ndb->m_pushed_queries_defined++;
14315 
14316   for (uint i = 0; i < pushed_join->get_operation_count(); i++)
14317   {
14318     const TABLE* const tab= pushed_join->get_table(i);
14319     DBUG_ASSERT(tab->file->ht == ht);
14320     ha_ndbcluster* child= static_cast<ha_ndbcluster*>(tab->file);
14321     child->m_pushed_join_member= pushed_join;
14322     child->m_pushed_join_operation= i;
14323   }
14324 
14325   DBUG_PRINT("info", ("Assigned pushed join with %d child operations",
14326                       pushed_join->get_operation_count()-1));
14327 
14328   DBUG_RETURN(0);
14329 }
14330 
14331 
14332 /**
14333  * First level of filtering tables which *may* be part of
14334  * a pushed query: Returning 'false' will eliminate this table
14335  * from being a part of a pushed join.
14336  * A 'reason' for rejecting this table is required if 'false'
14337  * is returned.
14338  */
14339 bool
14340 ha_ndbcluster::maybe_pushable_join(const char*& reason) const
14341 {
14342   reason= "";
14343   if (uses_blob_value(table->read_set))
14344   {
14345     reason= "select list can't contain BLOB columns";
14346     return false;
14347   }
14348   if (m_user_defined_partitioning)
14349   {
14350     reason= "has user defined partitioning";
14351     return false;
14352   }
14353 
14354   // Pushed operations may not set locks.
14355   const NdbOperation::LockMode lockMode= get_ndb_lock_mode(m_lock.type);
14356   switch (lockMode)
14357   {
14358   case NdbOperation::LM_CommittedRead:
14359     return true;
14360 
14361   case NdbOperation::LM_Read:
14362   case NdbOperation::LM_Exclusive:
14363     reason= "lock modes other than 'read committed' not implemented";
14364     return false;
14365 
14366   default: // Other lock modes not used by handler.
14367     assert(false);
14368     return false;
14369   }
14370 
14371   return true;
14372 }
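
/*
  Illustrative sketch (not compiled in): maybe_pushable_join() is meant as
  a cheap first-level filter before the more expensive pushed-join
  construction is attempted; 'handler' is assumed to be the ha_ndbcluster
  instance of the candidate table.
*/
#if 0
  const char* reason= NULL;
  if (!handler->maybe_pushable_join(reason))
  {
    DBUG_PRINT("info", ("Table is not pushable: %s", reason));
    /* leave this table out of any pushed query */
  }
#endif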
14373 
14374 /**
14375  * Check if this table access operation (and a number of succeeding
14376  * operations) can be pushed to the cluster and executed there. This
14377  * requires that there is an NdbQueryDefinition and that it still
14378  * corresponds to the type of operation that we intend to execute.
14379  * (The MySQL server will sometimes change its mind and replace a scan
14380  * with a lookup or vice versa as it works its way into the nested loop join.)
14381  *
14382  * @param type This is the operation type that the server wants to execute.
14383  * @param idx  Index used whenever relevant for operation type
14384  * @param needSorted True if the root operation is an ordered index scan
14385  * with sorted results.
14386  * @return True if the operation may be pushed.
14387  */
14388 #ifndef NDB_WITHOUT_JOIN_PUSHDOWN
14389 bool
14390 ha_ndbcluster::check_if_pushable(int type,  //NdbQueryOperationDef::Type,
14391                                  uint idx,
14392                                  bool needSorted) const
14393 {
14394   if (m_disable_pushed_join)
14395   {
14396     DBUG_PRINT("info", ("Push disabled (HA_EXTRA_KEYREAD)"));
14397     return false;
14398   }
14399   return   m_pushed_join_operation == PUSHED_ROOT
14400         && m_pushed_join_member    != NULL
14401         && m_pushed_join_member->match_definition(
14402                         type,
14403                         (idx<MAX_KEY) ? &m_index[idx] : NULL,
14404                         needSorted);
14405 }
14406 
14407 int
14408 ha_ndbcluster::create_pushed_join(const NdbQueryParamValue* keyFieldParams, uint paramCnt)
14409 {
14410   DBUG_ENTER("create_pushed_join");
14411   DBUG_ASSERT(m_pushed_join_member && m_pushed_join_operation == PUSHED_ROOT);
14412 
14413   NdbQuery* const query=
14414     m_pushed_join_member->make_query_instance(m_thd_ndb->trans, keyFieldParams, paramCnt);
14415 
14416   if (unlikely(query==NULL))
14417     ERR_RETURN(m_thd_ndb->trans->getNdbError());
14418 
14419   // Bind to instantiated NdbQueryOperations.
14420   for (uint i= 0; i < m_pushed_join_member->get_operation_count(); i++)
14421   {
14422     const TABLE* const tab= m_pushed_join_member->get_table(i);
14423     ha_ndbcluster* handler= static_cast<ha_ndbcluster*>(tab->file);
14424 
14425     DBUG_ASSERT(handler->m_pushed_join_operation==(int)i);
14426     NdbQueryOperation* const op= query->getQueryOperation(i);
14427     handler->m_pushed_operation= op;
14428 
14429     // Bind to result buffers
14430     const NdbRecord* const resultRec= handler->m_ndb_record;
14431     int res= op->setResultRowRef(
14432                         resultRec,
14433                         handler->_m_next_row,
14434                         (uchar *)(tab->read_set->bitmap));
14435     if (unlikely(res))
14436       ERR_RETURN(query->getNdbError());
14437 
14438     // We clear 'm_next_row' to say that no row was fetched from the query yet.
14439     handler->_m_next_row= 0;
14440   }
14441 
14442   DBUG_ASSERT(m_active_query==NULL);
14443   m_active_query= query;
14444   m_thd_ndb->m_pushed_queries_executed++;
14445 
14446   DBUG_RETURN(0);
14447 }
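
/*
  Illustrative sketch (not compiled in): once create_pushed_join() has bound
  the result buffers, execution elsewhere in this file proceeds roughly as
  below, assuming an open NdbTransaction 'trans'. Rows arrive through the
  row references registered with setResultRowRef() above.
*/
#if 0
  if (trans->execute(NdbTransaction::NoCommit) != 0)
    ERR_RETURN(trans->getNdbError());
  NdbQuery::NextResultOutcome res= m_active_query->nextResult(true, false);
  if (res == NdbQuery::NextResult_gotRow)
  {
    /* '_m_next_row' of the root now points at a fetched row; unpack it */
  }
#endif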
14448 #endif
14449 
14450 
14451 /**
14452  * Check if this table access operation is part of a pushed join operation
14453  * which is actively executing.
14454  */
14455 bool
14456 ha_ndbcluster::check_is_pushed() const
14457 {
14458   if (m_pushed_join_member == NULL)
14459     return false;
14460 
14461   handler *root= m_pushed_join_member->get_table(PUSHED_ROOT)->file;
14462   return (static_cast<ha_ndbcluster*>(root)->m_active_query);
14463 }
14464 
14465 uint
14466 ha_ndbcluster::number_of_pushed_joins() const
14467 {
14468   if (m_pushed_join_member == NULL)
14469     return 0;
14470   else
14471     return m_pushed_join_member->get_operation_count();
14472 }
14473 
14474 const TABLE*
14475 ha_ndbcluster::root_of_pushed_join() const
14476 {
14477   if (m_pushed_join_member == NULL)
14478     return NULL;
14479   else
14480     return m_pushed_join_member->get_table(PUSHED_ROOT);
14481 }
14482 
14483 const TABLE*
14484 ha_ndbcluster::parent_of_pushed_join() const
14485 {
14486   if (m_pushed_join_operation > PUSHED_ROOT)
14487   {
14488     DBUG_ASSERT(m_pushed_join_member!=NULL);
14489     uint parent_ix= m_pushed_join_member
14490                     ->get_query_def().getQueryOperation(m_pushed_join_operation)
14491                     ->getParentOperation(0)
14492                     ->getQueryOperationIx();
14493     return m_pushed_join_member->get_table(parent_ix);
14494   }
14495   return NULL;
14496 }
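
/*
  Illustrative sketch (not compiled in): how the accessors above can be
  combined, e.g. for EXPLAIN-style reporting of a pushed join; 'file' is
  assumed to be the handler of some table in the join plan.
*/
#if 0
  if (file->number_of_pushed_joins() > 0)
  {
    const TABLE* root= file->root_of_pushed_join();
    const TABLE* parent= file->parent_of_pushed_join();
    /* 'root' anchors the NdbQuery; 'parent' is this table's immediate
       ancestor in the pushed tree (NULL for the root itself) */
  }
#endif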
14497 
14498 /**
14499   @param[in] comment  table comment defined by user
14500 
14501   @return
14502     table comment + additional
14503 */
14504 char*
14505 ha_ndbcluster::update_table_comment(
14506                                 /* out: table comment + additional */
14507         const char*     comment)/* in:  table comment defined by user */
14508 {
14509   THD *thd= current_thd;
14510   uint length= strlen(comment);
14511   if (length > 64000 - 3)
14512   {
14513     return((char*)comment); /* string too long */
14514   }
14515 
14516   Ndb* ndb;
14517   if (!(ndb= get_ndb(thd)))
14518   {
14519     return((char*)comment);
14520   }
14521 
14522   if (ndb->setDatabaseName(m_dbname))
14523   {
14524     return((char*)comment);
14525   }
14526   const NDBTAB* tab= m_table;
14527   DBUG_ASSERT(tab != NULL);
14528 
14529   char *str;
14530   const char *fmt="%s%snumber_of_replicas: %d";
14531   const unsigned fmt_len_plus_extra= length + strlen(fmt);
14532   if ((str= (char*) my_malloc(fmt_len_plus_extra, MYF(0))) == NULL)
14533   {
14534     sql_print_error("ha_ndbcluster::update_table_comment: "
14535                     "my_malloc(%u) failed", (unsigned int)fmt_len_plus_extra);
14536     return (char*)comment;
14537   }
14538 
14539   my_snprintf(str,fmt_len_plus_extra,fmt,comment,
14540               length > 0 ? " ":"",
14541               tab->getReplicaCount());
14542   return str;
14543 }
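
/*
  Worked example (illustrative, not compiled in): what the format string
  above produces for a table with two replicas.
*/
#if 0
  char *res= file->update_table_comment("engine test");
  /* res now points to "engine test number_of_replicas: 2";
     an empty user comment yields just "number_of_replicas: 2" */
#endif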
14544 
14545 
14546 /**
14547   Utility thread main loop.
14548 */
14549 pthread_handler_t ndb_util_thread_func(void *arg MY_ATTRIBUTE((unused)))
14550 {
14551   THD *thd; /* needs to be first for thread_stack */
14552   struct timespec abstime;
14553   Thd_ndb *thd_ndb= NULL;
14554   uint share_list_size= 0;
14555   NDB_SHARE **share_list= NULL;
14556 
14557   my_thread_init();
14558   DBUG_ENTER("ndb_util_thread");
14559   DBUG_PRINT("enter", ("cache_check_time: %lu", opt_ndb_cache_check_time));
14560 
14561   pthread_mutex_lock(&LOCK_ndb_util_thread);
14562 
14563   thd= new THD; /* note that constructor of THD uses DBUG_ */
14564   if (thd == NULL)
14565   {
14566     my_errno= HA_ERR_OUT_OF_MEM;
14567     DBUG_RETURN(NULL);
14568   }
14569   THD_CHECK_SENTRY(thd);
14570   pthread_detach_this_thread();
14571   ndb_util_thread= pthread_self();
14572 
14573   thd->thread_stack= (char*)&thd; /* remember where our stack is */
14574   if (thd->store_globals())
14575     goto ndb_util_thread_fail;
14576   lex_start(thd);
14577   thd->init_for_queries();
14578   thd_set_command(thd, COM_DAEMON);
14579 #ifndef NDB_THD_HAS_NO_VERSION
14580   thd->version=refresh_version;
14581 #endif
14582   thd->client_capabilities = 0;
14583   thd->security_ctx->skip_grants();
14584   my_net_init(&thd->net, 0);
14585 
14586   CHARSET_INFO *charset_connection;
14587   charset_connection= get_charset_by_csname("utf8",
14588                                             MY_CS_PRIMARY, MYF(MY_WME));
14589   thd->variables.character_set_client= charset_connection;
14590   thd->variables.character_set_results= charset_connection;
14591   thd->variables.collation_connection= charset_connection;
14592   thd->update_charset();
14593 
14594   /* Signal successful initialization */
14595   ndb_util_thread_running= 1;
14596   pthread_cond_signal(&COND_ndb_util_ready);
14597   pthread_mutex_unlock(&LOCK_ndb_util_thread);
14598 
14599   /*
14600     wait for mysql server to start
14601   */
14602   mysql_mutex_lock(&LOCK_server_started);
14603   while (!mysqld_server_started)
14604   {
14605     set_timespec(abstime, 1);
14606     mysql_cond_timedwait(&COND_server_started, &LOCK_server_started,
14607                          &abstime);
14608     if (ndbcluster_terminating)
14609     {
14610       mysql_mutex_unlock(&LOCK_server_started);
14611       pthread_mutex_lock(&LOCK_ndb_util_thread);
14612       goto ndb_util_thread_end;
14613     }
14614   }
14615   mysql_mutex_unlock(&LOCK_server_started);
14616 
14617   /*
14618     Wait for cluster to start
14619   */
14620   pthread_mutex_lock(&LOCK_ndb_util_thread);
14621   while (!g_ndb_status.cluster_node_id && (ndbcluster_hton->slot != ~(uint)0))
14622   {
14623     /* ndb not connected yet */
14624     pthread_cond_wait(&COND_ndb_util_thread, &LOCK_ndb_util_thread);
14625     if (ndbcluster_terminating)
14626       goto ndb_util_thread_end;
14627   }
14628   pthread_mutex_unlock(&LOCK_ndb_util_thread);
14629 
14630   /* Get thd_ndb for this thread */
14631   if (!(thd_ndb= Thd_ndb::seize(thd)))
14632   {
14633     sql_print_error("Could not allocate Thd_ndb object");
14634     pthread_mutex_lock(&LOCK_ndb_util_thread);
14635     goto ndb_util_thread_end;
14636   }
14637   thd_set_thd_ndb(thd, thd_ndb);
14638   thd_ndb->options|= TNO_NO_LOG_SCHEMA_OP;
14639 
14640   if (opt_ndb_extra_logging && ndb_binlog_running)
14641     sql_print_information("NDB Binlog: Ndb tables initially read only.");
14642 
14643   set_timespec(abstime, 0);
14644   for (;;)
14645   {
14646     pthread_mutex_lock(&LOCK_ndb_util_thread);
14647     if (!ndbcluster_terminating)
14648       pthread_cond_timedwait(&COND_ndb_util_thread,
14649                              &LOCK_ndb_util_thread,
14650                              &abstime);
14651     if (ndbcluster_terminating) /* Shutting down server */
14652       goto ndb_util_thread_end;
14653     pthread_mutex_unlock(&LOCK_ndb_util_thread);
14654 #ifdef NDB_EXTRA_DEBUG_UTIL_THREAD
14655     DBUG_PRINT("ndb_util_thread", ("Started, cache_check_time: %lu",
14656                                    opt_ndb_cache_check_time));
14657 #endif
14658 
14659     /*
14660       Check if the Ndb object in thd_ndb is still valid (it will be
14661       invalid if connection to cluster has been lost) and recycle
14662       it if necessary.
14663     */
14664     if (!check_ndb_in_thd(thd, false))
14665     {
14666       set_timespec(abstime, 1);
14667       continue;
14668     }
14669 
14670     /*
14671       Regularly give the ndb_binlog component a chance to set itself up,
14672       i.e. at first start it needs to create the ndb_* system tables
14673       and set up event operations on those. In case of a lost connection
14674       to the cluster, the ndb_* system tables are hopefully still there,
14675       but the event operations need to be recreated.
14676     */
14677     if (!ndb_binlog_setup(thd))
14678     {
14679       /* Failed to setup binlog, try again in 1 second */
14680       set_timespec(abstime, 1);
14681       continue;
14682     }
14683 
14684     if (opt_ndb_cache_check_time == 0)
14685     {
14686       /* Wake up in 1 second to check if value has changed */
14687       set_timespec(abstime, 1);
14688       continue;
14689     }
14690 
14691     /* Lock mutex and fill list with pointers to all open tables */
14692     NDB_SHARE *share;
14693     pthread_mutex_lock(&ndbcluster_mutex);
14694     uint i, open_count, record_count= ndbcluster_open_tables.records;
14695     if (share_list_size < record_count)
14696     {
14697       NDB_SHARE ** new_share_list= new NDB_SHARE * [record_count];
14698       if (!new_share_list)
14699       {
14700         sql_print_warning("ndb util thread: malloc failure, "
14701                           "query cache not maintained properly");
14702         pthread_mutex_unlock(&ndbcluster_mutex);
14703         goto next;                               // At least do not crash
14704       }
14705       delete [] share_list;
14706       share_list_size= record_count;
14707       share_list= new_share_list;
14708     }
14709     for (i= 0, open_count= 0; i < record_count; i++)
14710     {
14711       share= (NDB_SHARE *)my_hash_element(&ndbcluster_open_tables, i);
14712       if ((share->use_count - (int) (share->op != 0) - (int) (share->op != 0))
14713           <= 0)
14714         continue; // injector thread is the only user, skip statistics
14715       /* ndb_share reference temporary, free below */
14716       share->use_count++; /* Make sure the table can't be closed */
14717       share->util_thread= true;
14718       DBUG_PRINT("NDB_SHARE", ("%s temporary  use_count: %u",
14719                                share->key, share->use_count));
14720       DBUG_PRINT("ndb_util_thread",
14721                  ("Found open table[%d]: %s, use_count: %d",
14722                   i, share->table_name, share->use_count));
14723 
14724       /* Store pointer to table */
14725       share_list[open_count++]= share;
14726     }
14727     pthread_mutex_unlock(&ndbcluster_mutex);
14728 
14729     /* Iterate through the open files list */
14730     for (i= 0; i < open_count; i++)
14731     {
14732       share= share_list[i];
14733       if ((share->use_count - (int) (share->op != 0) - (int) (share->op != 0))
14734           <= 1)
14735       {
14736         /*
14737           Util thread and injector thread are the only users, skip statistics
14738         */
14739         /* ndb_share reference temporary free */
14740         DBUG_PRINT("NDB_SHARE", ("%s temporary free  use_count: %u",
14741                                  share->key, share->use_count));
14742 
14743         pthread_mutex_lock(&ndbcluster_mutex);
14744         share->util_thread= false;
14745         free_share(&share, true);
14746         pthread_mutex_unlock(&ndbcluster_mutex);
14747         continue;
14748       }
14749       DBUG_PRINT("ndb_util_thread",
14750                  ("Fetching commit count for: %s", share->key));
14751 
14752       struct Ndb_statistics stat;
14753       uint lock;
14754       pthread_mutex_lock(&share->mutex);
14755       lock= share->commit_count_lock;
14756       pthread_mutex_unlock(&share->mutex);
14757       {
14758         /* Contact NDB to get commit count for table */
14759         Ndb* ndb= thd_ndb->ndb;
14760         if (ndb->setDatabaseName(share->db))
14761         {
14762           goto loop_next;
14763         }
14764         Ndb_table_guard ndbtab_g(ndb->getDictionary(), share->table_name);
14765         if (ndbtab_g.get_table() &&
14766             ndb_get_table_statistics(thd, NULL, FALSE, ndb,
14767                                      ndbtab_g.get_table()->getDefaultRecord(),
14768                                      &stat) == 0)
14769         {
14770 #ifndef DBUG_OFF
14771           char buff[22], buff2[22];
14772 #endif
14773           DBUG_PRINT("info",
14774                      ("Table: %s  commit_count: %s  rows: %s",
14775                       share->key,
14776                       llstr(stat.commit_count, buff),
14777                       llstr(stat.row_count, buff2)));
14778         }
14779         else
14780         {
14781           DBUG_PRINT("ndb_util_thread",
14782                      ("Error: Could not get commit count for table %s",
14783                       share->key));
14784           stat.commit_count= 0;
14785         }
14786       }
14787   loop_next:
14788       pthread_mutex_lock(&share->mutex);
14789       if (share->commit_count_lock == lock)
14790         share->commit_count= stat.commit_count;
14791       pthread_mutex_unlock(&share->mutex);
14792 
14793       /* ndb_share reference temporary free */
14794       DBUG_PRINT("NDB_SHARE", ("%s temporary free  use_count: %u",
14795                                share->key, share->use_count));
14796       pthread_mutex_lock(&ndbcluster_mutex);
14797       share->util_thread= false;
14798       free_share(&share, true);
14799       pthread_mutex_unlock(&ndbcluster_mutex);
14800     }
14801 next:
14802     /* Calculate new time to wake up */
14803     set_timespec_nsec(abstime, opt_ndb_cache_check_time * 1000000ULL);
14804   }
14805 
14806   pthread_mutex_lock(&LOCK_ndb_util_thread);
14807 
14808 ndb_util_thread_end:
14809   net_end(&thd->net);
14810 ndb_util_thread_fail:
14811   if (share_list)
14812     delete [] share_list;
14813   if (thd_ndb)
14814   {
14815     Thd_ndb::release(thd_ndb);
14816     thd_set_thd_ndb(thd, NULL);
14817   }
14818   thd->cleanup();
14819   delete thd;
14820 
14821   /* signal termination */
14822   ndb_util_thread_running= 0;
14823   pthread_cond_signal(&COND_ndb_util_ready);
14824   pthread_mutex_unlock(&LOCK_ndb_util_thread);
14825   DBUG_PRINT("exit", ("ndb_util_thread"));
14826 
14827   DBUG_LEAVE;                               // Must match DBUG_ENTER()
14828   my_thread_end();
14829   pthread_exit(0);
14830   return NULL;                              // Avoid compiler warnings
14831 }
14832 
14833 /*
14834   Condition pushdown
14835 */
14836 /**
14837   Push a condition to the ndbcluster storage engine for evaluation
14838   during table and index scans. The conditions are kept on a stack
14839   so that several conditions can be stored. The stack can be popped
14840   by calling cond_pop; handler::extra(HA_EXTRA_RESET) (handler::reset())
14841   will clear the stack.
14842   The current implementation supports arbitrary AND/OR nested conditions
14843   with comparisons between columns and constants (including constant
14844   expressions and function calls) and the following comparison operators:
14845   =, !=, >, >=, <, <=, "is null", and "is not null".
14846 
14847   @retval
14848     NULL The condition was supported and will be evaluated for each
14849          row found during the scan
14850   @retval
14851     cond The condition was not supported and all rows will be returned from
14852          the scan for evaluation (and thus not saved on stack)
14853 */
14854 const
14855 Item*
14856 ha_ndbcluster::cond_push(const Item *cond)
14857 {
14858   DBUG_ENTER("ha_ndbcluster::cond_push");
14859 
14860 #if 1
14861   if (cond->used_tables() & ~table->map)
14862   {
14863     /**
14864      * 'cond' refers to fields from other tables, or other instances
14865      * of this table -> reject it.
14866      * (The optimizer needs to have a better understanding of what is
14867      *  pushable by each handler.)
14868      */
14869     DBUG_EXECUTE("where",print_where((Item *)cond, "Rejected cond_push", QT_ORDINARY););
14870     DBUG_RETURN(cond);
14871   }
14872 #else
14873   /*
14874     Make sure that 'cond' does not refer to field(s) from other tables
14875     or other instances of this table.
14876     (This was a legacy bug in the optimizer.)
14877   */
14878   DBUG_ASSERT(!(cond->used_tables() & ~table->map));
14879 #endif
14880   if (!m_cond)
14881     m_cond= new ha_ndbcluster_cond;
14882   if (!m_cond)
14883   {
14884     my_errno= HA_ERR_OUT_OF_MEM;
14885     DBUG_RETURN(cond);
14886   }
14887   DBUG_EXECUTE("where",print_where((Item *)cond, m_tabname, QT_ORDINARY););
14888   DBUG_RETURN(m_cond->cond_push(cond, table, (NDBTAB *)m_table));
14889 }
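
/*
  Illustrative sketch (not compiled in): the kind of Item tree cond_push()
  accepts. For a WHERE clause such as a = 3 AND (b > 10 OR c IS NULL) on a
  single NDB table, all comparisons are between columns and constants using
  supported operators, so the whole condition can be pushed; 'file' and
  'where_cond' are assumed to be set up by the optimizer.
*/
#if 0
  const Item *remainder= file->cond_push(where_cond);
  if (remainder == NULL)
  {
    /* fully pushed: rows are pre-filtered in the data nodes during the scan */
  }
  else
  {
    /* not pushable: the server must evaluate 'remainder' for each row */
  }
#endif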
14890 
14891 /**
14892   Pop the top condition from the condition stack of the handler instance.
14893 */
14894 void
14895 ha_ndbcluster::cond_pop()
14896 {
14897   if (m_cond)
14898     m_cond->cond_pop();
14899 }
14900 
14901 
14902 /*
14903   Implements the SHOW NDB STATUS command.
14904 */
14905 bool
14906 ndbcluster_show_status(handlerton *hton, THD* thd, stat_print_fn *stat_print,
14907                        enum ha_stat_type stat_type)
14908 {
14909   char name[16];
14910   char buf[IO_SIZE];
14911   uint buflen;
14912   DBUG_ENTER("ndbcluster_show_status");
14913 
14914   if (stat_type != HA_ENGINE_STATUS)
14915   {
14916     DBUG_RETURN(FALSE);
14917   }
14918 
14919   Ndb* ndb= check_ndb_in_thd(thd);
14920   Thd_ndb *thd_ndb= get_thd_ndb(thd);
14921   struct st_ndb_status ns;
14922   if (ndb)
14923     update_status_variables(thd_ndb, &ns, thd_ndb->connection);
14924   else
14925     update_status_variables(NULL, &ns, g_ndb_cluster_connection);
14926 
14927   buflen=
14928     my_snprintf(buf, sizeof(buf),
14929                 "cluster_node_id=%ld, "
14930                 "connected_host=%s, "
14931                 "connected_port=%ld, "
14932                 "number_of_data_nodes=%ld, "
14933                 "number_of_ready_data_nodes=%ld, "
14934                 "connect_count=%ld",
14935                 ns.cluster_node_id,
14936                 ns.connected_host,
14937                 ns.connected_port,
14938                 ns.number_of_data_nodes,
14939                 ns.number_of_ready_data_nodes,
14940                 ns.connect_count);
14941   if (stat_print(thd, ndbcluster_hton_name, ndbcluster_hton_name_length,
14942                  STRING_WITH_LEN("connection"), buf, buflen))
14943     DBUG_RETURN(TRUE);
14944 
14945   for (int i= 0; i < MAX_NDB_NODES; i++)
14946   {
14947     if (ns.transaction_hint_count[i] > 0 ||
14948         ns.transaction_no_hint_count[i] > 0)
14949     {
14950       uint namelen= my_snprintf(name, sizeof(name), "node[%d]", i);
14951       buflen= my_snprintf(buf, sizeof(buf),
14952                           "transaction_hint=%ld, transaction_no_hint=%ld",
14953                           ns.transaction_hint_count[i],
14954                           ns.transaction_no_hint_count[i]);
14955       if (stat_print(thd, ndbcluster_hton_name, ndbcluster_hton_name_length,
14956                      name, namelen, buf, buflen))
14957         DBUG_RETURN(TRUE);
14958     }
14959   }
14960 
14961   if (ndb)
14962   {
14963     Ndb::Free_list_usage tmp;
14964     tmp.m_name= 0;
14965     while (ndb->get_free_list_usage(&tmp))
14966     {
14967       buflen=
14968         my_snprintf(buf, sizeof(buf),
14969                   "created=%u, free=%u, sizeof=%u",
14970                   tmp.m_created, tmp.m_free, tmp.m_sizeof);
14971       if (stat_print(thd, ndbcluster_hton_name, ndbcluster_hton_name_length,
14972                      tmp.m_name, strlen(tmp.m_name), buf, buflen))
14973         DBUG_RETURN(TRUE);
14974     }
14975   }
14976   ndbcluster_show_status_binlog(thd, stat_print, stat_type);
14977 
14978   DBUG_RETURN(FALSE);
14979 }
14980 
14981 
14982 int ha_ndbcluster::get_default_no_partitions(HA_CREATE_INFO *create_info)
14983 {
14984   if (unlikely(g_ndb_cluster_connection->get_no_ready() <= 0))
14985   {
14986 err:
14987     my_error(HA_ERR_NO_CONNECTION, MYF(0));
14988     return -1;
14989   }
14990 
14991   THD* thd = current_thd;
14992   if (thd == 0)
14993     goto err;
14994   Thd_ndb * thd_ndb = get_thd_ndb(thd);
14995   if (thd_ndb == 0)
14996     goto err;
14997 
14998   ha_rows max_rows, min_rows;
14999   if (create_info)
15000   {
15001     max_rows= create_info->max_rows;
15002     min_rows= create_info->min_rows;
15003   }
15004   else
15005   {
15006     max_rows= table_share->max_rows;
15007     min_rows= table_share->min_rows;
15008   }
15009   uint no_fragments= get_no_fragments(max_rows >= min_rows ?
15010                                       max_rows : min_rows);
15011   uint reported_frags;
15012   adjusted_frag_count(thd_ndb->ndb,
15013                       no_fragments,
15014                       reported_frags);
15015   return reported_frags;
15016 }
15017 
15018 uint32 ha_ndbcluster::calculate_key_hash_value(Field **field_array)
15019 {
15020   Uint32 hash_value;
15021   struct Ndb::Key_part_ptr key_data[MAX_REF_PARTS];
15022   struct Ndb::Key_part_ptr *key_data_ptr= &key_data[0];
15023   Uint32 i= 0;
15024   int ret_val;
15025   Uint64 tmp[(MAX_KEY_SIZE_IN_WORDS*MAX_XFRM_MULTIPLY) >> 1];
15026   void *buf= (void*)&tmp[0];
15027   Ndb *ndb= m_thd_ndb->ndb;
15028   DBUG_ENTER("ha_ndbcluster::calculate_key_hash_value");
15029 
15030   do
15031   {
15032     Field *field= *field_array;
15033     uint len= field->data_length();
15034     DBUG_ASSERT(!field->is_real_null());
15035     if (field->real_type() == MYSQL_TYPE_VARCHAR)
15036       len+= ((Field_varstring*)field)->length_bytes;
15037     key_data[i].ptr= field->ptr;
15038     key_data[i++].len= len;
15039   } while (*(++field_array));
15040   key_data[i].ptr= 0;
15041   if ((ret_val= ndb->computeHash(&hash_value, m_table,
15042                                  key_data_ptr, buf, sizeof(tmp))))
15043   {
15044     DBUG_PRINT("info", ("ret_val = %d", ret_val));
15045     DBUG_ASSERT(FALSE);
15046     abort();
15047   }
15048   DBUG_RETURN(hash_value);
15049 }
15050 
15051 
15052 /*
15053   Set-up auto-partitioning for NDB Cluster
15054 
15055   SYNOPSIS
15056     set_auto_partitions()
15057     part_info                  Partition info struct to set-up
15058 
15059   RETURN VALUE
15060     NONE
15061 
15062   DESCRIPTION
15063     Set-up auto partitioning scheme for tables that didn't define any
15064     partitioning. We'll use PARTITION BY KEY() in this case which
15065     translates into partition by primary key if a primary key exists
15066     and partition by hidden key otherwise.
15067 */
15068 
15069 enum ndb_distribution_enum {
15070   NDB_DISTRIBUTION_KEYHASH= 0,
15071   NDB_DISTRIBUTION_LINHASH= 1
15072 };
15073 static const char* distribution_names[]= { "KEYHASH", "LINHASH", NullS };
15074 static ulong opt_ndb_distribution;
15075 static TYPELIB distribution_typelib= {
15076   array_elements(distribution_names) - 1,
15077   "",
15078   distribution_names,
15079   NULL
15080 };
15081 static MYSQL_SYSVAR_ENUM(
15082   distribution,                      /* name */
15083   opt_ndb_distribution,              /* var */
15084   PLUGIN_VAR_RQCMDARG,
15085   "Default distribution for new tables in ndb",
15086   NULL,                              /* check func. */
15087   NULL,                              /* update func. */
15088   NDB_DISTRIBUTION_KEYHASH,          /* default */
15089   &distribution_typelib              /* typelib */
15090 );
15091 
15092 
15093 void ha_ndbcluster::set_auto_partitions(partition_info *part_info)
15094 {
15095   DBUG_ENTER("ha_ndbcluster::set_auto_partitions");
15096   part_info->list_of_part_fields= TRUE;
15097   part_info->part_type= HASH_PARTITION;
15098   switch (opt_ndb_distribution)
15099   {
15100   case NDB_DISTRIBUTION_KEYHASH:
15101     part_info->linear_hash_ind= FALSE;
15102     break;
15103   case NDB_DISTRIBUTION_LINHASH:
15104     part_info->linear_hash_ind= TRUE;
15105     break;
15106   default:
15107     DBUG_ASSERT(false);
15108     break;
15109   }
15110   DBUG_VOID_RETURN;
15111 }
15112 
15113 
15114 int
15115 ha_ndbcluster::set_range_data(const partition_info *part_info,
15116                               NdbDictionary::Table& ndbtab) const
15117 {
15118   const uint num_parts = partition_info_num_parts(part_info);
15119   int error= 0;
15120   bool unsigned_flag= part_info->part_expr->unsigned_flag;
15121   DBUG_ENTER("set_range_data");
15122 
15123   int32 *range_data= (int32*)my_malloc(num_parts*sizeof(int32), MYF(0));
15124   if (!range_data)
15125   {
15126     mem_alloc_error(num_parts*sizeof(int32));
15127     DBUG_RETURN(1);
15128   }
15129   for (uint i= 0; i < num_parts; i++)
15130   {
15131     longlong range_val= part_info->range_int_array[i];
15132     if (unsigned_flag)
15133       range_val-= 0x8000000000000000ULL;
15134     if (range_val < INT_MIN32 || range_val >= INT_MAX32)
15135     {
15136       if ((i != num_parts - 1) ||
15137           (range_val != LONGLONG_MAX))
15138       {
15139         my_error(ER_LIMITED_PART_RANGE, MYF(0), "NDB");
15140         error= 1;
15141         goto error;
15142       }
15143       range_val= INT_MAX32;
15144     }
15145     range_data[i]= (int32)range_val;
15146   }
15147   ndbtab.setRangeListData(range_data, num_parts);
15148 error:
15149   my_free((char*)range_data, MYF(0));
15150   DBUG_RETURN(error);
15151 }
15152 
15153 
15154 int
15155 ha_ndbcluster::set_list_data(const partition_info *part_info,
15156                              NdbDictionary::Table& ndbtab) const
15157 {
15158   const uint num_list_values = partition_info_num_list_values(part_info);
15159   int32 *list_data= (int32*)my_malloc(num_list_values*2*sizeof(int32), MYF(0));
15160   int error= 0;
15161   bool unsigned_flag= part_info->part_expr->unsigned_flag;
15162   DBUG_ENTER("set_list_data");
15163 
15164   if (!list_data)
15165   {
15166     mem_alloc_error(num_list_values*2*sizeof(int32));
15167     DBUG_RETURN(1);
15168   }
15169   for (uint i= 0; i < num_list_values; i++)
15170   {
15171     LIST_PART_ENTRY *list_entry= &part_info->list_array[i];
15172     longlong list_val= list_entry->list_value;
15173     if (unsigned_flag)
15174       list_val-= 0x8000000000000000ULL;
15175     if (list_val < INT_MIN32 || list_val > INT_MAX32)
15176     {
15177       my_error(ER_LIMITED_PART_RANGE, MYF(0), "NDB");
15178       error= 1;
15179       goto error;
15180     }
15181     list_data[2*i]= (int32)list_val;
15182     list_data[2*i+1]= list_entry->partition_id;
15183   }
15184   ndbtab.setRangeListData(list_data, 2*num_list_values);
15185 error:
15186   my_free((char*)list_data, MYF(0));
15187   DBUG_RETURN(error);
15188 }
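
/*
  Worked example (illustrative, not compiled in): both set_range_data() and
  set_list_data() re-centre unsigned 64-bit partition values into the signed
  domain before truncating to int32, which preserves ordering. NDB stores
  only 32-bit values here, so shifted values outside [INT_MIN32, INT_MAX32]
  are rejected with ER_LIMITED_PART_RANGE.
*/
#if 0
  longlong v1= (longlong)(0x8000000000000005ULL - 0x8000000000000000ULL); /*  5 */
  longlong v2= (longlong)(0x7FFFFFFFFFFFFFFFULL - 0x8000000000000000ULL); /* -1 */
#endif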
15189 
15190 /*
15191   User defined partitioning set-up. We need to check how many fragments the
15192   user wants defined and which node groups to put those into. Later we also
15193   want to attach those partitions to a tablespace.
15194 
15195   All the functionality of the partition function, partition limits and so
15196   forth are entirely handled by the MySQL Server. There is one exception to
15197   this rule for PARTITION BY KEY, where NDB handles the hash function and
15198   this type can thus be handled transparently also by NDB API programs.
15199   For RANGE, HASH, LIST and subpartitioning, NDB API programs must
15200   implement the mapping to a partition themselves.
15201 */
15202 
15203 int
15204 ha_ndbcluster::set_up_partition_info(partition_info *part_info,
15205                                      NdbDictionary::Table& ndbtab) const
15206 {
15207   uint32 frag_data[MAX_PARTITIONS];
15208   char *ts_names[MAX_PARTITIONS];
15209   ulong fd_index= 0, i, j;
15210   NDBTAB::FragmentType ftype= NDBTAB::UserDefined;
15211   partition_element *part_elem;
15212   List_iterator<partition_element> part_it(part_info->partitions);
15213   int error;
15214   DBUG_ENTER("ha_ndbcluster::set_up_partition_info");
15215 
15216   if (part_info->part_type == HASH_PARTITION &&
15217       part_info->list_of_part_fields == TRUE)
15218   {
15219     Field **fields= part_info->part_field_array;
15220 
15221     ftype= NDBTAB::HashMapPartition;
15222 
15223     for (i= 0; i < part_info->part_field_list.elements; i++)
15224     {
15225       NDBCOL *col= ndbtab.getColumn(fields[i]->field_index);
15226       DBUG_PRINT("info",("setting dist key on %s", col->getName()));
15227       col->setPartitionKey(TRUE);
15228     }
15229   }
15230   else
15231   {
15232     if (!current_thd->variables.new_mode)
15233     {
15234       push_warning_printf(current_thd, Sql_condition::WARN_LEVEL_WARN,
15235                           ER_ILLEGAL_HA_CREATE_OPTION,
15236                           ER(ER_ILLEGAL_HA_CREATE_OPTION),
15237                           ndbcluster_hton_name,
15238                           "LIST, RANGE and HASH partition disabled by default,"
15239                           " use --new option to enable");
15240       DBUG_RETURN(HA_ERR_UNSUPPORTED);
15241     }
15242     /*
15243       Create a shadow field for those tables that have user defined
15244       partitioning. This field stores the value of the partition
15245       function such that NDB can handle reorganisations of the data
15246       even when the MySQL Server isn't available to assist with
15247       calculation of the partition function value.
15248     */
15249     NDBCOL col;
15250     DBUG_PRINT("info", ("Generating partition func value field"));
15251     col.setName("$PART_FUNC_VALUE");
15252     col.setType(NdbDictionary::Column::Int);
15253     col.setLength(1);
15254     col.setNullable(FALSE);
15255     col.setPrimaryKey(FALSE);
15256     col.setAutoIncrement(FALSE);
15257     ndbtab.addColumn(col);
15258     if (part_info->part_type == RANGE_PARTITION)
15259     {
15260       if ((error= set_range_data(part_info, ndbtab)))
15261       {
15262         DBUG_RETURN(error);
15263       }
15264     }
15265     else if (part_info->part_type == LIST_PARTITION)
15266     {
15267       if ((error= set_list_data(part_info, ndbtab)))
15268       {
15269         DBUG_RETURN(error);
15270       }
15271     }
15272   }
15273   ndbtab.setFragmentType(ftype);
15274   i= 0;
15275   do
15276   {
15277     uint ng;
15278     part_elem= part_it++;
15279     if (!part_info->is_sub_partitioned())
15280     {
15281       ng= part_elem->nodegroup_id;
15282       ts_names[fd_index]= part_elem->tablespace_name;
15283       frag_data[fd_index++]= ng;
15284     }
15285     else
15286     {
15287       List_iterator<partition_element> sub_it(part_elem->subpartitions);
15288       j= 0;
15289       do
15290       {
15291         part_elem= sub_it++;
15292         ng= part_elem->nodegroup_id;
15293         ts_names[fd_index]= part_elem->tablespace_name;
15294         frag_data[fd_index++]= ng;
15295       } while (++j < partition_info_num_subparts(part_info));
15296     }
15297   } while (++i < partition_info_num_parts(part_info));
15298 
15299   const bool use_default_num_parts =
15300     partition_info_use_default_num_partitions(part_info);
15301   ndbtab.setDefaultNoPartitionsFlag(use_default_num_parts);
15302   ndbtab.setLinearFlag(part_info->linear_hash_ind);
15303   {
15304     ha_rows max_rows= table_share->max_rows;
15305     ha_rows min_rows= table_share->min_rows;
15306     if (max_rows < min_rows)
15307       max_rows= min_rows;
15308     if (max_rows != (ha_rows)0) /* if 0 (the default), don't set max/min rows */
15309     {
15310       ndbtab.setMaxRows(max_rows);
15311       ndbtab.setMinRows(min_rows);
15312     }
15313   }
15314   ndbtab.setFragmentCount(fd_index);
15315   ndbtab.setFragmentData(frag_data, fd_index);
15316   DBUG_RETURN(0);
15317 }
15318 
15319 #ifndef NDB_WITHOUT_ONLINE_ALTER
15320 static
15321 HA_ALTER_FLAGS supported_alter_operations()
15322 {
15323   HA_ALTER_FLAGS alter_flags;
15324   return alter_flags |
15325     HA_ADD_INDEX |
15326     HA_DROP_INDEX |
15327     HA_ADD_UNIQUE_INDEX |
15328     HA_DROP_UNIQUE_INDEX |
15329     HA_ADD_COLUMN |
15330     HA_COLUMN_STORAGE |
15331     HA_COLUMN_FORMAT |
15332     HA_ADD_PARTITION |
15333     HA_ALTER_TABLE_REORG |
15334     HA_CHANGE_AUTOINCREMENT_VALUE;
15335 }
15336 
15337 int ha_ndbcluster::check_if_supported_alter(TABLE *altered_table,
15338                                             HA_CREATE_INFO *create_info,
15339                                             Alter_info *alter_info,
15340                                             HA_ALTER_FLAGS *alter_flags,
15341                                             uint table_changes)
15342 {
15343   THD *thd= current_thd;
15344   HA_ALTER_FLAGS not_supported= ~(supported_alter_operations());
15345   uint i;
15346   const NDBTAB *tab= (const NDBTAB *) m_table;
15347   HA_ALTER_FLAGS add_column;
15348   HA_ALTER_FLAGS adding;
15349   HA_ALTER_FLAGS dropping;
15350 
15351   DBUG_ENTER("ha_ndbcluster::check_if_supported_alter");
15352   add_column= add_column | HA_ADD_COLUMN;
15353   adding= adding | HA_ADD_INDEX | HA_ADD_UNIQUE_INDEX;
15354   dropping= dropping | HA_DROP_INDEX | HA_DROP_UNIQUE_INDEX;
15355   partition_info *part_info= altered_table->part_info;
15356   const NDBTAB *old_tab= m_table;
15357 
15358   if (THDVAR(thd, use_copying_alter_table))
15359   {
15360     DBUG_PRINT("info", ("On-line alter table disabled"));
15361     DBUG_RETURN(HA_ALTER_NOT_SUPPORTED);
15362   }
15363 #ifndef DBUG_OFF
15364   {
15365     char dbug_string[HA_MAX_ALTER_FLAGS+1];
15366     alter_flags->print(dbug_string);
15367     DBUG_PRINT("info", ("Not supported %s", dbug_string));
15368   }
15369 #endif
15370 
15371   if (alter_flags->is_set(HA_ALTER_TABLE_REORG))
15372   {
15373     /*
15374       sql_partition.cc tries to compute what is going on
15375       and sets flags... which we clear here
15376     */
15377     if (part_info->use_default_num_partitions)
15378     {
15379       alter_flags->clear_bit(HA_COALESCE_PARTITION);
15380       alter_flags->clear_bit(HA_ADD_PARTITION);
15381     }
15382   }
15383 
15384   if ((*alter_flags & not_supported).is_set())
15385   {
15386 #ifndef DBUG_OFF
15387     HA_ALTER_FLAGS tmp = *alter_flags;
15388     tmp&= not_supported;
15389     char dbug_string[HA_MAX_ALTER_FLAGS+1];
15390     tmp.print(dbug_string);
15391     DBUG_PRINT("info", ("Detected unsupported change: %s", dbug_string));
15392 #endif
15393     DBUG_RETURN(HA_ALTER_NOT_SUPPORTED);
15394   }
15395 
15396   if (alter_flags->is_set(HA_ADD_COLUMN) ||
15397       alter_flags->is_set(HA_ADD_PARTITION) ||
15398       alter_flags->is_set(HA_ALTER_TABLE_REORG))
15399   {
15400      Ndb *ndb= get_ndb(thd);
15401      NDBDICT *dict= ndb->getDictionary();
15402      ndb->setDatabaseName(m_dbname);
15403      NdbDictionary::Table new_tab= *old_tab;
15404 
15405      if (alter_flags->is_set(HA_ADD_COLUMN))
15406      {
15407        NDBCOL col;
15408 
15409        /*
15410          Check that we are only adding columns
15411        */
15412        /*
15413          HA_COLUMN_STORAGE & HA_COLUMN_FORMAT are set if they are
15414          specified in a later command, even if they represent no change.
15415          This is probably a bug. Conclusion: add them to the add_column
15416          mask so that we silently "accept" them. If someone tries to
15417          change a column, HA_CHANGE_COLUMN would be set, which we don't
15418          support, so we will still return HA_ALTER_NOT_SUPPORTED in those cases.
15419        */
15420        add_column.set_bit(HA_COLUMN_STORAGE);
15421        add_column.set_bit(HA_COLUMN_FORMAT);
15422        if ((*alter_flags & ~add_column).is_set())
15423        {
15424          DBUG_PRINT("info", ("Only a pure add-column ALTER can be performed on-line"));
15425          DBUG_RETURN(HA_ALTER_NOT_SUPPORTED);
15426        }
15427        /*
15428          Check for extra fields for hidden primary key
15429          or user defined partitioning
15430        */
15431        if (table_share->primary_key == MAX_KEY ||
15432            part_info->part_type != HASH_PARTITION ||
15433            !part_info->list_of_part_fields)
15434          DBUG_RETURN(HA_ALTER_NOT_SUPPORTED);
15435 
15436        /* Find the new fields */
15437        for (uint i= table->s->fields; i < altered_table->s->fields; i++)
15438        {
15439          Field *field= altered_table->field[i];
15440          DBUG_PRINT("info", ("Found new field %s", field->field_name));
15441          DBUG_PRINT("info", ("storage_type %i, column_format %i",
15442                              (uint) field->field_storage_type(),
15443                              (uint) field->column_format()));
15444          /* Create new field to check if it can be added */
15445          if ((my_errno= create_ndb_column(0, col, field, create_info,
15446                                           COLUMN_FORMAT_TYPE_DYNAMIC)))
15447          {
15448            DBUG_PRINT("info", ("create_ndb_column returned %u", my_errno));
15449            DBUG_RETURN(my_errno);
15450          }
15451          new_tab.addColumn(col);
15452        }
15453      }
15454 
15455      if (alter_flags->is_set(HA_ALTER_TABLE_REORG))
15456      {
15457        new_tab.setFragmentCount(0);
15458        new_tab.setFragmentData(0, 0);
15459      }
15460      else if (alter_flags->is_set(HA_ADD_PARTITION))
15461      {
15462        DBUG_PRINT("info", ("Adding partition (%u)", part_info->num_parts));
15463        new_tab.setFragmentCount(part_info->num_parts);
15464      }
15465 
15466      NDB_Modifiers table_modifiers(ndb_table_modifiers);
15467      table_modifiers.parse(thd, "NDB_TABLE=", create_info->comment.str,
15468                            create_info->comment.length);
15469      const NDB_Modifier* mod_nologging = table_modifiers.get("NOLOGGING");
15470 
15471      if (mod_nologging->m_found)
15472      {
15473        new_tab.setLogging(!mod_nologging->m_val_bool);
15474      }
15475 
15476      if (dict->supportedAlterTable(*old_tab, new_tab))
15477      {
15478        DBUG_PRINT("info", ("Adding column(s) supported on-line"));
15479      }
15480      else
15481      {
15482        DBUG_PRINT("info",("Adding column not supported on-line"));
15483        DBUG_RETURN(HA_ALTER_NOT_SUPPORTED);
15484      }
15485   }
15486 
15487   /*
15488     Check that we are not adding multiple indexes
15489   */
15490   if ((*alter_flags & adding).is_set())
15491   {
15492     if (((altered_table->s->keys - table->s->keys) != 1) ||
15493         (*alter_flags & dropping).is_set())
15494     {
15495        DBUG_PRINT("info",("Only one index can be added on-line"));
15496        DBUG_RETURN(HA_ALTER_NOT_SUPPORTED);
15497     }
15498   }
15499 
15500   /*
15501     Check that we are not dropping multiple indexes
15502   */
15503   if ((*alter_flags & dropping).is_set())
15504   {
15505     if (((table->s->keys - altered_table->s->keys) != 1) ||
15506         (*alter_flags & adding).is_set())
15507     {
15508        DBUG_PRINT("info",("Only one index can be dropped on-line"));
15509        DBUG_RETURN(HA_ALTER_NOT_SUPPORTED);
15510     }
15511   }
15512 
15513   for (i= 0; i < table->s->fields; i++)
15514   {
15515     Field *field= table->field[i];
15516     const NDBCOL *col= tab->getColumn(i);
15517 
15518     NDBCOL new_col;
15519     create_ndb_column(0, new_col, field, create_info);
15520 
15521     bool index_on_column = false;
15522     /**
15523      * Check all indexes to determine if column has index instead of checking
15524      *   field->flags (PRI_KEY_FLAG | UNIQUE_KEY_FLAG | MULTIPLE_KEY_FLAG
15525      *   since field->flags appears to only be set on first column in
15526      *   multi-part index
15527      */
15528     for (uint j= 0; j<table->s->keys; j++)
15529     {
15530       KEY* key_info= table->key_info + j;
15531       KEY_PART_INFO* key_part= key_info->key_part;
15532       KEY_PART_INFO* end= key_part+key_info->user_defined_key_parts;
15533       for (; key_part != end; key_part++)
15534       {
15535         if (key_part->field->field_index == i)
15536         {
15537           index_on_column= true;
15538           j= table->s->keys; // break outer loop
15539           break;
15540         }
15541       }
15542     }
15543 
15544     if (index_on_column == false && (*alter_flags & adding).is_set())
15545     {
15546       for (uint j= table->s->keys; j<altered_table->s->keys; j++)
15547       {
15548         KEY* key_info= altered_table->key_info + j;
15549         KEY_PART_INFO* key_part= key_info->key_part;
15550         KEY_PART_INFO* end= key_part+key_info->user_defined_key_parts;
15551         for (; key_part != end; key_part++)
15552         {
15553           if (key_part->field->field_index == i)
15554           {
15555             index_on_column= true;
15556             j= altered_table->s->keys; // break outer loop
15557             break;
15558           }
15559         }
15560       }
15561     }
15562 
15563     /**
15564      * This is a "copy" of code in ::create()
15565      *   that "auto-converts" columns with keys into memory
15566      *   (unless storage disk is explicitly added)
15567      * This is needed for the col->getStorageType() ==
15568      * new_col.getStorageType() check further down
15569      */
15570     if (index_on_column)
15571     {
15572       if (field->field_storage_type() == HA_SM_DISK)
15573       {
15574         DBUG_RETURN(HA_ALTER_NOT_SUPPORTED);
15575       }
15576       new_col.setStorageType(NdbDictionary::Column::StorageTypeMemory);
15577     }
15578     else if (field->field_storage_type() == HA_SM_DEFAULT)
15579     {
15580       /**
15581        * If user didn't specify any column format, keep old
15582        *   to make as many alter's as possible online
15583        */
15584       new_col.setStorageType(col->getStorageType());
15585     }
15586 
15587     if (col->getStorageType() != new_col.getStorageType())
15588     {
15589       DBUG_PRINT("info", ("Column storage media is changed"));
15590       DBUG_RETURN(HA_ALTER_NOT_SUPPORTED);
15591     }
15592 
15593     if (field->flags & FIELD_IS_RENAMED)
15594     {
15595       DBUG_PRINT("info", ("Field has been renamed, copy table"));
15596       DBUG_RETURN(HA_ALTER_NOT_SUPPORTED);
15597     }
15598 
15599     if ((field->flags & FIELD_IN_ADD_INDEX) &&
15600         (col->getStorageType() == NdbDictionary::Column::StorageTypeDisk))
15601     {
15602       DBUG_PRINT("info", ("add/drop index not supported for disk stored column"));
15603       DBUG_RETURN(HA_ALTER_NOT_SUPPORTED);
15604     }
15605   }
15606 
15607   if ((*alter_flags & HA_CHANGE_AUTOINCREMENT_VALUE).is_set())
15608   {
15609     /* Check that only auto_increment value was changed */
15610     HA_ALTER_FLAGS change_auto_flags;
15611     change_auto_flags= change_auto_flags | HA_CHANGE_AUTOINCREMENT_VALUE;
    if ((*alter_flags & ~change_auto_flags).is_set())
    {
      DBUG_PRINT("info", ("Not only auto_increment value changed"));
      DBUG_RETURN(HA_ALTER_NOT_SUPPORTED);
    }
  }
  else
  {
    /* Check that row format didn't change */
    if ((create_info->used_fields & HA_CREATE_USED_AUTO) &&
        get_row_type() != create_info->row_type)
    {
      DBUG_PRINT("info", ("Row format changed"));
      DBUG_RETURN(HA_ALTER_NOT_SUPPORTED);
    }
  }

  DBUG_PRINT("info", ("Ndb supports ALTER on-line"));
  DBUG_RETURN(HA_ALTER_SUPPORTED_WAIT_LOCK);
}

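/**
  First phase of online ALTER TABLE.

  In rough outline (see the code below): verify that the global
  schema lock is held, open an NDB schema transaction, and prepare
  the new table definition: add/drop indexes, add (dynamic) columns
  and prepare any partition reorganisation.  The schema transaction
  is committed or aborted in alter_table_phase2.

  As an illustrative example (table and column names are made up),
  a statement such as
    ALTER TABLE t1 ADD COLUMN b INT COLUMN_FORMAT DYNAMIC;
  can be prepared here without copying the table.
*/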
int ha_ndbcluster::alter_table_phase1(THD *thd,
                                      TABLE *altered_table,
                                      HA_CREATE_INFO *create_info,
                                      HA_ALTER_INFO *alter_info,
                                      HA_ALTER_FLAGS *alter_flags)
{
  int error= 0;
  uint i;
  Thd_ndb *thd_ndb= get_thd_ndb(thd);
  Ndb *ndb= get_ndb(thd);
  NDBDICT *dict= ndb->getDictionary();
  ndb->setDatabaseName(m_dbname);
  NDB_ALTER_DATA *alter_data;
  const NDBTAB *old_tab;
  NdbDictionary::Table *new_tab;
  HA_ALTER_FLAGS adding;
  HA_ALTER_FLAGS dropping;

  DBUG_ENTER("alter_table_phase1");
  adding= adding | HA_ADD_INDEX | HA_ADD_UNIQUE_INDEX;
  dropping= dropping | HA_DROP_INDEX | HA_DROP_UNIQUE_INDEX;

  if (!thd_ndb->has_required_global_schema_lock("ha_ndbcluster::alter_table_phase1"))
    DBUG_RETURN(HA_ERR_NO_CONNECTION);

  if (!(alter_data= new NDB_ALTER_DATA(dict, m_table)))
    DBUG_RETURN(HA_ERR_OUT_OF_MEM);
  old_tab= alter_data->old_table;
  new_tab= alter_data->new_table;
  alter_info->data= alter_data;
#ifndef DBUG_OFF
  {
    char dbug_string[HA_MAX_ALTER_FLAGS+1];
    alter_flags->print(dbug_string);
    DBUG_PRINT("info", ("altered_table %s, alter_flags %s",
                        altered_table->s->table_name.str,
                        (char *) dbug_string));
  }
#endif

  prepare_for_alter();

  if (dict->beginSchemaTrans() == -1)
  {
    DBUG_PRINT("info", ("Failed to start schema transaction"));
    ERR_PRINT(dict->getNdbError());
    error= ndb_to_mysql_error(&dict->getNdbError());
    table->file->print_error(error, MYF(0));
    goto err;
  }

  if ((*alter_flags & adding).is_set())
  {
    KEY           *key_info;
    KEY           *key;
    uint          *idx_p;
    uint          *idx_end_p;
    KEY_PART_INFO *key_part;
    KEY_PART_INFO *part_end;
    DBUG_PRINT("info", ("Adding indexes"));
    key_info= (KEY*) thd->alloc(sizeof(KEY) * alter_info->index_add_count);
    key= key_info;
    for (idx_p= alter_info->index_add_buffer,
         idx_end_p= idx_p + alter_info->index_add_count;
         idx_p < idx_end_p;
         idx_p++, key++)
    {
      /* Copy the KEY struct. */
      *key= alter_info->key_info_buffer[*idx_p];
      /* Fix the key parts. */
      part_end= key->key_part + key->user_defined_key_parts;
      for (key_part= key->key_part; key_part < part_end; key_part++)
        key_part->field= table->field[key_part->fieldnr];
    }
    if ((error= add_index_impl(thd, altered_table, key_info,
                               alter_info->index_add_count)))
    {
      /*
        Exchange table->key_info for the error message.  If key
        numbers are later replaced by key names in the message,
        the key info must be correct.
      */
      KEY *save_key_info= table->key_info;
      table->key_info= key_info;
      table->file->print_error(error, MYF(0));
      table->key_info= save_key_info;
      goto abort;
    }
  }

  if ((*alter_flags & dropping).is_set())
  {
    uint          *key_numbers;
    uint          *keyno_p;
    uint          *idx_p;
    uint          *idx_end_p;
    DBUG_PRINT("info", ("Renumbering indexes"));
    /* The prepare_drop_index() method takes an array of key numbers. */
    key_numbers= (uint*) thd->alloc(sizeof(uint) * alter_info->index_drop_count);
    keyno_p= key_numbers;
    /* Get the number of each key. */
    for (idx_p= alter_info->index_drop_buffer,
         idx_end_p= idx_p + alter_info->index_drop_count;
         idx_p < idx_end_p;
         idx_p++, keyno_p++)
      *keyno_p= *idx_p;
    /*
      Tell the handler to prepare for drop indexes.
      This re-numbers the indexes to get rid of gaps.
    */
    if ((error= prepare_drop_index(table, key_numbers,
                                   alter_info->index_drop_count)))
    {
      table->file->print_error(error, MYF(0));
      goto abort;
    }
  }

  if (alter_flags->is_set(HA_ADD_COLUMN))
  {
     NDBCOL col;

     /* Find the new fields */
     for (i= table->s->fields; i < altered_table->s->fields; i++)
     {
       Field *field= altered_table->field[i];
       DBUG_PRINT("info", ("Found new field %s", field->field_name));
       if ((my_errno= create_ndb_column(thd, col, field, create_info,
                                        COLUMN_FORMAT_TYPE_DYNAMIC)))
       {
         error= my_errno;
         goto abort;
       }
       /*
         If the user has not specified the field format,
         make it dynamic to enable on-line ADD COLUMN
       */
       if (field->column_format() == COLUMN_FORMAT_TYPE_DEFAULT &&
           create_info->row_type == ROW_TYPE_DEFAULT &&
           col.getDynamic())
       {
         push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN,
                             ER_ILLEGAL_HA_CREATE_OPTION,
                             "Converted FIXED field to DYNAMIC "
                             "to enable on-line ADD COLUMN",
                             field->field_name);
       }
       new_tab->addColumn(col);
     }
  }

  if (alter_flags->is_set(HA_ALTER_TABLE_REORG) || alter_flags->is_set(HA_ADD_PARTITION))
  {
    if (alter_flags->is_set(HA_ALTER_TABLE_REORG))
    {
      new_tab->setFragmentCount(0);
      new_tab->setFragmentData(0, 0);
    }
    else if (alter_flags->is_set(HA_ADD_PARTITION))
    {
      partition_info *part_info= altered_table->part_info;
      new_tab->setFragmentCount(part_info->num_parts);
    }

    int res= dict->prepareHashMap(*old_tab, *new_tab);
    if (res == -1)
    {
      const NdbError err= dict->getNdbError();
      my_errno= ndb_to_mysql_error(&err);
      goto abort;
    }
  }

  DBUG_RETURN(0);
abort:
  if (dict->endSchemaTrans(NdbDictionary::Dictionary::SchemaTransAbort)
        == -1)
  {
    DBUG_PRINT("info", ("Failed to abort schema transaction"));
    ERR_PRINT(dict->getNdbError());
    error= ndb_to_mysql_error(&dict->getNdbError());
  }
err:
  set_ndb_share_state(m_share, NSS_INITIAL);
  /* ndb_share reference schema free */
  DBUG_PRINT("NDB_SHARE", ("%s binlog schema free  use_count: %u",
                           m_share->key, m_share->use_count));
  free_share(&m_share); // Decrease ref_count
  delete alter_data;
  DBUG_RETURN(error);
}

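/**
  Pack the new frm file and install it in the NDB dictionary by
  altering the table definition (alterTableGlobal).  Called from
  alter_table_phase2 while the schema transaction from phase 1 is
  still open; on failure an error is reported here and the caller
  aborts the transaction.
*/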
int ha_ndbcluster::alter_frm(THD *thd, const char *file,
                             NDB_ALTER_DATA *alter_data)
{
  uchar *data= NULL, *pack_data= NULL;
  size_t length, pack_length;
  int error= 0;

  DBUG_ENTER("alter_frm");

  DBUG_PRINT("enter", ("file: %s", file));

  NDBDICT *dict= alter_data->dictionary;

  // TODO handle this
  DBUG_ASSERT(m_table != 0);

  DBUG_ASSERT(get_ndb_share_state(m_share) == NSS_ALTERED);
  if (readfrm(file, &data, &length) ||
      packfrm(data, length, &pack_data, &pack_length))
  {
    DBUG_PRINT("info", ("Missing frm for %s", m_tabname));
    my_free((char*)data, MYF(MY_ALLOW_ZERO_PTR));
    my_free((char*)pack_data, MYF(MY_ALLOW_ZERO_PTR));
    error= 1;
    my_error(ER_FILE_NOT_FOUND, MYF(0), file);
  }
  else
  {
    DBUG_PRINT("info", ("Table %s has changed, altering frm in ndb",
                        m_tabname));
    const NDBTAB *old_tab= alter_data->old_table;
    NdbDictionary::Table *new_tab= alter_data->new_table;

    new_tab->setFrm(pack_data, (Uint32)pack_length);
    if (dict->alterTableGlobal(*old_tab, *new_tab))
    {
      DBUG_PRINT("info", ("On-line alter of table %s failed", m_tabname));
      error= ndb_to_mysql_error(&dict->getNdbError());
      my_error(error, MYF(0));
    }
    my_free((char*)data, MYF(MY_ALLOW_ZERO_PTR));
    my_free((char*)pack_data, MYF(MY_ALLOW_ZERO_PTR));
  }

  /* ndb_share reference schema(?) free */
  DBUG_PRINT("NDB_SHARE", ("%s binlog schema(?) free  use_count: %u",
                           m_share->key, m_share->use_count));

  DBUG_RETURN(error);
}

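/**
  Second phase of online ALTER TABLE: finally drop any indexes
  marked in phase 1, install the new frm via alter_frm() and commit
  the schema transaction (or abort it on failure).  A changed
  auto_increment value is applied after a successful commit.
*/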
int ha_ndbcluster::alter_table_phase2(THD *thd,
                                      TABLE *altered_table,
                                      HA_CREATE_INFO *create_info,
                                      HA_ALTER_INFO *alter_info,
                                      HA_ALTER_FLAGS *alter_flags)

{
  int error= 0;
  Thd_ndb *thd_ndb= get_thd_ndb(thd);
  NDB_ALTER_DATA *alter_data= (NDB_ALTER_DATA *) alter_info->data;
  NDBDICT *dict= alter_data->dictionary;
  HA_ALTER_FLAGS dropping;

  DBUG_ENTER("alter_table_phase2");
  dropping= dropping | HA_DROP_INDEX | HA_DROP_UNIQUE_INDEX;

  if (!thd_ndb->has_required_global_schema_lock("ha_ndbcluster::alter_table_phase2"))
  {
    error= HA_ERR_NO_CONNECTION;
    goto err;
  }

  if ((*alter_flags & dropping).is_set())
  {
    /* Tell the handler to finally drop the indexes. */
    if ((error= final_drop_index(table)))
    {
      print_error(error, MYF(0));
      goto abort;
    }
  }

  DBUG_PRINT("info", ("getting frm file %s", altered_table->s->path.str));

  DBUG_ASSERT(alter_data);
  error= alter_frm(thd, altered_table->s->path.str, alter_data);
  if (!error)
  {
    /*
     * Alter successful, commit schema transaction
     */
    if (dict->endSchemaTrans() == -1)
    {
      error= ndb_to_mysql_error(&dict->getNdbError());
      DBUG_PRINT("info", ("Failed to commit schema transaction, error %u",
                          error));
      table->file->print_error(error, MYF(0));
      goto err;
    }
    if ((*alter_flags & HA_CHANGE_AUTOINCREMENT_VALUE).is_set())
      error= set_auto_inc_val(thd, create_info->auto_increment_value);
    if (error)
    {
      DBUG_PRINT("info", ("Failed to set auto_increment value"));
      goto err;
    }
  }
  else // if (error)
  {
abort:
    if (dict->endSchemaTrans(NdbDictionary::Dictionary::SchemaTransAbort)
        == -1)
    {
      DBUG_PRINT("info", ("Failed to abort schema transaction"));
      ERR_PRINT(dict->getNdbError());
    }
err:
    /* ndb_share reference schema free */
    DBUG_PRINT("NDB_SHARE", ("%s binlog schema free  use_count: %u",
                             m_share->key, m_share->use_count));
    delete alter_data;
    alter_info->data= 0;
  }
  set_ndb_share_state(m_share, NSS_INITIAL);
  free_share(&m_share); // Decrease ref_count
  DBUG_RETURN(error);
}

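/**
  Third phase of online ALTER TABLE: distribute the change to all
  mysqld's through the schema operation log, first as a PREPARE
  event carrying the old table version and then as a COMMIT event
  carrying the new one, so that every server switches to the new
  table definition and event operation.
*/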
int ha_ndbcluster::alter_table_phase3(THD *thd, TABLE *table,
                                      HA_CREATE_INFO *create_info,
                                      HA_ALTER_INFO *alter_info,
                                      HA_ALTER_FLAGS *alter_flags)
{
  Thd_ndb *thd_ndb= get_thd_ndb(thd);
  DBUG_ENTER("alter_table_phase3");

  NDB_ALTER_DATA *alter_data= (NDB_ALTER_DATA *) alter_info->data;
  if (!thd_ndb->has_required_global_schema_lock("ha_ndbcluster::alter_table_phase3"))
  {
    delete alter_data;
    alter_info->data= 0;
    DBUG_RETURN(HA_ERR_NO_CONNECTION);
  }

  const char *db= table->s->db.str;
  const char *name= table->s->table_name.str;

  /*
    All mysqld's will read frms from disk and set up new
    event operations for the table (new_op)
  */
  uint32 table_id= 0, table_version= 0;
  DBUG_ASSERT(alter_data != 0);
  if (alter_data)
  {
    table_id= alter_data->table_id;
    table_version= alter_data->old_table_version;
  }
  ndbcluster_log_schema_op(thd, thd->query(), thd->query_length(),
                           db, name,
                           table_id, table_version,
                           SOT_ONLINE_ALTER_TABLE_PREPARE,
                           NULL, NULL);

  /*
    Get table id/version for new table
  */
  table_id= 0;
  table_version= 0;
  {
    Ndb* ndb= get_ndb(thd);
    DBUG_ASSERT(ndb != 0);
    if (ndb)
    {
      ndb->setDatabaseName(db);
      Ndb_table_guard ndbtab(ndb->getDictionary(), name);
      const NDBTAB *new_tab= ndbtab.get_table();
      DBUG_ASSERT(new_tab != 0);
      if (new_tab)
      {
        table_id= new_tab->getObjectId();
        table_version= new_tab->getObjectVersion();
      }
    }
  }

  /*
    All mysqld's will switch to using the new_op, and delete the old
    event operation
  */
  ndbcluster_log_schema_op(thd, thd->query(), thd->query_length(),
                           db, name,
                           table_id, table_version,
                           SOT_ONLINE_ALTER_TABLE_COMMIT,
                           NULL, NULL);

  delete alter_data;
  alter_info->data= 0;
  DBUG_RETURN(0);
}
#endif

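/*
  Helpers that translate the parsed st_alter_tablespace info into
  the corresponding NdbDictionary objects.  By convention they
  return TRUE on error (e.g. a size that does not fit in 32 bits)
  and FALSE on success.
*/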
bool set_up_tablespace(st_alter_tablespace *alter_info,
                       NdbDictionary::Tablespace *ndb_ts)
{
  if (alter_info->extent_size >= (Uint64(1) << 32))
  {
    // TODO set correct error
    return TRUE;
  }
  ndb_ts->setName(alter_info->tablespace_name);
  ndb_ts->setExtentSize(Uint32(alter_info->extent_size));
  ndb_ts->setDefaultLogfileGroup(alter_info->logfile_group_name);
  return FALSE;
}

bool set_up_datafile(st_alter_tablespace *alter_info,
                     NdbDictionary::Datafile *ndb_df)
{
  if (alter_info->max_size > 0)
  {
    my_error(ER_TABLESPACE_AUTO_EXTEND_ERROR, MYF(0));
    return TRUE;
  }
  ndb_df->setPath(alter_info->data_file_name);
  ndb_df->setSize(alter_info->initial_size);
  ndb_df->setTablespace(alter_info->tablespace_name);
  return FALSE;
}

bool set_up_logfile_group(st_alter_tablespace *alter_info,
                          NdbDictionary::LogfileGroup *ndb_lg)
{
  if (alter_info->undo_buffer_size >= (Uint64(1) << 32))
  {
    // TODO set correct error
    return TRUE;
  }

  ndb_lg->setName(alter_info->logfile_group_name);
  ndb_lg->setUndoBufferSize(Uint32(alter_info->undo_buffer_size));
  return FALSE;
}

bool set_up_undofile(st_alter_tablespace *alter_info,
                     NdbDictionary::Undofile *ndb_uf)
{
  ndb_uf->setPath(alter_info->undo_file_name);
  ndb_uf->setSize(alter_info->initial_size);
  ndb_uf->setLogfileGroup(alter_info->logfile_group_name);
  return FALSE;
}

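/**
  Implement CREATE/ALTER/DROP TABLESPACE and LOGFILE GROUP for NDB,
  creating or dropping the dictionary objects and then distributing
  the schema operation to the other mysqld's.  Statements of roughly
  this form (the names here are examples only) end up in this
  function:

    CREATE LOGFILE GROUP lg1
      ADD UNDOFILE 'undo1.log' ENGINE NDB;
    CREATE TABLESPACE ts1
      ADD DATAFILE 'data1.dat' USE LOGFILE GROUP lg1 ENGINE NDB;
*/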
int ndbcluster_alter_tablespace(handlerton *hton,
                                THD* thd, st_alter_tablespace *alter_info)
{
  int is_tablespace= 0;
  NdbError err;
  NDBDICT *dict;
  int error;
  const char *errmsg;
  Ndb *ndb;
  DBUG_ENTER("ndbcluster_alter_tablespace");
  LINT_INIT(errmsg);

  ndb= check_ndb_in_thd(thd);
  if (ndb == NULL)
  {
    DBUG_RETURN(HA_ERR_NO_CONNECTION);
  }
  dict= ndb->getDictionary();

  uint32 table_id= 0, table_version= 0;
  switch (alter_info->ts_cmd_type) {
  case (CREATE_TABLESPACE):
  {
    error= ER_CREATE_FILEGROUP_FAILED;

    NdbDictionary::Tablespace ndb_ts;
    NdbDictionary::Datafile ndb_df;
    NdbDictionary::ObjectId objid;
    if (set_up_tablespace(alter_info, &ndb_ts))
    {
      DBUG_RETURN(1);
    }
    if (set_up_datafile(alter_info, &ndb_df))
    {
      DBUG_RETURN(1);
    }
    errmsg= "TABLESPACE";
    if (dict->createTablespace(ndb_ts, &objid))
    {
      DBUG_PRINT("error", ("createTablespace returned %d", error));
      goto ndberror;
    }
    table_id = objid.getObjectId();
    table_version = objid.getObjectVersion();
    if (dict->getWarningFlags() &
        NdbDictionary::Dictionary::WarnExtentRoundUp)
    {
      push_warning_printf(current_thd, Sql_condition::WARN_LEVEL_WARN,
                          dict->getWarningFlags(),
                          "Extent size rounded up to kernel page size");
    }
    DBUG_PRINT("alter_info", ("Successfully created Tablespace"));
    errmsg= "DATAFILE";
    if (dict->createDatafile(ndb_df))
    {
      err= dict->getNdbError();
      NdbDictionary::Tablespace tmp= dict->getTablespace(ndb_ts.getName());
      if (dict->getNdbError().code == 0 &&
          tmp.getObjectId() == objid.getObjectId() &&
          tmp.getObjectVersion() == objid.getObjectVersion())
      {
        dict->dropTablespace(tmp);
      }

      DBUG_PRINT("error", ("createDatafile returned %d", error));
      goto ndberror2;
    }
    if (dict->getWarningFlags() &
        NdbDictionary::Dictionary::WarnDatafileRoundUp)
    {
      push_warning_printf(current_thd, Sql_condition::WARN_LEVEL_WARN,
                          dict->getWarningFlags(),
                          "Datafile size rounded up to extent size");
    }
    else /* produce only 1 message */
    if (dict->getWarningFlags() &
        NdbDictionary::Dictionary::WarnDatafileRoundDown)
    {
      push_warning_printf(current_thd, Sql_condition::WARN_LEVEL_WARN,
                          dict->getWarningFlags(),
                          "Datafile size rounded down to extent size");
    }
    is_tablespace= 1;
    break;
  }
  case (ALTER_TABLESPACE):
  {
    error= ER_ALTER_FILEGROUP_FAILED;
    if (alter_info->ts_alter_tablespace_type == ALTER_TABLESPACE_ADD_FILE)
    {
      NdbDictionary::Datafile ndb_df;
      if (set_up_datafile(alter_info, &ndb_df))
      {
        DBUG_RETURN(1);
      }
      errmsg= " CREATE DATAFILE";
      NdbDictionary::ObjectId objid;
      if (dict->createDatafile(ndb_df, false, &objid))
      {
        goto ndberror;
      }
      table_id= objid.getObjectId();
      table_version= objid.getObjectVersion();
      if (dict->getWarningFlags() &
          NdbDictionary::Dictionary::WarnDatafileRoundUp)
      {
        push_warning_printf(current_thd, Sql_condition::WARN_LEVEL_WARN,
                            dict->getWarningFlags(),
                            "Datafile size rounded up to extent size");
      }
      else /* produce only 1 message */
      if (dict->getWarningFlags() &
          NdbDictionary::Dictionary::WarnDatafileRoundDown)
      {
        push_warning_printf(current_thd, Sql_condition::WARN_LEVEL_WARN,
                            dict->getWarningFlags(),
                            "Datafile size rounded down to extent size");
      }
    }
    else if (alter_info->ts_alter_tablespace_type == ALTER_TABLESPACE_DROP_FILE)
    {
      NdbDictionary::Tablespace ts= dict->getTablespace(alter_info->tablespace_name);
      NdbDictionary::Datafile df= dict->getDatafile(0, alter_info->data_file_name);
      NdbDictionary::ObjectId objid;
      df.getTablespaceId(&objid);
      table_id = df.getObjectId();
      table_version = df.getObjectVersion();
      if (ts.getObjectId() == objid.getObjectId() &&
          strcmp(df.getPath(), alter_info->data_file_name) == 0)
      {
        errmsg= " DROP DATAFILE";
        if (dict->dropDatafile(df))
        {
          goto ndberror;
        }
      }
      else
      {
        DBUG_PRINT("error", ("No such datafile"));
        my_error(ER_ALTER_FILEGROUP_FAILED, MYF(0), " NO SUCH FILE");
        DBUG_RETURN(1);
      }
    }
    else
    {
      DBUG_PRINT("error", ("Unsupported alter tablespace: %d",
                           alter_info->ts_alter_tablespace_type));
      DBUG_RETURN(HA_ADMIN_NOT_IMPLEMENTED);
    }
    is_tablespace= 1;
    break;
  }
  case (CREATE_LOGFILE_GROUP):
  {
    error= ER_CREATE_FILEGROUP_FAILED;
    NdbDictionary::LogfileGroup ndb_lg;
    NdbDictionary::Undofile ndb_uf;
    NdbDictionary::ObjectId objid;
    if (alter_info->undo_file_name == NULL)
    {
      /*
        REDO files in LOGFILE GROUP not supported yet
      */
      DBUG_RETURN(HA_ADMIN_NOT_IMPLEMENTED);
    }
    if (set_up_logfile_group(alter_info, &ndb_lg))
    {
      DBUG_RETURN(1);
    }
    errmsg= "LOGFILE GROUP";
    if (dict->createLogfileGroup(ndb_lg, &objid))
    {
      goto ndberror;
    }
    table_id = objid.getObjectId();
    table_version = objid.getObjectVersion();
    if (dict->getWarningFlags() &
        NdbDictionary::Dictionary::WarnUndobufferRoundUp)
    {
      push_warning_printf(current_thd, Sql_condition::WARN_LEVEL_WARN,
                          dict->getWarningFlags(),
                          "Undo buffer size rounded up to kernel page size");
    }
    DBUG_PRINT("alter_info", ("Successfully created Logfile Group"));
    if (set_up_undofile(alter_info, &ndb_uf))
    {
      DBUG_RETURN(1);
    }
    errmsg= "UNDOFILE";
    if (dict->createUndofile(ndb_uf))
    {
      err= dict->getNdbError();
      NdbDictionary::LogfileGroup tmp= dict->getLogfileGroup(ndb_lg.getName());
      if (dict->getNdbError().code == 0 &&
          tmp.getObjectId() == objid.getObjectId() &&
          tmp.getObjectVersion() == objid.getObjectVersion())
      {
        dict->dropLogfileGroup(tmp);
      }
      goto ndberror2;
    }
    if (dict->getWarningFlags() &
        NdbDictionary::Dictionary::WarnUndofileRoundDown)
    {
      push_warning_printf(current_thd, Sql_condition::WARN_LEVEL_WARN,
                          dict->getWarningFlags(),
                          "Undofile size rounded down to kernel page size");
    }
    break;
  }
  case (ALTER_LOGFILE_GROUP):
  {
    error= ER_ALTER_FILEGROUP_FAILED;
    if (alter_info->undo_file_name == NULL)
    {
      /*
        REDO files in LOGFILE GROUP not supported yet
      */
      DBUG_RETURN(HA_ADMIN_NOT_IMPLEMENTED);
    }
    NdbDictionary::Undofile ndb_uf;
    if (set_up_undofile(alter_info, &ndb_uf))
    {
      DBUG_RETURN(1);
    }
    errmsg= "CREATE UNDOFILE";
    NdbDictionary::ObjectId objid;
    if (dict->createUndofile(ndb_uf, false, &objid))
    {
      goto ndberror;
    }
    table_id = objid.getObjectId();
    table_version = objid.getObjectVersion();
    if (dict->getWarningFlags() &
        NdbDictionary::Dictionary::WarnUndofileRoundDown)
    {
      push_warning_printf(current_thd, Sql_condition::WARN_LEVEL_WARN,
                          dict->getWarningFlags(),
                          "Undofile size rounded down to kernel page size");
    }
    break;
  }
  case (DROP_TABLESPACE):
  {
    error= ER_DROP_FILEGROUP_FAILED;
    errmsg= "TABLESPACE";
    NdbDictionary::Tablespace ts=
      dict->getTablespace(alter_info->tablespace_name);
    table_id= ts.getObjectId();
    table_version= ts.getObjectVersion();
    if (dict->dropTablespace(ts))
    {
      goto ndberror;
    }
    is_tablespace= 1;
    break;
  }
  case (DROP_LOGFILE_GROUP):
  {
    error= ER_DROP_FILEGROUP_FAILED;
    errmsg= "LOGFILE GROUP";
    NdbDictionary::LogfileGroup lg=
      dict->getLogfileGroup(alter_info->logfile_group_name);
    table_id= lg.getObjectId();
    table_version= lg.getObjectVersion();
    if (dict->dropLogfileGroup(lg))
    {
      goto ndberror;
    }
    break;
  }
  case (CHANGE_FILE_TABLESPACE):
  {
    DBUG_RETURN(HA_ADMIN_NOT_IMPLEMENTED);
  }
  case (ALTER_ACCESS_MODE_TABLESPACE):
  {
    DBUG_RETURN(HA_ADMIN_NOT_IMPLEMENTED);
  }
  default:
  {
    DBUG_RETURN(HA_ADMIN_NOT_IMPLEMENTED);
  }
  }
  if (is_tablespace)
    ndbcluster_log_schema_op(thd,
                             thd->query(), thd->query_length(),
                             "", alter_info->tablespace_name,
                             table_id, table_version,
                             SOT_TABLESPACE, NULL, NULL);
  else
    ndbcluster_log_schema_op(thd,
                             thd->query(), thd->query_length(),
                             "", alter_info->logfile_group_name,
                             table_id, table_version,
                             SOT_LOGFILE_GROUP, NULL, NULL);
  DBUG_RETURN(FALSE);

ndberror:
  err= dict->getNdbError();
ndberror2:
  ndb_to_mysql_error(&err);

  my_error(error, MYF(0), errmsg);
  DBUG_RETURN(1);
}


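/*
  The number of partitions of an NDB table is its fragment count,
  fetched here from the dictionary.
*/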
bool ha_ndbcluster::get_no_parts(const char *name, uint *no_parts)
{
  THD *thd= current_thd;
  Ndb *ndb;
  NDBDICT *dict;
  int err;
  DBUG_ENTER("ha_ndbcluster::get_no_parts");
  LINT_INIT(err);

  set_dbname(name);
  set_tabname(name);
  for (;;)
  {
    if (check_ndb_connection(thd))
    {
      err= HA_ERR_NO_CONNECTION;
      break;
    }
    ndb= get_ndb(thd);
    ndb->setDatabaseName(m_dbname);
    Ndb_table_guard ndbtab_g(dict= ndb->getDictionary(), m_tabname);
    if (!ndbtab_g.get_table())
      ERR_BREAK(dict->getNdbError(), err);
    *no_parts= ndbtab_g.get_table()->getFragmentCount();
    DBUG_RETURN(FALSE);
  }

  print_error(err, MYF(0));
  DBUG_RETURN(TRUE);
}

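/**
  Fill INFORMATION_SCHEMA.FILES: one row per datafile and undofile
  for each alive data node, plus one row per tablespace and logfile
  group.  For example, a query such as

    SELECT FILE_NAME, FILE_TYPE, EXTRA
    FROM INFORMATION_SCHEMA.FILES WHERE ENGINE = 'ndbcluster';

  is answered from the rows stored below.
*/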
static int ndbcluster_fill_files_table(handlerton *hton,
                                       THD *thd,
                                       TABLE_LIST *tables,
                                       Item *cond)
{
  TABLE* table= tables->table;
  Ndb *ndb= check_ndb_in_thd(thd);
  NdbDictionary::Dictionary* dict= ndb->getDictionary();
  NdbDictionary::Dictionary::List dflist;
  NdbError ndberr;
  uint i;
  DBUG_ENTER("ndbcluster_fill_files_table");

  dict->listObjects(dflist, NdbDictionary::Object::Datafile);
  ndberr= dict->getNdbError();
  if (ndberr.classification != NdbError::NoError)
    ERR_RETURN(ndberr);

  for (i= 0; i < dflist.count; i++)
  {
    NdbDictionary::Dictionary::List::Element& elt = dflist.elements[i];
    Ndb_cluster_connection_node_iter iter;
    uint id;

    g_ndb_cluster_connection->init_get_next_node(iter);

    while ((id= g_ndb_cluster_connection->get_next_alive_node(iter)))
    {
      init_fill_schema_files_row(table);
      NdbDictionary::Datafile df= dict->getDatafile(id, elt.name);
      ndberr= dict->getNdbError();
      if (ndberr.classification != NdbError::NoError)
      {
        if (ndberr.classification == NdbError::SchemaError)
          continue;

        if (ndberr.classification == NdbError::UnknownResultError)
          continue;

        ERR_RETURN(ndberr);
      }
      NdbDictionary::Tablespace ts= dict->getTablespace(df.getTablespace());
      ndberr= dict->getNdbError();
      if (ndberr.classification != NdbError::NoError)
      {
        if (ndberr.classification == NdbError::SchemaError)
          continue;
        ERR_RETURN(ndberr);
      }

      table->field[IS_FILES_FILE_NAME]->set_notnull();
      table->field[IS_FILES_FILE_NAME]->store(elt.name, strlen(elt.name),
                                              system_charset_info);
      table->field[IS_FILES_FILE_TYPE]->set_notnull();
      table->field[IS_FILES_FILE_TYPE]->store("DATAFILE", 8,
                                              system_charset_info);
      table->field[IS_FILES_TABLESPACE_NAME]->set_notnull();
      table->field[IS_FILES_TABLESPACE_NAME]->store(df.getTablespace(),
                                                    strlen(df.getTablespace()),
                                                    system_charset_info);
      table->field[IS_FILES_LOGFILE_GROUP_NAME]->set_notnull();
      table->field[IS_FILES_LOGFILE_GROUP_NAME]->
        store(ts.getDefaultLogfileGroup(),
              strlen(ts.getDefaultLogfileGroup()),
              system_charset_info);
      table->field[IS_FILES_ENGINE]->set_notnull();
      table->field[IS_FILES_ENGINE]->store(ndbcluster_hton_name,
                                           ndbcluster_hton_name_length,
                                           system_charset_info);

      table->field[IS_FILES_FREE_EXTENTS]->set_notnull();
      table->field[IS_FILES_FREE_EXTENTS]->store(df.getFree()
                                                 / ts.getExtentSize(), true);
      table->field[IS_FILES_TOTAL_EXTENTS]->set_notnull();
      table->field[IS_FILES_TOTAL_EXTENTS]->store(df.getSize()
                                                  / ts.getExtentSize(), true);
      table->field[IS_FILES_EXTENT_SIZE]->set_notnull();
      table->field[IS_FILES_EXTENT_SIZE]->store(ts.getExtentSize(), true);
      table->field[IS_FILES_INITIAL_SIZE]->set_notnull();
      table->field[IS_FILES_INITIAL_SIZE]->store(df.getSize(), true);
      table->field[IS_FILES_MAXIMUM_SIZE]->set_notnull();
      table->field[IS_FILES_MAXIMUM_SIZE]->store(df.getSize(), true);
      table->field[IS_FILES_VERSION]->set_notnull();
      table->field[IS_FILES_VERSION]->store(df.getObjectVersion(), true);

      table->field[IS_FILES_ROW_FORMAT]->set_notnull();
      table->field[IS_FILES_ROW_FORMAT]->store("FIXED", 5, system_charset_info);

      char extra[30];
      int len= my_snprintf(extra, sizeof(extra), "CLUSTER_NODE=%u", id);
      table->field[IS_FILES_EXTRA]->set_notnull();
      table->field[IS_FILES_EXTRA]->store(extra, len, system_charset_info);
      schema_table_store_record(thd, table);
    }
  }

  NdbDictionary::Dictionary::List tslist;
  dict->listObjects(tslist, NdbDictionary::Object::Tablespace);
  ndberr= dict->getNdbError();
  if (ndberr.classification != NdbError::NoError)
    ERR_RETURN(ndberr);

  for (i= 0; i < tslist.count; i++)
  {
    NdbDictionary::Dictionary::List::Element& elt= tslist.elements[i];

    NdbDictionary::Tablespace ts= dict->getTablespace(elt.name);
    ndberr= dict->getNdbError();
    if (ndberr.classification != NdbError::NoError)
    {
      if (ndberr.classification == NdbError::SchemaError)
        continue;
      ERR_RETURN(ndberr);
    }

    init_fill_schema_files_row(table);
    table->field[IS_FILES_FILE_TYPE]->set_notnull();
    table->field[IS_FILES_FILE_TYPE]->store("TABLESPACE", 10,
                                            system_charset_info);

    table->field[IS_FILES_TABLESPACE_NAME]->set_notnull();
    table->field[IS_FILES_TABLESPACE_NAME]->store(elt.name,
                                                  strlen(elt.name),
                                                  system_charset_info);
    table->field[IS_FILES_LOGFILE_GROUP_NAME]->set_notnull();
    table->field[IS_FILES_LOGFILE_GROUP_NAME]->
      store(ts.getDefaultLogfileGroup(),
            strlen(ts.getDefaultLogfileGroup()),
            system_charset_info);

    table->field[IS_FILES_ENGINE]->set_notnull();
    table->field[IS_FILES_ENGINE]->store(ndbcluster_hton_name,
                                         ndbcluster_hton_name_length,
                                         system_charset_info);

    table->field[IS_FILES_EXTENT_SIZE]->set_notnull();
    table->field[IS_FILES_EXTENT_SIZE]->store(ts.getExtentSize(), true);

    table->field[IS_FILES_VERSION]->set_notnull();
    table->field[IS_FILES_VERSION]->store(ts.getObjectVersion(), true);

    schema_table_store_record(thd, table);
  }

  NdbDictionary::Dictionary::List uflist;
  dict->listObjects(uflist, NdbDictionary::Object::Undofile);
  ndberr= dict->getNdbError();
  if (ndberr.classification != NdbError::NoError)
    ERR_RETURN(ndberr);

  for (i= 0; i < uflist.count; i++)
  {
    NdbDictionary::Dictionary::List::Element& elt= uflist.elements[i];
    Ndb_cluster_connection_node_iter iter;
    unsigned id;

    g_ndb_cluster_connection->init_get_next_node(iter);

    while ((id= g_ndb_cluster_connection->get_next_alive_node(iter)))
    {
      NdbDictionary::Undofile uf= dict->getUndofile(id, elt.name);
      ndberr= dict->getNdbError();
      if (ndberr.classification != NdbError::NoError)
      {
        if (ndberr.classification == NdbError::SchemaError)
          continue;
        if (ndberr.classification == NdbError::UnknownResultError)
          continue;
        ERR_RETURN(ndberr);
      }
      NdbDictionary::LogfileGroup lfg=
        dict->getLogfileGroup(uf.getLogfileGroup());
      ndberr= dict->getNdbError();
      if (ndberr.classification != NdbError::NoError)
      {
        if (ndberr.classification == NdbError::SchemaError)
          continue;
        ERR_RETURN(ndberr);
      }

      init_fill_schema_files_row(table);
      table->field[IS_FILES_FILE_NAME]->set_notnull();
      table->field[IS_FILES_FILE_NAME]->store(elt.name, strlen(elt.name),
                                              system_charset_info);
      table->field[IS_FILES_FILE_TYPE]->set_notnull();
      table->field[IS_FILES_FILE_TYPE]->store("UNDO LOG", 8,
                                              system_charset_info);
      NdbDictionary::ObjectId objid;
      uf.getLogfileGroupId(&objid);
      table->field[IS_FILES_LOGFILE_GROUP_NAME]->set_notnull();
      table->field[IS_FILES_LOGFILE_GROUP_NAME]->store(uf.getLogfileGroup(),
                                                       strlen(uf.getLogfileGroup()),
                                                       system_charset_info);
      table->field[IS_FILES_LOGFILE_GROUP_NUMBER]->set_notnull();
      table->field[IS_FILES_LOGFILE_GROUP_NUMBER]->store(objid.getObjectId(), true);
      table->field[IS_FILES_ENGINE]->set_notnull();
      table->field[IS_FILES_ENGINE]->store(ndbcluster_hton_name,
                                           ndbcluster_hton_name_length,
                                           system_charset_info);

      table->field[IS_FILES_TOTAL_EXTENTS]->set_notnull();
      table->field[IS_FILES_TOTAL_EXTENTS]->store(uf.getSize()/4, true);
      table->field[IS_FILES_EXTENT_SIZE]->set_notnull();
      table->field[IS_FILES_EXTENT_SIZE]->store(4, true);

      table->field[IS_FILES_INITIAL_SIZE]->set_notnull();
      table->field[IS_FILES_INITIAL_SIZE]->store(uf.getSize(), true);
      table->field[IS_FILES_MAXIMUM_SIZE]->set_notnull();
      table->field[IS_FILES_MAXIMUM_SIZE]->store(uf.getSize(), true);

      table->field[IS_FILES_VERSION]->set_notnull();
      table->field[IS_FILES_VERSION]->store(uf.getObjectVersion(), true);

      char extra[100];
      int len= my_snprintf(extra, sizeof(extra),
                           "CLUSTER_NODE=%u;UNDO_BUFFER_SIZE=%lu",
                           id, (ulong) lfg.getUndoBufferSize());
      table->field[IS_FILES_EXTRA]->set_notnull();
      table->field[IS_FILES_EXTRA]->store(extra, len, system_charset_info);
      schema_table_store_record(thd, table);
    }
  }

  // now for LFGs
  NdbDictionary::Dictionary::List lfglist;
  dict->listObjects(lfglist, NdbDictionary::Object::LogfileGroup);
  ndberr= dict->getNdbError();
  if (ndberr.classification != NdbError::NoError)
    ERR_RETURN(ndberr);

  for (i= 0; i < lfglist.count; i++)
  {
    NdbDictionary::Dictionary::List::Element& elt= lfglist.elements[i];

    NdbDictionary::LogfileGroup lfg= dict->getLogfileGroup(elt.name);
    ndberr= dict->getNdbError();
    if (ndberr.classification != NdbError::NoError)
    {
      if (ndberr.classification == NdbError::SchemaError)
        continue;
      ERR_RETURN(ndberr);
    }

    init_fill_schema_files_row(table);
    table->field[IS_FILES_FILE_TYPE]->set_notnull();
    table->field[IS_FILES_FILE_TYPE]->store("UNDO LOG", 8,
                                            system_charset_info);

    table->field[IS_FILES_LOGFILE_GROUP_NAME]->set_notnull();
    table->field[IS_FILES_LOGFILE_GROUP_NAME]->store(elt.name,
                                                     strlen(elt.name),
                                                     system_charset_info);
    table->field[IS_FILES_LOGFILE_GROUP_NUMBER]->set_notnull();
    table->field[IS_FILES_LOGFILE_GROUP_NUMBER]->store(lfg.getObjectId(), true);
    table->field[IS_FILES_ENGINE]->set_notnull();
    table->field[IS_FILES_ENGINE]->store(ndbcluster_hton_name,
                                         ndbcluster_hton_name_length,
                                         system_charset_info);

    table->field[IS_FILES_FREE_EXTENTS]->set_notnull();
    table->field[IS_FILES_FREE_EXTENTS]->store(lfg.getUndoFreeWords(), true);
    table->field[IS_FILES_EXTENT_SIZE]->set_notnull();
    table->field[IS_FILES_EXTENT_SIZE]->store(4, true);

    table->field[IS_FILES_VERSION]->set_notnull();
    table->field[IS_FILES_VERSION]->store(lfg.getObjectVersion(), true);

    char extra[100];
    int len= my_snprintf(extra, sizeof(extra),
                         "UNDO_BUFFER_SIZE=%lu",
                         (ulong) lfg.getUndoBufferSize());
    table->field[IS_FILES_EXTRA]->set_notnull();
    table->field[IS_FILES_EXTRA]->store(extra, len, system_charset_info);
    schema_table_store_record(thd, table);
  }
  DBUG_RETURN(0);
}

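/*
  SHOW_FUNC callback for the dynamic "Ndb" status variables:
  allocates a per-query copy of st_ndb_status together with a
  SHOW_VAR array whose value pointers are rebased into that copy,
  fills it in from the current connection and returns it as a
  SHOW_ARRAY.
*/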
static int show_ndb_vars(THD *thd, SHOW_VAR *var, char *buff)
{
  if (!check_ndb_in_thd(thd))
    return -1;
  struct st_ndb_status *st;
  SHOW_VAR *st_var;
  {
    char *mem= (char*)sql_alloc(sizeof(struct st_ndb_status) +
                                sizeof(ndb_status_variables_dynamic));
    st= new (mem) st_ndb_status;
    st_var= (SHOW_VAR*)(mem + sizeof(struct st_ndb_status));
    memcpy(st_var, &ndb_status_variables_dynamic, sizeof(ndb_status_variables_dynamic));
    int i= 0;
    SHOW_VAR *tmp= &(ndb_status_variables_dynamic[0]);
    for (; tmp->value; tmp++, i++)
      st_var[i].value= mem + (tmp->value - (char*)&g_ndb_status);
  }
  {
    Thd_ndb *thd_ndb= get_thd_ndb(thd);
    Ndb_cluster_connection *c= thd_ndb->connection;
    update_status_variables(thd_ndb, st, c);
  }
  var->type= SHOW_ARRAY;
  var->value= (char *) st_var;
  return 0;
}

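/*
  Status variables exported by the plugin.  The entries sharing the
  "Ndb" prefix are all expanded into the same namespace, so they
  appear together in output such as (for example):

    SHOW GLOBAL STATUS LIKE 'Ndb%';
*/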
SHOW_VAR ndb_status_variables_export[]= {
  {"Ndb",          (char*) &show_ndb_vars,                 SHOW_FUNC},
  {"Ndb_conflict", (char*) &ndb_status_conflict_variables, SHOW_ARRAY},
  {"Ndb",          (char*) &ndb_status_injector_variables, SHOW_ARRAY},
  {"Ndb",          (char*) &ndb_status_slave_variables,    SHOW_ARRAY},
  {"Ndb",          (char*) &show_ndb_server_api_stats,     SHOW_FUNC},
  {"Ndb_index_stat", (char*) &ndb_status_index_stat_variables, SHOW_ARRAY},
  {NullS, NullS, SHOW_LONG}
};

static MYSQL_SYSVAR_ULONG(
  cache_check_time,                  /* name */
  opt_ndb_cache_check_time,          /* var */
  PLUGIN_VAR_RQCMDARG,
  "A dedicated thread is created to invalidate the query cache, "
  "at the given millisecond interval, if another MySQL server in "
  "the cluster has changed the data in the database.",
  NULL,                              /* check func. */
  NULL,                              /* update func. */
  0,                                 /* default */
  0,                                 /* min */
  ONE_YEAR_IN_SECONDS,               /* max */
  0                                  /* block */
);


static MYSQL_SYSVAR_ULONG(
  extra_logging,                     /* name */
  opt_ndb_extra_logging,             /* var */
  PLUGIN_VAR_OPCMDARG,
  "Turn on more logging in the error log.",
  NULL,                              /* check func. */
  NULL,                              /* update func. */
  1,                                 /* default */
  0,                                 /* min */
  0,                                 /* max */
  0                                  /* block */
);


static MYSQL_SYSVAR_ULONG(
  wait_connected,                    /* name */
  opt_ndb_wait_connected,            /* var */
  PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY,
  "Time (in seconds) for mysqld to wait for connection "
  "to cluster management and data nodes.",
  NULL,                              /* check func. */
  NULL,                              /* update func. */
  0,                                 /* default */
  0,                                 /* min */
  ONE_YEAR_IN_SECONDS,               /* max */
  0                                  /* block */
);


static MYSQL_SYSVAR_ULONG(
  wait_setup,                        /* name */
  opt_ndb_wait_setup,                /* var */
  PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY,
  "Time (in seconds) for mysqld to wait for setup to "
  "complete (0 = no wait)",
  NULL,                              /* check func. */
  NULL,                              /* update func. */
  15,                                /* default */
  0,                                 /* min */
  ONE_YEAR_IN_SECONDS,               /* max */
  0                                  /* block */
);


static MYSQL_SYSVAR_UINT(
  cluster_connection_pool,           /* name */
  opt_ndb_cluster_connection_pool,   /* var */
  PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY,
  "Pool of cluster connections to be used by mysql server.",
  NULL,                              /* check func. */
  NULL,                              /* update func. */
  1,                                 /* default */
  1,                                 /* min */
  63,                                /* max */
  0                                  /* block */
);

/* should be in index_stat.h */

extern int
ndb_index_stat_option_check(MYSQL_THD,
                            struct st_mysql_sys_var *var,
                            void *save,
                            struct st_mysql_value *value);
extern void
ndb_index_stat_option_update(MYSQL_THD,
                             struct st_mysql_sys_var *var,
                             void *var_ptr,
                             const void *save);

extern char ndb_index_stat_option_buf[];

static MYSQL_SYSVAR_STR(
  index_stat_option,                /* name */
  opt_ndb_index_stat_option,        /* var */
  PLUGIN_VAR_RQCMDARG,
  "Comma-separated tunable options for ndb index statistics",
  ndb_index_stat_option_check,      /* check func. */
  ndb_index_stat_option_update,     /* update func. */
  ndb_index_stat_option_buf
);
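/*
  The option string is a comma-separated list of name=value pairs.
  For example, using two of the documented tunables:

    SET GLOBAL ndb_index_stat_option = 'loop_idle=1000ms,cache_limit=32M';
*/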


ulong opt_ndb_report_thresh_binlog_epoch_slip;
static MYSQL_SYSVAR_ULONG(
  report_thresh_binlog_epoch_slip,   /* name */
  opt_ndb_report_thresh_binlog_epoch_slip,/* var */
  PLUGIN_VAR_RQCMDARG,
  "Threshold on number of epochs to be behind before reporting binlog "
  "status. E.g. 3 means that if the difference between what epoch has "
  "been received from the storage nodes and what has been applied to "
  "the binlog is 3 or more, a status message will be sent to the cluster "
  "log.",
  NULL,                              /* check func. */
  NULL,                              /* update func. */
  3,                                 /* default */
  0,                                 /* min */
  256,                               /* max */
  0                                  /* block */
);


ulong opt_ndb_report_thresh_binlog_mem_usage;
static MYSQL_SYSVAR_ULONG(
  report_thresh_binlog_mem_usage,    /* name */
  opt_ndb_report_thresh_binlog_mem_usage,/* var */
  PLUGIN_VAR_RQCMDARG,
  "Threshold on percentage of free memory before reporting binlog "
  "status. E.g. 10 means that if amount of available memory for "
  "receiving binlog data from the storage nodes goes below 10%, "
  "a status message will be sent to the cluster log.",
  NULL,                              /* check func. */
  NULL,                              /* update func. */
  10,                                /* default */
  0,                                 /* min */
  100,                               /* max */
  0                                  /* block */
);


my_bool opt_ndb_log_update_as_write;
static MYSQL_SYSVAR_BOOL(
  log_update_as_write,               /* name */
  opt_ndb_log_update_as_write,       /* var */
  PLUGIN_VAR_OPCMDARG,
  "For efficiency, log only the after image as a write event, "
  "ignoring the before image. This may cause compatibility problems "
  "when replicating to storage engines other than ndbcluster.",
  NULL,                              /* check func. */
  NULL,                              /* update func. */
  1                                  /* default */
);


my_bool opt_ndb_log_updated_only;
static MYSQL_SYSVAR_BOOL(
  log_updated_only,                  /* name */
  opt_ndb_log_updated_only,          /* var */
  PLUGIN_VAR_OPCMDARG,
  "For efficiency, log only updated columns. Columns are considered "
  "as \"updated\" even if they are updated with the same value. "
  "This may cause compatibility problems when "
  "replicating to storage engines other than ndbcluster.",
  NULL,                              /* check func. */
  NULL,                              /* update func. */
  1                                  /* default */
);


my_bool opt_ndb_log_orig;
static MYSQL_SYSVAR_BOOL(
  log_orig,                          /* name */
  opt_ndb_log_orig,                  /* var */
  PLUGIN_VAR_OPCMDARG,
  "Log originating server id and epoch in ndb_binlog_index. Each epoch "
  "may in this case have multiple rows in ndb_binlog_index, one for "
  "each originating epoch.",
  NULL,                              /* check func. */
  NULL,                              /* update func. */
  0                                  /* default */
);


my_bool opt_ndb_log_bin;
static MYSQL_SYSVAR_BOOL(
  log_bin,                           /* name */
  opt_ndb_log_bin,                   /* var */
  PLUGIN_VAR_OPCMDARG,
  "Log ndb tables in the binary log. Option only has meaning if "
  "the binary log has been turned on for the server.",
  NULL,                              /* check func. */
  NULL,                              /* update func. */
  1                                  /* default */
);


my_bool opt_ndb_log_binlog_index;
static MYSQL_SYSVAR_BOOL(
  log_binlog_index,                  /* name */
  opt_ndb_log_binlog_index,          /* var */
  PLUGIN_VAR_OPCMDARG,
  "Insert mapping between epochs and binlog positions into the "
  "ndb_binlog_index table.",
  NULL,                              /* check func. */
  NULL,                              /* update func. */
  1                                  /* default */
);


static my_bool opt_ndb_log_empty_epochs;
static MYSQL_SYSVAR_BOOL(
  log_empty_epochs,                  /* name */
  opt_ndb_log_empty_epochs,          /* var */
  PLUGIN_VAR_OPCMDARG,
  "",
  NULL,                              /* check func. */
  NULL,                              /* update func. */
  0                                  /* default */
);

bool ndb_log_empty_epochs(void)
{
  return opt_ndb_log_empty_epochs;
}

my_bool opt_ndb_log_apply_status;
static MYSQL_SYSVAR_BOOL(
  log_apply_status,                 /* name */
  opt_ndb_log_apply_status,         /* var */
  PLUGIN_VAR_OPCMDARG,
  "Log ndb_apply_status updates from Master in the Binlog",
  NULL,                             /* check func. */
  NULL,                             /* update func. */
  0                                 /* default */
);


static MYSQL_SYSVAR_STR(
  connectstring,                    /* name */
  opt_ndb_connectstring,            /* var */
  PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY,
  "Connect string for ndbcluster.",
  NULL,                             /* check func. */
  NULL,                             /* update func. */
  NULL                              /* default */
);


static MYSQL_SYSVAR_STR(
  mgmd_host,                        /* name */
  opt_ndb_connectstring,            /* var */
  PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY,
  "Same as --ndb-connectstring",
  NULL,                             /* check func. */
  NULL,                             /* update func. */
  NULL                              /* default */
);


static MYSQL_SYSVAR_UINT(
  nodeid,                           /* name */
  opt_ndb_nodeid,                   /* var */
  PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY,
  "Set nodeid for this node. Overrides node id specified "
  "in --ndb-connectstring.",
  NULL,                             /* check func. */
  NULL,                             /* update func. */
  0,                                /* default */
  0,                                /* min */
  MAX_NODES_ID,                     /* max */
  0                                 /* block */
);

#ifndef DBUG_OFF

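/*
  Debug-only helper: updating the thd variable below (for example
  with SET, assuming the usual ndb_ prefix for this plugin's
  variables) triggers dbug_check_shares, which dumps all open
  NDB_SHARE entries and asserts that only shares in the mysql
  database remain open.
*/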
static
void
dbug_check_shares(THD*, st_mysql_sys_var*, void*, const void*)
{
  sql_print_information("dbug_check_shares");
  for (uint i= 0; i < ndbcluster_open_tables.records; i++)
  {
    NDB_SHARE * share = (NDB_SHARE*)my_hash_element(&ndbcluster_open_tables, i);
    sql_print_information("  %s.%s: state: %s(%u) use_count: %u",
                          share->db, share->table_name,
                          get_share_state_string(share->state),
                          (unsigned)share->state,
                          share->use_count);
  }

  /**
   * Only shares in the mysql database may be open...
   */
  for (uint i= 0; i < ndbcluster_open_tables.records; i++)
  {
    NDB_SHARE * share = (NDB_SHARE*)my_hash_element(&ndbcluster_open_tables, i);
    DBUG_ASSERT(strcmp(share->db, "mysql") == 0);
  }
}

static MYSQL_THDVAR_UINT(
  check_shares,              /* name */
  PLUGIN_VAR_RQCMDARG,
  "Debug only. Check that no shares are lingering.",
  NULL,                              /* check func */
  dbug_check_shares,                 /* update func */
  0,                                 /* default */
  0,                                 /* min */
  1,                                 /* max */
  0                                  /* block */
);
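
/*
  A minimal usage sketch, assuming the plugin registers the variable
  with the usual ndb_ prefix: in a debug build, assigning any value
  fires the update callback and triggers the dump above.

    SET SESSION ndb_check_shares = 1;
*/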

#endif

static struct st_mysql_sys_var* system_variables[]= {
  MYSQL_SYSVAR(cache_check_time),
  MYSQL_SYSVAR(extra_logging),
  MYSQL_SYSVAR(wait_connected),
  MYSQL_SYSVAR(wait_setup),
  MYSQL_SYSVAR(cluster_connection_pool),
  MYSQL_SYSVAR(report_thresh_binlog_mem_usage),
  MYSQL_SYSVAR(report_thresh_binlog_epoch_slip),
  MYSQL_SYSVAR(log_update_as_write),
  MYSQL_SYSVAR(log_updated_only),
  MYSQL_SYSVAR(log_orig),
  MYSQL_SYSVAR(distribution),
  MYSQL_SYSVAR(autoincrement_prefetch_sz),
  MYSQL_SYSVAR(force_send),
  MYSQL_SYSVAR(use_exact_count),
  MYSQL_SYSVAR(use_transactions),
  MYSQL_SYSVAR(use_copying_alter_table),
  MYSQL_SYSVAR(optimized_node_selection),
  MYSQL_SYSVAR(batch_size),
  MYSQL_SYSVAR(optimization_delay),
  MYSQL_SYSVAR(index_stat_enable),
  MYSQL_SYSVAR(index_stat_option),
  MYSQL_SYSVAR(index_stat_cache_entries),
  MYSQL_SYSVAR(index_stat_update_freq),
  MYSQL_SYSVAR(table_no_logging),
  MYSQL_SYSVAR(table_temporary),
  MYSQL_SYSVAR(log_bin),
  MYSQL_SYSVAR(log_binlog_index),
  MYSQL_SYSVAR(log_empty_epochs),
  MYSQL_SYSVAR(log_apply_status),
  MYSQL_SYSVAR(connectstring),
  MYSQL_SYSVAR(mgmd_host),
  MYSQL_SYSVAR(nodeid),
  MYSQL_SYSVAR(blob_read_batch_bytes),
  MYSQL_SYSVAR(blob_write_batch_bytes),
  MYSQL_SYSVAR(deferred_constraints),
  MYSQL_SYSVAR(join_pushdown),
#ifndef DBUG_OFF
  MYSQL_SYSVAR(check_shares),
#endif
  NULL
};
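
/*
  The NULL entry terminates the list; the array is handed to the
  server through the plugin descriptor below.
*/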

struct st_mysql_storage_engine ndbcluster_storage_engine=
{ MYSQL_HANDLERTON_INTERFACE_VERSION };


#include "ha_ndbinfo.h"

extern struct st_mysql_sys_var* ndbinfo_system_variables[];

struct st_mysql_storage_engine ndbinfo_storage_engine=
{ MYSQL_HANDLERTON_INTERFACE_VERSION };

mysql_declare_plugin(ndbcluster)
{
  MYSQL_STORAGE_ENGINE_PLUGIN,
  &ndbcluster_storage_engine,
  ndbcluster_hton_name,
  "MySQL AB",
  "Clustered, fault-tolerant tables",
  PLUGIN_LICENSE_GPL,
  ndbcluster_init,            /* plugin init */
  NULL,                       /* plugin deinit */
  0x0100,                     /* plugin version */
  ndb_status_variables_export,/* status variables */
  system_variables,           /* system variables */
  NULL,                       /* config options */
  0                           /* flags */
},
{
  MYSQL_STORAGE_ENGINE_PLUGIN,
  &ndbinfo_storage_engine,
  "ndbinfo",
  "Sun Microsystems Inc.",
  "MySQL Cluster system information storage engine",
  PLUGIN_LICENSE_GPL,
  ndbinfo_init,               /* plugin init */
  ndbinfo_deinit,             /* plugin deinit */
  0x0001,                     /* plugin version */
  NULL,                       /* status variables */
  ndbinfo_system_variables,   /* system variables */
  NULL,                       /* config options */
  0                           /* flags */
}
mysql_declare_plugin_end;

#endif