1 /* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */
2 // vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4:
3 #ident "$Id$"
4 /*======
5 This file is part of TokuDB
6
7
8 Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved.
9
TokuDB is free software: you can redistribute it and/or modify
11 it under the terms of the GNU General Public License, version 2,
12 as published by the Free Software Foundation.
13
14 TokuDB is distributed in the hope that it will be useful,
15 but WITHOUT ANY WARRANTY; without even the implied warranty of
16 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 GNU General Public License for more details.
18
19 You should have received a copy of the GNU General Public License
20 along with TokuDB. If not, see <http://www.gnu.org/licenses/>.
21
22 ======= */
23
24 #ident "Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved."
25
26 #ifndef _HA_TOKUDB_H
27 #define _HA_TOKUDB_H
28
29 #include "hatoku_hton.h"
30 #include "hatoku_cmp.h"
31 #include "tokudb_background.h"
32
// Version constants for this handler; both are currently 4.
// NOTE(review): presumably these are the values recorded in the share's
// 'version' field / status metadata - confirm against the implementation.
#define HA_TOKU_ORIG_VERSION 4
#define HA_TOKU_VERSION 4

// Capability bits advertised by this handler; none are defined yet.
#define HA_TOKU_CAP 0
39
40 class ha_tokudb;
41
42 typedef struct loader_context {
43 THD* thd;
44 char write_status_msg[1024];
45 ha_tokudb* ha;
46 } *LOADER_CONTEXT;
47
48 //
49 // This class stores table information that is to be shared
50 // among all ha_tokudb objects.
51 // There is one instance per table, shared among handlers.
52 // Some of the variables here are the DB* pointers to indexes,
53 // and auto increment information.
54 //
// When the last user releases its reference on the share,
// it closes all of its database handles and releases all info.
57 // The share instance stays around though so some data can be transiently
58 // kept across open-close-open-close cycles. These data will be explicitly
59 // noted below.
60 //
61 class TOKUDB_SHARE {
62 public:
63 enum share_state_t {
64 CLOSED = 0,
65 OPENED = 1,
66 ERROR = 2
67 };
68
69 // one time, start up init
70 static void static_init();
71
72 // one time, shutdown destroy
73 static void static_destroy();
74
75 // retuns a locked, properly reference counted share
76 // callers must check to ensure share is in correct state for callers use
77 // and unlock the share.
78 // if create_new is set, a new "CLOSED" share will be created if one
79 // doesn't exist, otherwise will return NULL if an existing is not found.
80 static TOKUDB_SHARE* get_share(
81 const char* table_name,
82 THR_LOCK_DATA* data,
83 bool create_new);
84
85 // removes a share entirely from the pool, call to rename/deleta a table
86 // caller must hold ddl_mutex on this share and the share MUST have
87 // exactly 0 _use_count
88 static void drop_share(TOKUDB_SHARE* share);
89
90 // returns state string for logging/reporting
91 static const char* get_state_string(share_state_t state);
92
93 void* operator new(size_t sz);
94 void operator delete(void* p);
95
96 TOKUDB_SHARE();
97
98 // increases the ref count and waits for any currently executing state
99 // transition to complete
100 // returns current state and leaves share locked
101 // callers must check to ensure share is in correct state for callers use
102 // and unlock the share.
103 share_state_t addref();
104
105 // decreases the ref count and potentially closes the share
106 // caller must not have ownership of mutex, will lock and release
107 int release();
108
109 // returns the current use count
110 // no locking requirements
111 inline int use_count() const;
112
113 // locks the share
114 inline void lock() const;
115
116 // unlocks the share
117 inline void unlock() const;
118
119 // returns the current state of the share
120 // no locking requirements
121 inline share_state_t state() const;
122
123 // sets the state of the share
124 // caller must hold mutex on this share
125 inline void set_state(share_state_t state);
126
127 // returns the full MySQL table name of the table ex:
128 // ./database/table
129 // no locking requirements
130 inline const char* full_table_name() const;
131
132 // returns the strlen of the full table name
133 // no locking requirements
134 inline uint full_table_name_length() const;
135
136 // returns the parsed database name this table resides in
137 // no locking requirements
138 inline const char* database_name() const;
139
140 // returns the strlen of the database name
141 // no locking requirements
142 inline uint database_name_length() const;
143
144 // returns the parsed table name of this table
145 // no locking requirements
146 inline const char* table_name() const;
147
148 // returns the strlen of the the table name
149 // no locking requirements
150 inline uint table_name_length() const;
151
152 // sets the estimated number of rows in the table
153 // should be called only during share initialization and info call
154 // caller must hold mutex on this share unless specified by 'locked'
155 inline void set_row_count(uint64_t rows, bool locked);
156
157 // updates tracked row count and ongoing table change delta tracking
158 // called from any ha_tokudb operation that inserts/modifies/deletes rows
159 // may spawn background analysis if enabled, allowed and threshold hit
160 // caller must not have ownership of mutex, will lock and release
161 void update_row_count(
162 THD* thd,
163 uint64_t added,
164 uint64_t deleted,
165 uint64_t updated);
166
167 // returns the current row count estimate
168 // no locking requirements
169 inline ha_rows row_count() const;
170
171 // initializes cardinality statistics, takes ownership of incoming buffer
172 // caller must hold mutex on this share
173 inline void init_cardinality_counts(
174 uint32_t rec_per_keys,
175 uint64_t* rec_per_key);
176
177 // update the cardinality statistics. number of records must match
178 // caller must hold mutex on this share
179 inline void update_cardinality_counts(
180 uint32_t rec_per_keys,
181 const uint64_t* rec_per_key);
182
183 // disallow any auto analysis from taking place
184 // caller must hold mutex on this share
185 inline void disallow_auto_analysis();
186
187 // allow any auto analysis to take place
188 // pass in true for 'reset_deltas' to reset delta counting to 0
189 // caller must hold mutex on this share
190 inline void allow_auto_analysis(bool reset_deltas);
191
192 // cancels all background jobs for this share
193 // no locking requirements
194 inline void cancel_background_jobs() const;
195
196 // copies cardinality statistics into TABLE counter set
197 // caller must not have ownership of mutex, will lock and release
198 void set_cardinality_counts_in_table(TABLE* table);
199
200 // performs table analysis on underlying indices and produces estimated
201 // cardinality statistics.
202 // on success updates cardinality counts in status database and this share
203 // MUST pass a valid THD to access session variables.
204 // MAY pass txn. If txn is passed, assumes an explicit user scheduled
205 // ANALYZE and not an auto ANALYZE resulting from delta threshold
206 // uses session variables:
207 // tokudb_analyze_in_background, tokudb_analyze_throttle,
208 // tokudb_analyze_time, and tokudb_analyze_delete_fraction
209 // caller must hold mutex on this share
210 int analyze_standard(THD* thd, DB_TXN* txn);
211
212 // performs table scan and updates the internal FT logical row count value
213 // on success also updates share row count estimate.
214 // MUST pass a valid THD to access session variables.
215 // MAY pass txn. If txn is passed, assumes an explicit user scheduled
216 // uses session variables:
217 // tokudb_analyze_in_background, and tokudb_analyze_throttle
218 // caller must not have ownership of mutex, will lock and release
219 int analyze_recount_rows(THD* thd, DB_TXN* txn);
220
221 public:
222 //*********************************
223 // Destroyed and recreated on open-close-open
224 ulonglong auto_ident;
225 ulonglong last_auto_increment, auto_inc_create_value;
226
227 // estimate on number of rows added in the process of a locked tables
228 // this is so we can better estimate row count during a lock table
229 ha_rows rows_from_locked_table;
230 DB* status_block;
231
232 // DB that is indexed on the primary key
233 DB* file;
234
235 // array of all DB's that make up table, includes DB that
236 // is indexed on the primary key, add 1 in case primary
237 // key is hidden
238 DB* key_file[MAX_KEY + 1];
239 uint status, version, capabilities;
240 uint ref_length;
241
242 // whether table has an auto increment column
243 bool has_auto_inc;
244
245 // index of auto increment column in table->field, if auto_inc exists
246 uint ai_field_index;
247
248 // whether the primary key has a string
249 bool pk_has_string;
250
251 KEY_AND_COL_INFO kc_info;
252
253 // key info copied from TABLE_SHARE, used by background jobs that have no
254 // access to a handler instance
255 uint _keys;
256 uint _max_key_parts;
257 struct key_descriptor_t {
258 uint _parts;
259 bool _is_unique;
260 char* _name;
261 };
262 key_descriptor_t* _key_descriptors;
263
264 // we want the following optimization for bulk loads, if the table is empty,
265 // attempt to grab a table lock. emptiness check can be expensive,
266 // so we try it once for a table. After that, we keep this variable around
267 // to tell us to not try it again.
268 bool try_table_lock;
269
270 bool has_unique_keys;
271 bool replace_into_fast;
272 tokudb::thread::rwlock_t _num_DBs_lock;
273 uint32_t num_DBs;
274
275 private:
276 static std::unordered_map<std::string, TOKUDB_SHARE*> _open_tables;
277 static tokudb::thread::mutex_t _open_tables_mutex;
278
279 //*********************************
280 // Spans open-close-open
281 mutable tokudb::thread::mutex_t _mutex;
282 mutable tokudb::thread::mutex_t _ddl_mutex;
283 uint _use_count;
284
285 share_state_t _state;
286
287 ulonglong _row_delta_activity;
288 bool _allow_auto_analysis;
289
290 String _full_table_name;
291 String _database_name;
292 String _table_name;
293
294 //*********************************
295 // Destroyed and recreated on open-close-open
296 THR_LOCK _thr_lock;
297
298 // estimate on number of rows in table
299 ha_rows _rows;
300
301 // cardinality counts
302 uint32_t _rec_per_keys;
303 uint64_t* _rec_per_key;
304
305 void init(const char* table_name);
306 void destroy();
307 };
use_count()308 inline int TOKUDB_SHARE::use_count() const {
309 return _use_count;
310 }
lock()311 inline void TOKUDB_SHARE::lock() const {
312 TOKUDB_SHARE_DBUG_ENTER("file[%s]:state[%s]:use_count[%d]",
313 _full_table_name.ptr(),
314 get_state_string(_state),
315 _use_count);
316 mutex_t_lock(_mutex);
317 TOKUDB_SHARE_DBUG_VOID_RETURN();
318 }
unlock()319 inline void TOKUDB_SHARE::unlock() const {
320 TOKUDB_SHARE_DBUG_ENTER("file[%s]:state[%s]:use_count[%d]",
321 _full_table_name.ptr(),
322 get_state_string(_state),
323 _use_count);
324 mutex_t_unlock(_mutex);
325 TOKUDB_SHARE_DBUG_VOID_RETURN();
326 }
state()327 inline TOKUDB_SHARE::share_state_t TOKUDB_SHARE::state() const {
328 return _state;
329 }
set_state(TOKUDB_SHARE::share_state_t state)330 inline void TOKUDB_SHARE::set_state(TOKUDB_SHARE::share_state_t state) {
331 TOKUDB_SHARE_DBUG_ENTER("file[%s]:state[%s]:use_count[%d]:new_state[%s]",
332 _full_table_name.ptr(),
333 get_state_string(_state),
334 _use_count,
335 get_state_string(state));
336
337 assert_debug(_mutex.is_owned_by_me());
338 _state = state;
339 TOKUDB_SHARE_DBUG_VOID_RETURN();
340 }
full_table_name()341 inline const char* TOKUDB_SHARE::full_table_name() const {
342 return _full_table_name.ptr();
343 }
full_table_name_length()344 inline uint TOKUDB_SHARE::full_table_name_length() const {
345 return _full_table_name.length();
346 }
database_name()347 inline const char* TOKUDB_SHARE::database_name() const {
348 return _database_name.ptr();
349 }
database_name_length()350 inline uint TOKUDB_SHARE::database_name_length() const {
351 return _database_name.length();
352 }
table_name()353 inline const char* TOKUDB_SHARE::table_name() const {
354 return _table_name.ptr();
355 }
table_name_length()356 inline uint TOKUDB_SHARE::table_name_length() const {
357 return _table_name.length();
358 }
set_row_count(uint64_t rows,bool locked)359 inline void TOKUDB_SHARE::set_row_count(uint64_t rows, bool locked) {
360 TOKUDB_SHARE_DBUG_ENTER("file[%s]:state[%s]:use_count[%d]:rows[%" PRIu64 "]:locked[%d]",
361 _full_table_name.ptr(),
362 get_state_string(_state),
363 _use_count,
364 rows,
365 locked);
366
367 if (!locked) {
368 lock();
369 } else {
370 assert_debug(_mutex.is_owned_by_me());
371 }
372 if (_rows && rows == 0)
373 _row_delta_activity = 0;
374
375 _rows = rows;
376 if (!locked) {
377 unlock();
378 }
379 TOKUDB_SHARE_DBUG_VOID_RETURN();
380 }
row_count()381 inline ha_rows TOKUDB_SHARE::row_count() const {
382 return _rows;
383 }
init_cardinality_counts(uint32_t rec_per_keys,uint64_t * rec_per_key)384 inline void TOKUDB_SHARE::init_cardinality_counts(
385 uint32_t rec_per_keys,
386 uint64_t* rec_per_key) {
387
388 assert_debug(_mutex.is_owned_by_me());
389 // can not change number of keys live
390 assert_always(_rec_per_key == nullptr);
391 assert_always(_rec_per_keys == 0);
392 _rec_per_keys = rec_per_keys;
393 _rec_per_key = rec_per_key;
394 }
update_cardinality_counts(uint32_t rec_per_keys,const uint64_t * rec_per_key)395 inline void TOKUDB_SHARE::update_cardinality_counts(
396 uint32_t rec_per_keys,
397 const uint64_t* rec_per_key) {
398
399 assert_debug(_mutex.is_owned_by_me());
400 // can not change number of keys live
401 assert_always(rec_per_keys == _rec_per_keys);
402 assert_always(rec_per_key != NULL);
403 memcpy(_rec_per_key, rec_per_key, _rec_per_keys * sizeof(uint64_t));
404 }
disallow_auto_analysis()405 inline void TOKUDB_SHARE::disallow_auto_analysis() {
406 assert_debug(_mutex.is_owned_by_me());
407 _allow_auto_analysis = false;
408 }
allow_auto_analysis(bool reset_deltas)409 inline void TOKUDB_SHARE::allow_auto_analysis(bool reset_deltas) {
410 assert_debug(_mutex.is_owned_by_me());
411 _allow_auto_analysis = true;
412 if (reset_deltas)
413 _row_delta_activity = 0;
414 }
cancel_background_jobs()415 inline void TOKUDB_SHARE::cancel_background_jobs() const {
416 tokudb::background::_job_manager->cancel_job(full_table_name());
417 }
418
419
420
421 typedef struct st_filter_key_part_info {
422 uint offset;
423 uint part_index;
424 } FILTER_KEY_PART_INFO;
425
// Kind of table lock to request (see ha_tokudb::acquire_table_lock).
typedef enum {
    lock_read = 0,
    lock_write = 1
} TABLE_LOCK_TYPE;
430
// Upper bound on the bulk fetch iteration counter. The number of rows
// fetched per callback grows exponentially with the iteration, so the
// iteration is capped at the largest shift count that can be applied to a
// 64 bit integer.
#define HA_TOKU_BULK_FETCH_ITERATION_MAX 63
435
436 class ha_tokudb : public handler {
437 private:
438 THR_LOCK_DATA lock; ///< MySQL lock
439 TOKUDB_SHARE *share; ///< Shared lock info
440
441 #ifdef MARIADB_BASE_VERSION
442 // MariaDB version of MRR
443 DsMrr_impl ds_mrr;
444 #elif 50600 <= MYSQL_VERSION_ID && MYSQL_VERSION_ID <= 50699
445 // MySQL version of MRR
446 DsMrr_impl ds_mrr;
447 #endif
448
449 // For ICP. Cache our own copies
450 Item* toku_pushed_idx_cond;
451 uint toku_pushed_idx_cond_keyno; /* The index which the above condition is for */
452 bool icp_went_out_of_range;
453
454 //
455 // last key returned by ha_tokudb's cursor
456 //
457 DBT last_key;
458 //
459 // pointer used for multi_alloc of key_buff, key_buff2, primary_key_buff
460 //
461 void *alloc_ptr;
462 //
463 // buffer used to temporarily store a "packed row"
464 // data pointer of a DBT will end up pointing to this
465 // see pack_row for usage
466 //
467 uchar *rec_buff;
468 //
469 // number of bytes allocated in rec_buff
470 //
471 ulong alloced_rec_buff_length;
472 //
473 // same as above two, but for updates
474 //
475 uchar *rec_update_buff;
476 ulong alloced_update_rec_buff_length;
477 uint32_t max_key_length;
478
479 uchar* range_query_buff; // range query buffer
480 uint32_t size_range_query_buff; // size of the allocated range query buffer
481 uint32_t bytes_used_in_range_query_buff; // number of bytes used in the range query buffer
482 uint32_t curr_range_query_buff_offset; // current offset into the range query buffer for queries to read
483 uint64_t bulk_fetch_iteration;
484 uint64_t rows_fetched_using_bulk_fetch;
485 bool doing_bulk_fetch;
486 bool maybe_index_scan;
487
488 //
489 // buffer used to temporarily store a "packed key"
490 // data pointer of a DBT will end up pointing to this
491 //
492 uchar *key_buff;
493 //
494 // buffer used to temporarily store a "packed key"
495 // data pointer of a DBT will end up pointing to this
496 // This is used in functions that require the packing
497 // of more than one key
498 //
499 uchar *key_buff2;
500 uchar *key_buff3;
501 uchar *key_buff4;
502 //
503 // buffer used to temporarily store a "packed key"
504 // data pointer of a DBT will end up pointing to this
505 // currently this is only used for a primary key in
506 // the function update_row, hence the name. It
507 // does not carry any state throughout the class.
508 //
509 uchar *primary_key_buff;
510
511 //
512 // ranges of prelocked area, used to know how much to bulk fetch
513 //
514 uchar *prelocked_left_range;
515 uint32_t prelocked_left_range_size;
516 uchar *prelocked_right_range;
517 uint32_t prelocked_right_range_size;
518
519
520 //
521 // individual DBTs for each index
522 //
523 DBT_ARRAY mult_key_dbt_array[2*(MAX_KEY + 1)];
524 DBT_ARRAY mult_rec_dbt_array[MAX_KEY + 1];
525 uint32_t mult_put_flags[MAX_KEY + 1];
526 uint32_t mult_del_flags[MAX_KEY + 1];
527 uint32_t mult_dbt_flags[MAX_KEY + 1];
528
529
530 //
531 // when unpacking blobs, we need to store it in a temporary
532 // buffer that will persist because MySQL just gets a pointer to the
533 // blob data, a pointer we need to ensure is valid until the next
534 // query
535 //
536 uchar* blob_buff;
537 uint32_t num_blob_bytes;
538
539 bool unpack_entire_row;
540
541 //
542 // buffers (and their sizes) that will hold the indexes
543 // of fields that need to be read for a query
544 //
545 uint32_t* fixed_cols_for_query;
546 uint32_t num_fixed_cols_for_query;
547 uint32_t* var_cols_for_query;
548 uint32_t num_var_cols_for_query;
549 bool read_blobs;
550 bool read_key;
551
552 //
553 // transaction used by ha_tokudb's cursor
554 //
555 DB_TXN *transaction;
556
557 // external_lock will set this true for read operations that will be closely followed by write operations.
558 bool use_write_locks; // use write locks for reads
559
560 //
561 // instance of cursor being used for init_xxx and rnd_xxx functions
562 //
563 DBC *cursor;
564 uint32_t cursor_flags; // flags for cursor
565 //
566 // flags that are returned in table_flags()
567 //
568 ulonglong int_table_flags;
569 //
570 // count on the number of rows that gets changed, such as when write_row occurs
571 // this is meant to help keep estimate on number of elements in DB
572 //
573 ulonglong added_rows;
574 ulonglong deleted_rows;
575 ulonglong updated_rows;
576
577
578 uint last_dup_key;
579 //
580 // if set to 0, then the primary key is not hidden
581 // if non-zero (not necessarily 1), primary key is hidden
582 //
583 uint hidden_primary_key;
584 bool key_read, using_ignore;
585 bool using_ignore_no_key;
586
587 //
588 // After a cursor encounters an error, the cursor will be unusable
589 // In case MySQL attempts to do a cursor operation (such as rnd_next
590 // or index_prev), we will gracefully return this error instead of crashing
591 //
592 int last_cursor_error;
593
594 //
595 // For instances where we successfully prelock a range or a table,
// we set this to true so that successive cursor calls
// know to limit the locking overhead in a call to the fractal tree
598 //
599 bool range_lock_grabbed;
600 bool range_lock_grabbed_null;
601
602 //
603 // For bulk inserts, we want option of not updating auto inc
604 // until all inserts are done. By default, is false
605 //
606 bool delay_updating_ai_metadata; // if true, don't update auto-increment metadata until bulk load completes
607 bool ai_metadata_update_required; // if true, autoincrement metadata must be updated
608
609 //
610 // buffer for updating the status of long insert, delete, and update
// statements. Right now, the messages are
612 // "[inserted|updated|deleted] about %llu rows",
613 // so a buffer of 200 is good enough.
614 //
615 char write_status_msg[200]; //buffer of 200 should be a good upper bound.
616 struct loader_context lc;
617
618 DB_LOADER* loader;
619 bool abort_loader;
620 int loader_error;
621
622 bool num_DBs_locked_in_bulk;
623 uint32_t lock_count;
624
625 bool fix_rec_buff_for_blob(ulong length);
626 bool fix_rec_update_buff_for_blob(ulong length);
627 uchar current_ident[TOKUDB_HIDDEN_PRIMARY_KEY_LENGTH];
628
629 ulong max_row_length(const uchar * buf);
630 int pack_row_in_buff(
631 DBT * row,
632 const uchar* record,
633 uint index,
634 uchar* row_buff
635 );
636 int pack_row(
637 DBT * row,
638 const uchar* record,
639 uint index
640 );
641 int pack_old_row_for_update(
642 DBT * row,
643 const uchar* record,
644 uint index
645 );
646 uint32_t place_key_into_mysql_buff(KEY* key_info, uchar * record, uchar* data);
647 void unpack_key(uchar * record, DBT const *key, uint index);
648 uint32_t place_key_into_dbt_buff(KEY* key_info, uchar * buff, const uchar * record, bool* has_null, int key_length);
649 DBT* create_dbt_key_from_key(DBT * key, KEY* key_info, uchar * buff, const uchar * record, bool* has_null, bool dont_pack_pk, int key_length, uint8_t inf_byte);
650 DBT *create_dbt_key_from_table(DBT * key, uint keynr, uchar * buff, const uchar * record, bool* has_null, int key_length = MAX_KEY_LENGTH);
651 DBT* create_dbt_key_for_lookup(DBT * key, KEY* key_info, uchar * buff, const uchar * record, bool* has_null, int key_length = MAX_KEY_LENGTH);
652 DBT *pack_key(DBT * key, uint keynr, uchar * buff, const uchar * key_ptr, uint key_length, int8_t inf_byte);
653 #if defined(TOKU_INCLUDE_EXTENDED_KEYS) && TOKU_INCLUDE_EXTENDED_KEYS
654 DBT *pack_ext_key(DBT * key, uint keynr, uchar * buff, const uchar * key_ptr, uint key_length, int8_t inf_byte);
655 #endif // defined(TOKU_INCLUDE_EXTENDED_KEYS) && TOKU_INCLUDE_EXTENDED_KEYS
656 bool key_changed(uint keynr, const uchar * old_row, const uchar * new_row);
657 int handle_cursor_error(int error, int err_to_return);
658 DBT *get_pos(DBT * to, uchar * pos);
659
660 int open_main_dictionary(const char* name, bool is_read_only, DB_TXN* txn);
661 int open_secondary_dictionary(DB** ptr, KEY* key_info, const char* name, bool is_read_only, DB_TXN* txn);
662 int acquire_table_lock (DB_TXN* trans, TABLE_LOCK_TYPE lt);
663 int estimate_num_rows(DB* db, uint64_t* num_rows, DB_TXN* txn);
664 bool has_auto_increment_flag(uint* index);
665
666 #if defined(TOKU_INCLUDE_WRITE_FRM_DATA) && TOKU_INCLUDE_WRITE_FRM_DATA
667 int write_frm_data(DB* db, DB_TXN* txn, const char* frm_name);
668 int verify_frm_data(const char* frm_name, DB_TXN* trans);
669 int remove_frm_data(DB *db, DB_TXN *txn);
670 #endif // defined(TOKU_INCLUDE_WRITE_FRM_DATA) && TOKU_INCLUDE_WRITE_FRM_DATA
671
672 int write_to_status(DB* db, HA_METADATA_KEY curr_key_data, void* data, uint size, DB_TXN* txn);
673 int remove_from_status(DB* db, HA_METADATA_KEY curr_key_data, DB_TXN* txn);
674
675 int write_metadata(DB* db, void* key, uint key_size, void* data, uint data_size, DB_TXN* txn);
676 int remove_metadata(DB* db, void* key_data, uint key_size, DB_TXN* transaction);
677
678 int update_max_auto_inc(DB* db, ulonglong val);
679 int remove_key_name_from_status(DB* status_block, char* key_name, DB_TXN* txn);
680 int write_key_name_to_status(DB* status_block, char* key_name, DB_TXN* txn);
681 int write_auto_inc_create(DB* db, ulonglong val, DB_TXN* txn);
682 void init_auto_increment();
683 bool can_replace_into_be_fast(TABLE_SHARE* table_share, KEY_AND_COL_INFO* kc_info, uint pk);
684 int initialize_share(const char* name, int mode);
685
686 void set_query_columns(uint keynr);
687 int prelock_range (const key_range *start_key, const key_range *end_key);
688 int create_txn(THD* thd, tokudb_trx_data* trx);
689 bool may_table_be_empty(DB_TXN *txn);
690 int delete_or_rename_table (const char* from_name, const char* to_name, bool is_delete);
691 int delete_or_rename_dictionary( const char* from_name, const char* to_name, const char* index_name, bool is_key, DB_TXN* txn, bool is_delete);
692 int truncate_dictionary( uint keynr, DB_TXN* txn );
693 int create_secondary_dictionary(
694 const char* name,
695 TABLE* form,
696 KEY* key_info,
697 DB_TXN* txn,
698 KEY_AND_COL_INFO* kc_info,
699 uint32_t keynr,
700 bool is_hot_index,
701 toku_compression_method compression_method
702 );
703 int create_main_dictionary(const char* name, TABLE* form, DB_TXN* txn, KEY_AND_COL_INFO* kc_info, toku_compression_method compression_method);
704 void trace_create_table_info(TABLE* form);
705 int is_index_unique(bool* is_unique, DB_TXN* txn, DB* db, KEY* key_info, int lock_flags);
706 int is_val_unique(bool* is_unique, uchar* record, KEY* key_info, uint dict_index, DB_TXN* txn);
707 int do_uniqueness_checks(uchar* record, DB_TXN* txn, THD* thd);
708 void set_main_dict_put_flags(THD* thd, bool opt_eligible, uint32_t* put_flags);
709 int insert_row_to_main_dictionary(DBT* pk_key, DBT* pk_val, DB_TXN* txn);
710 int insert_rows_to_dictionaries_mult(DBT* pk_key, DBT* pk_val, DB_TXN* txn, THD* thd);
711 void test_row_packing(uchar* record, DBT* pk_key, DBT* pk_val);
712 uint32_t fill_row_mutator(
713 uchar* buf,
714 uint32_t* dropped_columns,
715 uint32_t num_dropped_columns,
716 TABLE* altered_table,
717 KEY_AND_COL_INFO* altered_kc_info,
718 uint32_t keynr,
719 bool is_add
720 );
721
722 // 0 <= active_index < table_share->keys || active_index == MAX_KEY
723 // tokudb_active_index = active_index if active_index < table_share->keys, else tokudb_active_index = primary_key = table_share->keys
724 uint tokudb_active_index;
725
726 public:
727 ha_tokudb(handlerton * hton, TABLE_SHARE * table_arg);
728 ~ha_tokudb();
729
730 const char *table_type() const;
731 const char *index_type(uint inx);
732 const char **bas_ext() const;
733
734 //
735 // Returns a bit mask of capabilities of storage engine. Capabilities
736 // defined in sql/handler.h
737 //
738 ulonglong table_flags() const;
739
740 ulong index_flags(uint inx, uint part, bool all_parts) const;
741
742 //
743 // Returns limit on the number of keys imposed by tokudb.
744 //
max_supported_keys()745 uint max_supported_keys() const {
746 return MAX_KEY;
747 }
748
extra_rec_buf_length()749 uint extra_rec_buf_length() const {
750 return TOKUDB_HIDDEN_PRIMARY_KEY_LENGTH;
751 }
752 ha_rows estimate_rows_upper_bound();
753
754 //
755 // Returns the limit on the key length imposed by tokudb.
756 //
max_supported_key_length()757 uint max_supported_key_length() const {
758 return UINT_MAX32;
759 }
760
761 //
762 // Returns limit on key part length imposed by tokudb.
763 //
max_supported_key_part_length()764 uint max_supported_key_part_length() const {
765 return UINT_MAX32;
766 }
keys_to_use_for_scanning()767 const key_map *keys_to_use_for_scanning() {
768 return &key_map_full;
769 }
770
771 double scan_time();
772
773 double read_time(uint index, uint ranges, ha_rows rows);
774
775 // Defined in mariadb
776 double keyread_time(uint index, uint ranges, ha_rows rows);
777
778 // Defined in mysql 5.6
779 double index_only_read_time(uint keynr, double records);
780
781 int open(const char *name, int mode, uint test_if_locked);
782 int close();
783 void update_create_info(HA_CREATE_INFO* create_info);
784 int create(const char *name, TABLE * form, HA_CREATE_INFO * create_info);
785 int delete_table(const char *name);
786 int rename_table(const char *from, const char *to);
787 int optimize(THD * thd, HA_CHECK_OPT * check_opt);
788 int analyze(THD * thd, HA_CHECK_OPT * check_opt);
789 int write_row(uchar * buf);
790 int update_row(const uchar * old_data, uchar * new_data);
791 int delete_row(const uchar * buf);
792 #if MYSQL_VERSION_ID >= 100000
793 void start_bulk_insert(ha_rows rows, uint flags);
794 #else
795 void start_bulk_insert(ha_rows rows);
796 #endif
797 static int bulk_insert_poll(void* extra, float progress);
798 static void loader_add_index_err(DB* db,
799 int i,
800 int err,
801 DBT* key,
802 DBT* val,
803 void* error_extra);
804 static void loader_dup(DB* db,
805 int i,
806 int err,
807 DBT* key,
808 DBT* val,
809 void* error_extra);
810 int end_bulk_insert();
811 int end_bulk_insert(bool abort);
812
813 int prepare_index_scan();
814 int prepare_index_key_scan( const uchar * key, uint key_len );
815 int prepare_range_scan( const key_range *start_key, const key_range *end_key);
816 void column_bitmaps_signal();
817 int index_init(uint index, bool sorted);
818 int index_end();
819 int index_next_same(uchar * buf, const uchar * key, uint keylen);
820 int index_read(uchar * buf, const uchar * key, uint key_len, enum ha_rkey_function find_flag);
821 int index_read_last(uchar * buf, const uchar * key, uint key_len);
822 int index_next(uchar * buf);
823 int index_prev(uchar * buf);
824 int index_first(uchar * buf);
825 int index_last(uchar * buf);
826
has_gap_locks()827 bool has_gap_locks() const { return true; }
828
829 int rnd_init(bool scan);
830 int rnd_end();
831 int rnd_next(uchar * buf);
832 int rnd_pos(uchar * buf, uchar * pos);
833
834 int read_range_first(const key_range *start_key,
835 const key_range *end_key,
836 bool eq_range, bool sorted);
837 int read_range_next();
838
839
840 void position(const uchar * record);
841 int info(uint);
842 int extra(enum ha_extra_function operation);
843 int reset();
844 int external_lock(THD * thd, int lock_type);
845 int start_stmt(THD * thd, thr_lock_type lock_type);
846
847 ha_rows records_in_range(uint inx, key_range * min_key, key_range * max_key);
848
849 uint32_t get_cursor_isolation_flags(enum thr_lock_type lock_type, THD* thd);
850 THR_LOCK_DATA **store_lock(THD * thd, THR_LOCK_DATA ** to, enum thr_lock_type lock_type);
851
852 int get_status(DB_TXN* trans);
853 void init_hidden_prim_key_info(DB_TXN *txn);
get_auto_primary_key(uchar * to)854 inline void get_auto_primary_key(uchar * to) {
855 share->lock();
856 share->auto_ident++;
857 hpk_num_to_char(to, share->auto_ident);
858 share->unlock();
859 }
860 virtual void get_auto_increment(
861 ulonglong offset,
862 ulonglong increment,
863 ulonglong nb_desired_values,
864 ulonglong* first_value,
865 ulonglong* nb_reserved_values);
866 bool is_optimize_blocking();
867 bool is_auto_inc_singleton();
868 void print_error(int error, myf errflag);
table_cache_type()869 uint8 table_cache_type() {
870 return HA_CACHE_TBL_TRANSACT;
871 }
// Always answers true: this handler reports its primary key as clustered.
bool primary_key_is_clustered() {
    const bool clustered = true;
    return clustered;
}
875 int cmp_ref(const uchar * ref1, const uchar * ref2);
876 bool check_if_incompatible_data(HA_CREATE_INFO * info, uint table_changes);
877
878 #ifdef MARIADB_BASE_VERSION
879
880 // MariaDB MRR introduced in 5.5, API changed in MariaDB 10.0
881 #if MYSQL_VERSION_ID >= 100000
882 #define COST_VECT Cost_estimate
883 #endif
884
885 int multi_range_read_init(RANGE_SEQ_IF* seq,
886 void* seq_init_param,
887 uint n_ranges, uint mode,
888 HANDLER_BUFFER *buf);
889 int multi_range_read_next(range_id_t *range_info);
890 ha_rows multi_range_read_info_const(uint keyno, RANGE_SEQ_IF *seq,
891 void *seq_init_param,
892 uint n_ranges, uint *bufsz,
893 uint *flags, COST_VECT *cost);
894 ha_rows multi_range_read_info(uint keyno, uint n_ranges, uint keys,
895 uint key_parts, uint *bufsz,
896 uint *flags, COST_VECT *cost);
897 int multi_range_read_explain_info(uint mrr_mode, char *str, size_t size);
898
899 #else
900
901 // MySQL MRR introduced in 5.6
902 #if 50600 <= MYSQL_VERSION_ID && MYSQL_VERSION_ID <= 50699
903 int multi_range_read_init(RANGE_SEQ_IF *seq, void *seq_init_param,
904 uint n_ranges, uint mode, HANDLER_BUFFER *buf);
905 int multi_range_read_next(char **range_info);
906 ha_rows multi_range_read_info_const(uint keyno, RANGE_SEQ_IF *seq,
907 void *seq_init_param,
908 uint n_ranges, uint *bufsz,
909 uint *flags, Cost_estimate *cost);
910 ha_rows multi_range_read_info(uint keyno, uint n_ranges, uint keys,
911 uint *bufsz, uint *flags, Cost_estimate *cost);
912 #endif
913
914 #endif
915
916 Item* idx_cond_push(uint keyno, class Item* idx_cond);
917 void cancel_pushed_idx_cond();
918
919 #if defined(TOKU_INCLUDE_ALTER_56) && TOKU_INCLUDE_ALTER_56
920 public:
921 enum_alter_inplace_result check_if_supported_inplace_alter(TABLE *altered_table, Alter_inplace_info *ha_alter_info);
922 bool prepare_inplace_alter_table(TABLE *altered_table, Alter_inplace_info *ha_alter_info);
923 bool inplace_alter_table(TABLE *altered_table, Alter_inplace_info *ha_alter_info);
924 bool commit_inplace_alter_table(TABLE *altered_table, Alter_inplace_info *ha_alter_info, bool commit);
925 private:
926 int alter_table_add_index(Alter_inplace_info* ha_alter_info);
927 int alter_table_drop_index(Alter_inplace_info* ha_alter_info);
928 int alter_table_add_or_drop_column(TABLE *altered_table, Alter_inplace_info *ha_alter_info);
929 int alter_table_expand_varchar_offsets(TABLE *altered_table, Alter_inplace_info *ha_alter_info);
930 int alter_table_expand_columns(TABLE *altered_table, Alter_inplace_info *ha_alter_info);
931 int alter_table_expand_one_column(TABLE *altered_table, Alter_inplace_info *ha_alter_info, int expand_field_num);
932 int alter_table_expand_blobs(TABLE *altered_table, Alter_inplace_info *ha_alter_info);
933 void print_alter_info(TABLE *altered_table, Alter_inplace_info *ha_alter_info);
934 int setup_kc_info(TABLE *altered_table, KEY_AND_COL_INFO *kc_info);
935 int new_row_descriptor(TABLE* altered_table,
936 Alter_inplace_info* ha_alter_info,
937 uint32_t idx,
938 DBT* row_descriptor);
939
940 public:
941 #endif // defined(TOKU_INCLUDE_ALTER_56) && TOKU_INCLUDE_ALTER_56
942 #if defined(TOKU_INCLUDE_ALTER_55) && TOKU_INCLUDE_ALTER_55
943 public:
// Returns true if the 5.6 inplace alter table interface is used.
945 bool try_hot_alter_table();
946
947 // Used by the partition storage engine to provide new frm data for the table.
948 int new_alter_table_frm_data(const uchar *frm_data, size_t frm_len);
949 #endif // defined(TOKU_INCLUDE_ALTER_55) && TOKU_INCLUDE_ALTER_55
950
951 private:
952 int tokudb_add_index(TABLE* table_arg,
953 KEY* key_info,
954 uint num_of_keys,
955 DB_TXN* txn,
956 bool* inc_num_DBs,
957 bool* modified_DB);
958 static int tokudb_add_index_poll(void *extra, float progress);
959 void restore_add_index(TABLE* table_arg,
960 uint num_of_keys,
961 bool incremented_numDBs,
962 bool modified_DBs);
963 int drop_indexes(uint* key_num, uint num_of_keys, KEY* key_info, DB_TXN* txn);
964 void restore_drop_indexes(uint* key_num, uint num_of_keys);
965
966 public:
967 // delete all rows from the table
968 // effect: all dictionaries, including the main and indexes, should be empty
969 int discard_or_import_tablespace(my_bool discard);
970 int truncate();
971 int delete_all_rows();
972 void extract_hidden_primary_key(uint keynr, DBT const *found_key);
973 void read_key_only(uchar * buf, uint keynr, DBT const *found_key);
974 int read_row_callback (uchar * buf, uint keynr, DBT const *row, DBT const *found_key);
975 int read_primary_key(uchar * buf, uint keynr, DBT const *row, DBT const *found_key);
976 int unpack_blobs(
977 uchar* record,
978 const uchar* from_tokudb_blob,
979 uint32_t num_blob_bytes,
980 bool check_bitmap
981 );
982 int unpack_row(
983 uchar* record,
984 DBT const *row,
985 DBT const *key,
986 uint index
987 );
988
prefix_cmp_dbts(uint keynr,const DBT * first_key,const DBT * second_key)989 int prefix_cmp_dbts( uint keynr, const DBT* first_key, const DBT* second_key) {
990 return tokudb_prefix_cmp_dbt_key(share->key_file[keynr], first_key, second_key);
991 }
992
993 void track_progress(THD* thd);
994 void set_loader_error(int err);
995 void set_dup_value_for_pk(DBT* key);
996
997
998 //
999 // index into key_file that holds DB* that is indexed on
1000 // the primary_key. this->key_file[primary_index] == this->file
1001 //
1002 uint primary_key;
1003
1004 int check(THD *thd, HA_CHECK_OPT *check_opt);
1005
1006 int fill_range_query_buf(
1007 bool need_val,
1008 DBT const* key,
1009 DBT const* row,
1010 int direction,
1011 THD* thd,
1012 uchar* buf,
1013 DBT* key_to_compare);
1014
1015 #if defined(TOKU_INCLUDE_ROW_TYPE_COMPRESSION) && \
1016 TOKU_INCLUDE_ROW_TYPE_COMPRESSION
1017 enum row_type get_row_type() const;
1018 #endif // defined(TOKU_INCLUDE_ROW_TYPE_COMPRESSION) &&
1019 // TOKU_INCLUDE_ROW_TYPE_COMPRESSION
1020 private:
1021 int read_full_row(uchar * buf);
1022 int __close();
1023 int get_next(uchar* buf, int direction, DBT* key_to_compare, bool do_key_read);
1024 int read_data_from_range_query_buff(uchar* buf, bool need_val, bool do_key_read);
1025 // for ICP, only in MariaDB and MySQL 5.6
1026 enum icp_result toku_handler_index_cond_check(Item* pushed_idx_cond);
1027 void invalidate_bulk_fetch();
1028 void invalidate_icp();
1029 int delete_all_rows_internal();
1030 void close_dsmrr();
1031 void reset_dsmrr();
1032
1033 #if defined(TOKU_INCLUDE_WRITE_FRM_DATA) && TOKU_INCLUDE_WRITE_FRM_DATA
1034 int write_frm_data(const uchar *frm_data, size_t frm_len);
1035 #endif // defined(TOKU_INCLUDE_WRITE_FRM_DATA) && TOKU_INCLUDE_WRITE_FRM_DATA
1036
1037 private:
1038 #if defined(TOKU_INCLUDE_UPSERT) && TOKU_INCLUDE_UPSERT
1039 MY_NODISCARD int fast_update(THD *thd,
1040 List<Item> &update_fields,
1041 List<Item> &update_values,
1042 Item *conds);
1043 MY_NODISCARD bool check_fast_update(THD *thd,
1044 List<Item> &update_fields,
1045 List<Item> &update_values,
1046 Item *conds);
1047 MY_NODISCARD int send_update_message(List<Item> &update_fields,
1048 List<Item> &update_values,
1049 Item *conds,
1050 DB_TXN *txn);
1051 MY_NODISCARD int upsert(THD *thd,
1052 List<Item> &update_fields,
1053 List<Item> &update_values);
1054 MY_NODISCARD bool check_upsert(THD *thd,
1055 List<Item> &update_fields,
1056 List<Item> &update_values);
1057 MY_NODISCARD int send_upsert_message(List<Item> &update_fields,
1058 List<Item> &update_values,
1059 DB_TXN *txn);
1060 #endif // defined(TOKU_INCLUDE_UPSERT) && TOKU_INCLUDE_UPSERT
1061
1062 public:
1063 // mysql sometimes retires a txn before a cursor that references the txn is closed.
1064 // for example, commit is sometimes called before index_end. the following methods
1065 // put the handler on a list of handlers that get cleaned up when the txn is retired.
1066 void cleanup_txn(DB_TXN *txn);
1067 private:
1068 LIST trx_handler_list;
1069 void add_to_trx_handler_list();
1070 void remove_from_trx_handler_list();
1071
1072 private:
1073 int do_optimize(THD *thd);
1074 int map_to_handler_error(int error);
1075
1076 #if defined(TOKU_INCLUDE_RFR) && TOKU_INCLUDE_RFR
1077 public:
1078 void rpl_before_write_rows();
1079 void rpl_after_write_rows();
1080 void rpl_before_delete_rows();
1081 void rpl_after_delete_rows();
1082 void rpl_before_update_rows();
1083 void rpl_after_update_rows();
1084 bool rpl_lookup_rows();
1085 private:
1086 bool in_rpl_write_rows;
1087 bool in_rpl_delete_rows;
1088 bool in_rpl_update_rows;
1089 #endif // defined(TOKU_INCLUDE_RFR) && TOKU_INCLUDE_RFR
1090 };
1091
1092 #if defined(TOKU_INCLUDE_OPTION_STRUCTS) && TOKU_INCLUDE_OPTION_STRUCTS
1093 struct ha_table_option_struct {
1094 uint row_format;
1095 };
1096
//
// Engine-defined options attached to an index.
// Only compiled when TOKU_INCLUDE_OPTION_STRUCTS is enabled.
//
struct ha_index_option_struct {
    // key_is_clustering() reads a `clustering` member through
    // key->option_struct; presumably that pointer refers to this struct
    // (TODO confirm against the KEY definition).
    bool clustering;
};
1100
key_is_clustering(const KEY * key)1101 static inline bool key_is_clustering(const KEY *key) {
1102 return (key->flags & HA_CLUSTERING) || (key->option_struct && key->option_struct->clustering);
1103 }
1104
1105 #else
1106
key_is_clustering(const KEY * key)1107 static inline bool key_is_clustering(const KEY *key) {
1108 return key->flags & HA_CLUSTERING;
1109 }
1110 #endif // defined(TOKU_INCLUDE_OPTION_STRUCTS) && TOKU_INCLUDE_OPTION_STRUCTS
1111
1112 #endif // _HA_TOKUDB_H
1113
1114