1 /* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */
2 // vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4:
3 #ident "$Id$"
4 /*======
5 This file is part of TokuDB
6
7
8 Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved.
9
    TokuDB is free software: you can redistribute it and/or modify
11 it under the terms of the GNU General Public License, version 2,
12 as published by the Free Software Foundation.
13
14 TokuDB is distributed in the hope that it will be useful,
15 but WITHOUT ANY WARRANTY; without even the implied warranty of
16 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 GNU General Public License for more details.
18
19 You should have received a copy of the GNU General Public License
20 along with TokuDB. If not, see <http://www.gnu.org/licenses/>.
21
22 ======= */
23
24 #ident "Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved."
25
26 #if TOKU_INCLUDE_ALTER_56
27
28 #if 100000 <= MYSQL_VERSION_ID && MYSQL_VERSION_ID <= 101099
29 #define TOKU_ALTER_RENAME ALTER_RENAME
30 #define DYNAMIC_ARRAY_ELEMENTS_TYPE size_t
31 #elif (50600 <= MYSQL_VERSION_ID && MYSQL_VERSION_ID <= 50699) || \
32 (50700 <= MYSQL_VERSION_ID && MYSQL_VERSION_ID <= 50799)
33 #define TOKU_ALTER_RENAME ALTER_RENAME
34 #define DYNAMIC_ARRAY_ELEMENTS_TYPE int
35 #elif 50500 <= MYSQL_VERSION_ID && MYSQL_VERSION_ID <= 50599
36 #define TOKU_ALTER_RENAME ALTER_RENAME_56
37 #define DYNAMIC_ARRAY_ELEMENTS_TYPE int
38 #else
39 #error
40 #endif
41
42 #include "ha_tokudb_alter_common.cc"
43 #include <sql_array.h>
44 #include <sql_base.h>
45
46 // The tokudb alter context contains the alter state that is set in the check if supported method and used
47 // later when the alter operation is executed.
48 class tokudb_alter_ctx : public inplace_alter_handler_ctx {
49 public:
tokudb_alter_ctx()50 tokudb_alter_ctx() :
51 handler_flags(0),
52 alter_txn(NULL),
53 add_index_changed(false),
54 drop_index_changed(false),
55 reset_card(false),
56 compression_changed(false),
57 expand_varchar_update_needed(false),
58 expand_fixed_update_needed(false),
59 expand_blob_update_needed(false),
60 optimize_needed(false),
61 table_kc_info(NULL),
62 altered_table_kc_info(NULL) {
63 }
~tokudb_alter_ctx()64 ~tokudb_alter_ctx() {
65 if (altered_table_kc_info)
66 free_key_and_col_info(altered_table_kc_info);
67 }
68 public:
69 ulong handler_flags;
70 DB_TXN* alter_txn;
71 bool add_index_changed;
72 bool incremented_num_DBs, modified_DBs;
73 bool drop_index_changed;
74 bool reset_card;
75 bool compression_changed;
76 enum toku_compression_method orig_compression_method;
77 bool expand_varchar_update_needed;
78 bool expand_fixed_update_needed;
79 bool expand_blob_update_needed;
80 bool optimize_needed;
81 Dynamic_array<uint> changed_fields;
82 KEY_AND_COL_INFO* table_kc_info;
83 KEY_AND_COL_INFO* altered_table_kc_info;
84 KEY_AND_COL_INFO altered_table_kc_info_base;
85 };
86
87 // Debug function to print out an alter table operation
print_alter_info(TABLE * altered_table,Alter_inplace_info * ha_alter_info)88 void ha_tokudb::print_alter_info(
89 TABLE* altered_table,
90 Alter_inplace_info* ha_alter_info) {
91
92 TOKUDB_TRACE(
93 "***are keys of two tables same? %d",
94 tables_have_same_keys(table, altered_table, false, false));
95 if (ha_alter_info->handler_flags) {
96 TOKUDB_TRACE("***alter flags set ***");
97 for (int i = 0; i < 32; i++) {
98 if (ha_alter_info->handler_flags & (1 << i))
99 TOKUDB_TRACE("%d", i);
100 }
101 }
102
103 // everyone calculates data by doing some default_values - record[0], but
104 // I do not see why that is necessary
105 TOKUDB_TRACE("******");
106 TOKUDB_TRACE("***orig table***");
107 for (uint i = 0; i < table->s->fields; i++) {
108 //
109 // make sure to use table->field, and NOT table->s->field
110 //
111 Field* curr_field = table->field[i];
112 uint null_offset = get_null_offset(table, curr_field);
113 TOKUDB_TRACE(
114 "name: %s, types: %u %u, nullable: %d, null_offset: %d, is_null_field: "
115 "%d, is_null %d, pack_length %u",
116 curr_field->field_name.str,
117 curr_field->real_type(),
118 mysql_to_toku_type(curr_field),
119 curr_field->null_bit,
120 null_offset,
121 curr_field->real_maybe_null(),
122 curr_field->real_maybe_null() ?
123 table->s->default_values[null_offset] & curr_field->null_bit :
124 0xffffffff,
125 curr_field->pack_length());
126 }
127 TOKUDB_TRACE("******");
128 TOKUDB_TRACE("***altered table***");
129 for (uint i = 0; i < altered_table->s->fields; i++) {
130 Field* curr_field = altered_table->field[i];
131 uint null_offset = get_null_offset(altered_table, curr_field);
132 TOKUDB_TRACE(
133 "name: %s, types: %u %u, nullable: %d, null_offset: %d, "
134 "is_null_field: %d, is_null %d, pack_length %u",
135 curr_field->field_name.str,
136 curr_field->real_type(),
137 mysql_to_toku_type(curr_field),
138 curr_field->null_bit,
139 null_offset,
140 curr_field->real_maybe_null(),
141 curr_field->real_maybe_null() ?
142 altered_table->s->default_values[null_offset] &
143 curr_field->null_bit : 0xffffffff,
144 curr_field->pack_length());
145 }
146 TOKUDB_TRACE("******");
147 }
148
149 // Given two tables with equal number of fields, find all of the fields with
150 // different types and return the indexes of the different fields in the
151 // changed_fields array. This function ignores field name differences.
find_changed_fields(TABLE * table_a,TABLE * table_b,Dynamic_array<uint> & changed_fields)152 static int find_changed_fields(
153 TABLE* table_a,
154 TABLE* table_b,
155 Dynamic_array<uint>& changed_fields) {
156
157 for (uint i = 0; i < table_a->s->fields; i++) {
158 Field* field_a = table_a->field[i];
159 Field* field_b = table_b->field[i];
160 if (!fields_are_same_type(field_a, field_b))
161 changed_fields.append(i);
162 }
163 return changed_fields.elements();
164 }
165
166 static bool change_length_is_supported(TABLE* table,
167 TABLE* altered_table,
168 tokudb_alter_ctx* ctx);
169
170 static bool change_type_is_supported(TABLE* table,
171 TABLE* altered_table,
172 tokudb_alter_ctx* ctx);
173
174 // The ha_alter_info->handler_flags can not be trusted.
175 // This function maps the bogus handler flags to something we like.
fix_handler_flags(THD * thd,TABLE * table,TABLE * altered_table,Alter_inplace_info * ha_alter_info)176 static ulong fix_handler_flags(
177 THD* thd,
178 TABLE* table,
179 TABLE* altered_table,
180 Alter_inplace_info* ha_alter_info) {
181
182 ulong handler_flags = ha_alter_info->handler_flags;
183
184 #if 100000 <= MYSQL_VERSION_ID
185 // This is automatically supported, hide the flag from later checks
186 handler_flags &= ~ALTER_PARTITIONED;
187 #endif
188
189 // workaround for fill_alter_inplace_info bug (#5193)
190 // the function erroneously sets the ADD_INDEX and DROP_INDEX flags for a
191 // column addition that does not change the keys.
192 // the following code turns the ADD_INDEX and DROP_INDEX flags so that
193 // we can do hot column addition later.
194 if (handler_flags &
195 (ALTER_ADD_COLUMN + ALTER_DROP_COLUMN)) {
196 if (handler_flags &
197 (ALTER_ADD_NON_UNIQUE_NON_PRIM_INDEX + ALTER_DROP_NON_UNIQUE_NON_PRIM_INDEX)) {
198 if (tables_have_same_keys(
199 table,
200 altered_table,
201 tokudb::sysvars::alter_print_error(thd) != 0, false)) {
202 handler_flags &=
203 ~(ALTER_ADD_NON_UNIQUE_NON_PRIM_INDEX +
204 ALTER_DROP_NON_UNIQUE_NON_PRIM_INDEX);
205 }
206 }
207 }
208
209 // always allow rename table + any other operation, so turn off the
210 // rename flag
211 handler_flags &= ~ALTER_RENAME;
212
213 // ALTER_STORED_COLUMN_TYPE may be set when no columns have been changed,
214 // so turn off the flag
215 if (handler_flags & ALTER_STORED_COLUMN_TYPE) {
216 if (all_fields_are_same_type(table, altered_table)) {
217 handler_flags &= ~ALTER_STORED_COLUMN_TYPE;
218 }
219 }
220
221 return handler_flags;
222 }
223
224 // Require that there is no intersection of add and drop names.
is_disjoint_add_drop(Alter_inplace_info * ha_alter_info)225 static bool is_disjoint_add_drop(Alter_inplace_info *ha_alter_info) {
226 for (uint d = 0; d < ha_alter_info->index_drop_count; d++) {
227 KEY* drop_key = ha_alter_info->index_drop_buffer[d];
228 for (uint a = 0; a < ha_alter_info->index_add_count; a++) {
229 KEY* add_key =
230 &ha_alter_info->key_info_buffer[ha_alter_info->index_add_buffer[a]];
231 if (strcmp(drop_key->name.str, add_key->name.str) == 0) {
232 return false;
233 }
234 }
235 }
236 return true;
237 }
238
// Return true if at least one bit of mask is set in bits and bits has no bit
// set outside of mask, otherwise return false.
static bool only_flags(ulong bits, ulong mask) {
    if ((bits & ~mask) != 0) {
        // something outside of the mask is set
        return false;
    }
    return (bits & mask) != 0;
}
244
245 // Table create options that should be ignored by TokuDB
246 // There are 25 total create options defined by mysql server (see handler.h),
247 // and only 4 options will touch engine data, either rebuild engine data or
248 // just update meta info:
249 // 1. HA_CREATE_USED_AUTO update auto_inc info
250 // 2. HA_CREATE_USED_CHARSET rebuild table if contains character columns
251 // 3. HA_CREATE_USED_ENGINE rebuild table
252 // 4. HA_CREATE_USED_ROW_FORMAT update compression method info
253 //
254 // All the others are either not supported by TokuDB or no need to
255 // touch engine data.
256 static constexpr uint32_t TOKUDB_IGNORED_ALTER_CREATE_OPTION_FIELDS =
257 HA_CREATE_USED_RAID | // deprecated field
258 HA_CREATE_USED_UNION | // for MERGE table
259 HA_CREATE_USED_INSERT_METHOD | // for MERGE table
260 HA_CREATE_USED_MIN_ROWS | // for MEMORY table
261 HA_CREATE_USED_MAX_ROWS | // for NDB table
262 HA_CREATE_USED_AVG_ROW_LENGTH | // for MyISAM table
263 HA_CREATE_USED_PACK_KEYS | // for MyISAM table
264 HA_CREATE_USED_DEFAULT_CHARSET | // no need to rebuild
265 HA_CREATE_USED_DATADIR | // ignored by alter
266 HA_CREATE_USED_INDEXDIR | // ignored by alter
267 HA_CREATE_USED_CHECKSUM | // for MyISAM table
268 HA_CREATE_USED_DELAY_KEY_WRITE | // for MyISAM table
269 HA_CREATE_USED_COMMENT | // no need to rebuild
270 HA_CREATE_USED_PASSWORD | // not supported by community version
271 HA_CREATE_USED_CONNECTION | // for FEDERATED table
272 HA_CREATE_USED_KEY_BLOCK_SIZE | // not supported by TokuDB
273 HA_CREATE_USED_TRANSACTIONAL | // unused
274 HA_CREATE_USED_PAGE_CHECKSUM | // unsued
275 HA_CREATE_USED_STATS_PERSISTENT | // not supported by TokuDB
276 HA_CREATE_USED_STATS_AUTO_RECALC | // not supported by TokuDB
277 HA_CREATE_USED_STATS_SAMPLE_PAGES; // not supported by TokuDB
278
279 // Check if an alter table operation on this table and described by the alter
280 // table parameters is supported inplace and if so, what type of locking is
281 // needed to execute it. return values:
282
283 // HA_ALTER_INPLACE_NOT_SUPPORTED: alter operation is not supported as an
284 // inplace operation, a table copy is required
285
286 // HA_ALTER_ERROR: the alter table operation should fail
287
288 // HA_ALTER_INPLACE_EXCLUSIVE_LOCK: prepare and alter runs with MDL X
289
290 // HA_ALTER_INPLACE_COPY_LOCK: prepare runs with MDL X,
291 // alter runs with MDL SNW
292
293 // HA_ALTER_INPLACE_SHARED_LOCK: prepare and alter methods called with MDL SNW,
294 // concurrent reads, no writes
295
296 // HA_ALTER_INPLACE_COPY_NO_LOCK: prepare runs with MDL X,
297 // alter runs with MDL SW
298
299 // HA_ALTER_INPLACE_NO_LOCK: prepare and alter methods called with MDL SW,
300 // concurrent reads, writes.
301 // must set WRITE_ALLOW_WRITE lock type in the external lock method to avoid
302 // deadlocks with the MDL lock and the table lock
check_if_supported_inplace_alter(TABLE * altered_table,Alter_inplace_info * ha_alter_info)303 enum_alter_inplace_result ha_tokudb::check_if_supported_inplace_alter(
304 TABLE* altered_table,
305 Alter_inplace_info* ha_alter_info) {
306
307 TOKUDB_HANDLER_DBUG_ENTER("");
308
309 if (TOKUDB_UNLIKELY(TOKUDB_DEBUG_FLAGS(TOKUDB_DEBUG_ALTER_TABLE))) {
310 print_alter_info(altered_table, ha_alter_info);
311 }
312
313 // default is NOT inplace
314 enum_alter_inplace_result result = HA_ALTER_INPLACE_NOT_SUPPORTED;
315 THD* thd = ha_thd();
316
317 // setup context
318 tokudb_alter_ctx* ctx = new tokudb_alter_ctx;
319 ha_alter_info->handler_ctx = ctx;
320 ctx->handler_flags =
321 fix_handler_flags(thd, table, altered_table, ha_alter_info);
322 ctx->table_kc_info = &share->kc_info;
323 ctx->altered_table_kc_info = &ctx->altered_table_kc_info_base;
324 memset(ctx->altered_table_kc_info, 0, sizeof (KEY_AND_COL_INFO));
325
326 if (tokudb::sysvars::disable_hot_alter(thd)) {
327 ; // do nothing
328 } else if (only_flags(
329 ctx->handler_flags,
330 ALTER_DROP_NON_UNIQUE_NON_PRIM_INDEX +
331 ALTER_DROP_UNIQUE_INDEX +
332 ALTER_ADD_NON_UNIQUE_NON_PRIM_INDEX +
333 ALTER_ADD_UNIQUE_INDEX)) {
334 // add or drop index
335 if (table->s->null_bytes == altered_table->s->null_bytes &&
336 (ha_alter_info->index_add_count > 0 ||
337 ha_alter_info->index_drop_count > 0) &&
338 !tables_have_same_keys(
339 table,
340 altered_table,
341 tokudb::sysvars::alter_print_error(thd) != 0, false) &&
342 is_disjoint_add_drop(ha_alter_info)) {
343
344 if (ctx->handler_flags &
345 (ALTER_DROP_NON_UNIQUE_NON_PRIM_INDEX +
346 ALTER_DROP_UNIQUE_INDEX)) {
347 // the fractal tree can not handle dropping an index concurrent
348 // with querying with the index.
349 // we grab an exclusive MDL for the drop index.
350 result = HA_ALTER_INPLACE_EXCLUSIVE_LOCK;
351 } else {
352 /* FIXME: MDEV-16099 Use alter algorithm=nocopy
353 or algorithm=instant for non-InnoDB engine */
354 result = HA_ALTER_INPLACE_COPY_LOCK;
355
356 // someday, allow multiple hot indexes via alter table add key.
357 // don't forget to change the store_lock function.
358 // for now, hot indexing is only supported via session variable
359 // with the create index sql command
360 if (ha_alter_info->index_add_count == 1 &&
361 // only one add or drop
362 ha_alter_info->index_drop_count == 0 &&
363 // must be add index not add unique index
364 ctx->handler_flags == ALTER_ADD_NON_UNIQUE_NON_PRIM_INDEX &&
365 // must be a create index command
366 thd_sql_command(thd) == SQLCOM_CREATE_INDEX &&
367 // must be enabled
368 tokudb::sysvars::create_index_online(thd)) {
369 // external_lock set WRITE_ALLOW_WRITE which allows writes
370 // concurrent with the index creation
371 /* FIXME: MDEV-16099 Use alter algorithm=nocopy
372 or algorithm=instant for non-InnoDB engine */
373 result = HA_ALTER_INPLACE_COPY_NO_LOCK;
374 }
375 }
376 }
377 } else if (only_flags(
378 ctx->handler_flags,
379 ALTER_COLUMN_DEFAULT)) {
380 // column default
381 if (table->s->null_bytes == altered_table->s->null_bytes)
382 result = HA_ALTER_INPLACE_EXCLUSIVE_LOCK;
383 } else if (ctx->handler_flags & ALTER_COLUMN_NAME &&
384 only_flags(
385 ctx->handler_flags,
386 ALTER_COLUMN_NAME |
387 ALTER_COLUMN_DEFAULT)) {
388 // column rename
389 // we have identified a possible column rename,
390 // but let's do some more checks
391
392 // we will only allow an hcr if there are no changes
393 // in column positions (ALTER_STORED_COLUMN_ORDER is not set)
394
395 // now need to verify that one and only one column
396 // has changed only its name. If we find anything to
397 // the contrary, we don't allow it, also check indexes
398 if (table->s->null_bytes == altered_table->s->null_bytes) {
399 bool cr_supported =
400 column_rename_supported(
401 table,
402 altered_table,
403 (ctx->handler_flags &
404 ALTER_STORED_COLUMN_ORDER) != 0);
405 if (cr_supported)
406 result = HA_ALTER_INPLACE_EXCLUSIVE_LOCK;
407 }
408 } else if (ctx->handler_flags & ALTER_ADD_COLUMN &&
409 only_flags(
410 ctx->handler_flags,
411 ALTER_ADD_COLUMN |
412 ALTER_STORED_COLUMN_ORDER) &&
413 setup_kc_info(altered_table, ctx->altered_table_kc_info) == 0) {
414
415 // add column
416 uint32_t added_columns[altered_table->s->fields];
417 uint32_t num_added_columns = 0;
418 int r =
419 find_changed_columns(
420 added_columns,
421 &num_added_columns,
422 table,
423 altered_table);
424 if (r == 0) {
425 if (TOKUDB_UNLIKELY(TOKUDB_DEBUG_FLAGS(TOKUDB_DEBUG_ALTER_TABLE))) {
426 for (uint32_t i = 0; i < num_added_columns; i++) {
427 uint32_t curr_added_index = added_columns[i];
428 Field* curr_added_field =
429 altered_table->field[curr_added_index];
430 TOKUDB_TRACE(
431 "Added column: index %d, name %s",
432 curr_added_index,
433 curr_added_field->field_name.str);
434 }
435 }
436 result = HA_ALTER_INPLACE_EXCLUSIVE_LOCK;
437 }
438 } else if (ctx->handler_flags & ALTER_DROP_COLUMN &&
439 only_flags(
440 ctx->handler_flags,
441 ALTER_DROP_COLUMN |
442 ALTER_STORED_COLUMN_ORDER) &&
443 setup_kc_info(altered_table, ctx->altered_table_kc_info) == 0) {
444
445 // drop column
446 uint32_t dropped_columns[table->s->fields];
447 uint32_t num_dropped_columns = 0;
448 int r =
449 find_changed_columns(
450 dropped_columns,
451 &num_dropped_columns,
452 altered_table,
453 table);
454 if (r == 0) {
455 if (TOKUDB_UNLIKELY(TOKUDB_DEBUG_FLAGS(TOKUDB_DEBUG_ALTER_TABLE))) {
456 for (uint32_t i = 0; i < num_dropped_columns; i++) {
457 uint32_t curr_dropped_index = dropped_columns[i];
458 Field* curr_dropped_field = table->field[curr_dropped_index];
459 TOKUDB_TRACE(
460 "Dropped column: index %d, name %s",
461 curr_dropped_index,
462 curr_dropped_field->field_name.str);
463 }
464 }
465 result = HA_ALTER_INPLACE_EXCLUSIVE_LOCK;
466 }
467 } else if ((ctx->handler_flags &
468 ALTER_COLUMN_EQUAL_PACK_LENGTH) &&
469 only_flags(
470 ctx->handler_flags,
471 ALTER_COLUMN_EQUAL_PACK_LENGTH |
472 ALTER_COLUMN_DEFAULT) &&
473 table->s->fields == altered_table->s->fields &&
474 find_changed_fields(
475 table,
476 altered_table,
477 ctx->changed_fields) > 0 &&
478 setup_kc_info(altered_table, ctx->altered_table_kc_info) == 0) {
479
480 // change column length
481 if (change_length_is_supported(table, altered_table, ctx)) {
482 result = HA_ALTER_INPLACE_EXCLUSIVE_LOCK;
483 }
484 } else if ((ctx->handler_flags & ALTER_STORED_COLUMN_TYPE) &&
485 only_flags(
486 ctx->handler_flags,
487 ALTER_STORED_COLUMN_TYPE |
488 ALTER_COLUMN_DEFAULT) &&
489 table->s->fields == altered_table->s->fields &&
490 find_changed_fields(
491 table,
492 altered_table,
493 ctx->changed_fields) > 0 &&
494 setup_kc_info(altered_table, ctx->altered_table_kc_info) == 0) {
495
496 // change column type
497 if (change_type_is_supported(table, altered_table, ctx)) {
498 result = HA_ALTER_INPLACE_EXCLUSIVE_LOCK;
499 }
500 } else if (only_flags(
501 ctx->handler_flags,
502 ALTER_CHANGE_CREATE_OPTION)) {
503
504 HA_CREATE_INFO* create_info = ha_alter_info->create_info;
505 #if TOKU_INCLUDE_OPTION_STRUCTS
506 // set the USED_ROW_FORMAT flag for use later in this file for changes in the table's
507 // compression
508 if (create_info->option_struct->row_format !=
509 table_share->option_struct->row_format)
510 create_info->used_fields |= HA_CREATE_USED_ROW_FORMAT;
511 #endif
512 // alter auto_increment
513 if (only_flags(create_info->used_fields, HA_CREATE_USED_AUTO)) {
514 // do a sanity check that the table is what we think it is
515 if (tables_have_same_keys_and_columns(
516 table,
517 altered_table,
518 tokudb::sysvars::alter_print_error(thd) != 0)) {
519 result = HA_ALTER_INPLACE_EXCLUSIVE_LOCK;
520 }
521 } else if (only_flags(
522 create_info->used_fields,
523 HA_CREATE_USED_ROW_FORMAT)) {
524 // alter row_format
525 // do a sanity check that the table is what we think it is
526 if (tables_have_same_keys_and_columns(
527 table,
528 altered_table,
529 tokudb::sysvars::alter_print_error(thd) != 0)) {
530 result = HA_ALTER_INPLACE_EXCLUSIVE_LOCK;
531 }
532 } else if (only_flags(
533 create_info->used_fields,
534 TOKUDB_IGNORED_ALTER_CREATE_OPTION_FIELDS)) {
535 result = HA_ALTER_INPLACE_COPY_NO_LOCK;
536 }
537 }
538 #if TOKU_OPTIMIZE_WITH_RECREATE
539 else if (only_flags(
540 ctx->handler_flags,
541 ALTER_RECREATE_TABLE |
542 ALTER_COLUMN_DEFAULT)) {
543 ctx->optimize_needed = true;
544 /* FIXME: MDEV-16099 Use alter algorithm=nocopy
545 or algorithm=instant for non-InnoDB engine */
546 result = HA_ALTER_INPLACE_COPY_NO_LOCK;
547 }
548 #endif
549
550 if (TOKUDB_UNLIKELY(TOKUDB_DEBUG_FLAGS(TOKUDB_DEBUG_ALTER_TABLE)) &&
551 result != HA_ALTER_INPLACE_NOT_SUPPORTED &&
552 table->s->null_bytes != altered_table->s->null_bytes) {
553
554 TOKUDB_HANDLER_TRACE("q %s", thd->query());
555 TOKUDB_HANDLER_TRACE(
556 "null bytes %u -> %u",
557 table->s->null_bytes,
558 altered_table->s->null_bytes);
559 }
560
561 // turn a not supported result into an error if the slow alter table
562 // (copy) is disabled
563 if (result == HA_ALTER_INPLACE_NOT_SUPPORTED &&
564 tokudb::sysvars::disable_slow_alter(thd)) {
565 print_error(HA_ERR_UNSUPPORTED, MYF(0));
566 result = HA_ALTER_ERROR;
567 }
568
569 DBUG_RETURN(result);
570 }
571
572 // Prepare for the alter operations
prepare_inplace_alter_table(TOKUDB_UNUSED (TABLE * altered_table),Alter_inplace_info * ha_alter_info)573 bool ha_tokudb::prepare_inplace_alter_table(TOKUDB_UNUSED(TABLE* altered_table),
574 Alter_inplace_info* ha_alter_info) {
575 TOKUDB_HANDLER_DBUG_ENTER("");
576 tokudb_alter_ctx* ctx =
577 static_cast<tokudb_alter_ctx*>(ha_alter_info->handler_ctx);
578 assert_always(transaction); // transaction must exist after table is locked
579 ctx->alter_txn = transaction;
580 bool result = false; // success
581 DBUG_RETURN(result);
582 }
583
584 // Execute the alter operations.
inplace_alter_table(TABLE * altered_table,Alter_inplace_info * ha_alter_info)585 bool ha_tokudb::inplace_alter_table(
586 TABLE* altered_table,
587 Alter_inplace_info* ha_alter_info) {
588
589 TOKUDB_HANDLER_DBUG_ENTER("");
590
591 int error = 0;
592 tokudb_alter_ctx* ctx =
593 static_cast<tokudb_alter_ctx*>(ha_alter_info->handler_ctx);
594 HA_CREATE_INFO* create_info = ha_alter_info->create_info;
595
596 // this should be enough to handle locking as the higher level MDL
597 // on this table should prevent any new analyze tasks.
598 share->cancel_background_jobs();
599
600 if (error == 0 &&
601 (ctx->handler_flags &
602 (ALTER_DROP_NON_UNIQUE_NON_PRIM_INDEX |
603 ALTER_DROP_UNIQUE_INDEX))) {
604 error = alter_table_drop_index(ha_alter_info);
605 }
606 if (error == 0 &&
607 (ctx->handler_flags &
608 (ALTER_ADD_NON_UNIQUE_NON_PRIM_INDEX +
609 ALTER_ADD_UNIQUE_INDEX))) {
610 error = alter_table_add_index(ha_alter_info);
611 }
612 if (error == 0 &&
613 (ctx->handler_flags &
614 (ALTER_ADD_COLUMN |
615 ALTER_DROP_COLUMN))) {
616 error = alter_table_add_or_drop_column(altered_table, ha_alter_info);
617 }
618 if (error == 0 &&
619 (ctx->handler_flags & ALTER_CHANGE_CREATE_OPTION) &&
620 (create_info->used_fields & HA_CREATE_USED_AUTO)) {
621 error = write_auto_inc_create(
622 share->status_block,
623 create_info->auto_increment_value,
624 ctx->alter_txn);
625 }
626 if (error == 0 &&
627 (ctx->handler_flags & ALTER_CHANGE_CREATE_OPTION) &&
628 (create_info->used_fields & HA_CREATE_USED_ROW_FORMAT)) {
629 // Get the current compression
630 DB *db = share->key_file[0];
631 error = db->get_compression_method(db, &ctx->orig_compression_method);
632 assert_always(error == 0);
633
634 // Set the new compression
635 #if TOKU_INCLUDE_OPTION_STRUCTS
636 toku_compression_method method =
637 row_format_to_toku_compression_method(
638 (tokudb::sysvars::row_format_t)create_info->option_struct->row_format);
639 #else
640 toku_compression_method method =
641 row_type_to_toku_compression_method(create_info->row_type);
642 #endif
643 uint32_t curr_num_DBs = table->s->keys + tokudb_test(hidden_primary_key);
644 for (uint32_t i = 0; i < curr_num_DBs; i++) {
645 db = share->key_file[i];
646 error = db->change_compression_method(db, method);
647 if (error)
648 break;
649 ctx->compression_changed = true;
650 }
651 }
652
653 // note: only one column expansion is allowed
654
655 if (error == 0 && ctx->expand_fixed_update_needed)
656 error = alter_table_expand_columns(altered_table, ha_alter_info);
657
658 if (error == 0 && ctx->expand_varchar_update_needed)
659 error = alter_table_expand_varchar_offsets(
660 altered_table,
661 ha_alter_info);
662
663 if (error == 0 && ctx->expand_blob_update_needed)
664 error = alter_table_expand_blobs(altered_table, ha_alter_info);
665
666 if (error == 0 && ctx->reset_card) {
667 error = tokudb::alter_card(
668 share->status_block,
669 ctx->alter_txn,
670 table->s,
671 altered_table->s);
672 }
673 if (error == 0 && ctx->optimize_needed) {
674 error = do_optimize(ha_thd());
675 }
676
677
678 #if defined(TOKU_INCLUDE_WRITE_FRM_DATA) && TOKU_INCLUDE_WRITE_FRM_DATA
679 #if (50600 <= MYSQL_VERSION_ID && MYSQL_VERSION_ID <= 50699) || \
680 (50700 <= MYSQL_VERSION_ID && MYSQL_VERSION_ID <= 50799)
681 #if defined(WITH_PARTITION_STORAGE_ENGINE) && WITH_PARTITION_STORAGE_ENGINE
682 if (error == 0 &&
683 (TOKU_PARTITION_WRITE_FRM_DATA || altered_table->part_info == NULL)) {
684 #else
685 if (error == 0) {
686 #endif // defined(WITH_PARTITION_STORAGE_ENGINE) && WITH_PARTITION_STORAGE_ENGINE
687 error = write_frm_data(
688 share->status_block,
689 ctx->alter_txn,
690 altered_table->s->path.str);
691 }
692 #endif // (50600 <= MYSQL_VERSION_ID && MYSQL_VERSION_ID <= 50699) ||
693 // (50700 <= MYSQL_VERSION_ID && MYSQL_VERSION_ID <= 50799)
694 #endif // defined(TOKU_INCLUDE_WRITE_FRM_DATA) && TOKU_INCLUDE_WRITE_FRM_DATA
695
696 bool result = false; // success
697 if (error) {
698 print_error(error, MYF(0));
699 result = true; // failure
700 }
701
702 DBUG_RETURN(result);
703 }
704
705 int ha_tokudb::alter_table_add_index(Alter_inplace_info* ha_alter_info) {
706
707 // sort keys in add index order
708 KEY* key_info = (KEY*)tokudb::memory::malloc(
709 sizeof(KEY) * ha_alter_info->index_add_count,
710 MYF(MY_WME));
711 for (uint i = 0; i < ha_alter_info->index_add_count; i++) {
712 KEY *key = &key_info[i];
713 *key = ha_alter_info->key_info_buffer[ha_alter_info->index_add_buffer[i]];
714 for (KEY_PART_INFO* key_part = key->key_part;
715 key_part < key->key_part + key->user_defined_key_parts;
716 key_part++) {
717 key_part->field = table->field[key_part->fieldnr];
718 }
719 }
720
721 tokudb_alter_ctx* ctx =
722 static_cast<tokudb_alter_ctx*>(ha_alter_info->handler_ctx);
723 ctx->add_index_changed = true;
724 int error = tokudb_add_index(
725 table,
726 key_info,
727 ha_alter_info->index_add_count,
728 ctx->alter_txn,
729 &ctx->incremented_num_DBs,
730 &ctx->modified_DBs);
731 if (error == HA_ERR_FOUND_DUPP_KEY) {
732 // hack for now, in case of duplicate key error,
733 // because at the moment we cannot display the right key
734 // information to the user, so that he knows potentially what went
735 // wrong.
736 last_dup_key = MAX_KEY;
737 }
738
739 tokudb::memory::free(key_info);
740
741 if (error == 0)
742 ctx->reset_card = true;
743
744 return error;
745 }
746
747 static bool find_index_of_key(
748 const char* key_name,
749 TABLE* table,
750 uint* index_offset_ptr) {
751
752 for (uint i = 0; i < table->s->keys; i++) {
753 if (strcmp(key_name, table->key_info[i].name.str) == 0) {
754 *index_offset_ptr = i;
755 return true;
756 }
757 }
758 return false;
759 }
760
761 static bool find_index_of_key(
762 const char* key_name,
763 KEY* key_info,
764 uint key_count,
765 uint* index_offset_ptr) {
766
767 for (uint i = 0; i < key_count; i++) {
768 if (strcmp(key_name, key_info[i].name.str) == 0) {
769 *index_offset_ptr = i;
770 return true;
771 }
772 }
773 return false;
774 }
775
776 int ha_tokudb::alter_table_drop_index(Alter_inplace_info* ha_alter_info) {
777
778 KEY *key_info = table->key_info;
779 // translate key names to indexes into the key_info array
780 uint index_drop_offsets[ha_alter_info->index_drop_count];
781 for (uint i = 0; i < ha_alter_info->index_drop_count; i++) {
782 bool found;
783 found = find_index_of_key(
784 ha_alter_info->index_drop_buffer[i]->name.str,
785 table,
786 &index_drop_offsets[i]);
787 if (!found) {
788 // undo of add key in partition engine
789 found = find_index_of_key(
790 ha_alter_info->index_drop_buffer[i]->name.str,
791 ha_alter_info->key_info_buffer,
792 ha_alter_info->key_count,
793 &index_drop_offsets[i]);
794 assert_always(found);
795 key_info = ha_alter_info->key_info_buffer;
796 }
797 }
798
799 // drop indexes
800 tokudb_alter_ctx* ctx =
801 static_cast<tokudb_alter_ctx*>(ha_alter_info->handler_ctx);
802 ctx->drop_index_changed = true;
803
804 int error = drop_indexes(index_drop_offsets,
805 ha_alter_info->index_drop_count,
806 key_info,
807 ctx->alter_txn);
808
809 if (error == 0)
810 ctx->reset_card = true;
811
812 return error;
813 }
814
815 int ha_tokudb::alter_table_add_or_drop_column(
816 TABLE* altered_table,
817 Alter_inplace_info* ha_alter_info) {
818
819 tokudb_alter_ctx* ctx =
820 static_cast<tokudb_alter_ctx*>(ha_alter_info->handler_ctx);
821 int error;
822 uchar *column_extra = NULL;
823 uint32_t max_column_extra_size;
824 uint32_t num_column_extra;
825 uint32_t num_columns = 0;
826 uint32_t curr_num_DBs = table->s->keys + tokudb_test(hidden_primary_key);
827 // set size such that we know it is big enough for both cases
828 uint32_t columns[table->s->fields + altered_table->s->fields];
829 memset(columns, 0, sizeof(columns));
830
831 // generate the array of columns
832 if (ha_alter_info->handler_flags & ALTER_DROP_COLUMN) {
833 find_changed_columns(
834 columns,
835 &num_columns,
836 altered_table,
837 table);
838 } else if (ha_alter_info->handler_flags & ALTER_ADD_COLUMN) {
839 find_changed_columns(
840 columns,
841 &num_columns,
842 table,
843 altered_table);
844 } else {
845 assert_unreachable();
846 }
847 max_column_extra_size =
848 // max static row_mutator
849 STATIC_ROW_MUTATOR_SIZE +
850 // max dynamic row_mutator
851 4 + num_columns*(1+1+4+1+1+4) + altered_table->s->reclength +
852 // max static blob size
853 (4 + share->kc_info.num_blobs) +
854 // max dynamic blob size
855 (num_columns*(1+4+1+4));
856 column_extra = (uchar*)tokudb::memory::malloc(
857 max_column_extra_size,
858 MYF(MY_WME));
859 if (column_extra == NULL) {
860 error = ENOMEM;
861 goto cleanup;
862 }
863
864 for (uint32_t i = 0; i < curr_num_DBs; i++) {
865 // change to a new descriptor
866 DBT row_descriptor; memset(&row_descriptor, 0, sizeof row_descriptor);
867 error = new_row_descriptor(
868 altered_table, ha_alter_info, i, &row_descriptor);
869 if (error)
870 goto cleanup;
871 error = share->key_file[i]->change_descriptor(
872 share->key_file[i],
873 ctx->alter_txn,
874 &row_descriptor,
875 0);
876 tokudb::memory::free(row_descriptor.data);
877 if (error)
878 goto cleanup;
879
880 if (i == primary_key || key_is_clustering(&table_share->key_info[i])) {
881 num_column_extra = fill_row_mutator(
882 column_extra,
883 columns,
884 num_columns,
885 altered_table,
886 ctx->altered_table_kc_info,
887 i,
888 // true if adding columns, otherwise is a drop
889 (ha_alter_info->handler_flags &
890 ALTER_ADD_COLUMN) != 0);
891
892 DBT column_dbt; memset(&column_dbt, 0, sizeof column_dbt);
893 column_dbt.data = column_extra;
894 column_dbt.size = num_column_extra;
895 DBUG_ASSERT(num_column_extra <= max_column_extra_size);
896 error = share->key_file[i]->update_broadcast(
897 share->key_file[i],
898 ctx->alter_txn,
899 &column_dbt,
900 DB_IS_RESETTING_OP);
901 if (error) {
902 goto cleanup;
903 }
904 }
905 }
906
907 error = 0;
908 cleanup:
909 tokudb::memory::free(column_extra);
910 return error;
911 }
912
// Commit or abort the alter operations.
// If commit then write the new frm data to the status using the alter
// transaction.
// If abort then abort the alter transaction and try to rollback the
// non-transactional changes.
//
// altered_table is wrapped in TOKUDB_UNUSED because it is only referenced
// inside the conditionally compiled frm-writing section below.
//
// Returns false on success.  The only path that returns true is a failure of
// write_frm_data(); in that case the error is reported through print_error()
// and the function falls through into the abort path.
bool ha_tokudb::commit_inplace_alter_table(TOKUDB_UNUSED(TABLE* altered_table),
                                           Alter_inplace_info* ha_alter_info,
                                           bool commit) {
    TOKUDB_HANDLER_DBUG_ENTER("");

    tokudb_alter_ctx* ctx =
        static_cast<tokudb_alter_ctx*>(ha_alter_info->handler_ctx);
    bool result = false; // success
    THD *thd = ha_thd();

    if (commit) {
        // On server versions that have group_commit_ctx, clear it.
        // NOTE(review): presumably this signals to the server how the group
        // of handlers was committed -- confirm against the server's
        // Alter_inplace_info documentation.
#if (50613 <= MYSQL_VERSION_ID && MYSQL_VERSION_ID <= 50699) || \
    (50700 <= MYSQL_VERSION_ID && MYSQL_VERSION_ID <= 50799) || \
    (100000 <= MYSQL_VERSION_ID)
        if (ha_alter_info->group_commit_ctx) {
            ha_alter_info->group_commit_ctx = NULL;
        }
#endif
#if defined(TOKU_INCLUDE_WRITE_FRM_DATA) && TOKU_INCLUDE_WRITE_FRM_DATA
#if (50500 <= MYSQL_VERSION_ID && MYSQL_VERSION_ID <= 50599) || \
    (100000 <= MYSQL_VERSION_ID)
        // Write the new frm data under the alter transaction.  With partition
        // support compiled in, this is skipped for partitioned tables unless
        // TOKU_PARTITION_WRITE_FRM_DATA is set.
#if defined(WITH_PARTITION_STORAGE_ENGINE) && WITH_PARTITION_STORAGE_ENGINE
        if (TOKU_PARTITION_WRITE_FRM_DATA || altered_table->part_info == NULL) {
#else
        if (true) {
#endif  // defined(WITH_PARTITION_STORAGE_ENGINE) && WITH_PARTITION_STORAGE_ENGINE
            int error = write_frm_data(
                share->status_block,
                ctx->alter_txn,
                altered_table->s->path.str);
            if (error) {
                // Turn the commit into an abort: clearing 'commit' makes the
                // rollback section below run, and 'result' reports failure.
                commit = false;
                result = true;
                print_error(error, MYF(0));
            }
        }
#endif  // (50500 <= MYSQL_VERSION_ID && MYSQL_VERSION_ID <= 50599) ||
        // (100000 <= MYSQL_VERSION_ID)
#endif  // defined(TOKU_INCLUDE_WRITE_FRM_DATA) && TOKU_INCLUDE_WRITE_FRM_DATA
    }

    if (!commit) {
        // The index and compression restores below are not covered by the
        // alter transaction, so if we do not already hold an exclusive MDL
        // and one of those changes was made, upgrade the lock first.
        if (table->mdl_ticket->get_type() != MDL_EXCLUSIVE &&
            (ctx->add_index_changed || ctx->drop_index_changed ||
             ctx->compression_changed)) {

            // get exclusive lock no matter what
            // The thread's killed state is saved and cleared, the wait is
            // retried once per second for as long as
            // wait_while_table_is_used() returns non-zero (clearing any kill
            // that arrives meanwhile), and the saved state is put back
            // afterwards if no new kill is pending.  The loop counter i is
            // only incremented, never read.
#if defined(MARIADB_BASE_VERSION)
            killed_state saved_killed_state = thd->killed;
            thd->killed = NOT_KILLED;
            for (volatile uint i = 0;
                 wait_while_table_is_used(thd, table, HA_EXTRA_NOT_USED);
                 i++) {
                if (thd->killed != NOT_KILLED)
                    thd->killed = NOT_KILLED;
                sleep(1);
            }
            assert_always(table->mdl_ticket->get_type() == MDL_EXCLUSIVE);
            if (thd->killed == NOT_KILLED)
                thd->killed = saved_killed_state;
#else
            THD::killed_state saved_killed_state = thd->killed;
            thd->killed = THD::NOT_KILLED;
            // MySQL does not handle HA_EXTRA_NOT_USED so we use
            // HA_EXTRA_PREPARE_FOR_RENAME since it is passed through
            // the partition storage engine and is treated as a NOP by tokudb
            for (volatile uint i = 0;
                 wait_while_table_is_used(
                     thd,
                     table,
                     HA_EXTRA_PREPARE_FOR_RENAME);
                 i++) {
                if (thd->killed != THD::NOT_KILLED)
                    thd->killed = THD::NOT_KILLED;
                sleep(1);
            }
            assert_always(table->mdl_ticket->get_type() == MDL_EXCLUSIVE);
            if (thd->killed == THD::NOT_KILLED)
                thd->killed = saved_killed_state;
#endif
        }

        // abort the alter transaction NOW so that any alters are rolled back.
        // this allows the following restores to work.
        tokudb_trx_data* trx =
            (tokudb_trx_data*)thd_get_ha_data(thd, tokudb_hton);
        assert_always(ctx->alter_txn == trx->stmt);
        assert_always(trx->tokudb_lock_count > 0);
        // for partitioned tables, we use a single transaction to do all of the
        // partition changes.  the tokudb_lock_count is a reference count for
        // each of the handlers to the same transaction.  obviously, we want
        // to only abort once.
        if (trx->tokudb_lock_count > 0) {
            // Drop this handler's reference; the abort happens only once the
            // count has fallen to create_lock_count or below.
            if (--trx->tokudb_lock_count <= trx->create_lock_count) {
                trx->create_lock_count = 0;
                abort_txn(ctx->alter_txn);
                ctx->alter_txn = NULL;
                trx->stmt = NULL;
                trx->sub_sp_level = NULL;
            }
            // This handler no longer refers to the transaction, whether or
            // not it was the one that aborted it.
            transaction = NULL;
        }

        // Undo the changes recorded in the alter context.
        if (ctx->add_index_changed) {
            restore_add_index(
                table,
                ha_alter_info->index_add_count,
                ctx->incremented_num_DBs,
                ctx->modified_DBs);
        }
        if (ctx->drop_index_changed) {
            // translate key names to indexes into the key_info array
            // NOTE(review): this is a variable-length array, which is a
            // compiler extension in C++.
            uint index_drop_offsets[ha_alter_info->index_drop_count];
            for (uint i = 0; i < ha_alter_info->index_drop_count; i++) {
                bool found = find_index_of_key(
                    ha_alter_info->index_drop_buffer[i]->name.str,
                    table,
                    &index_drop_offsets[i]);
                assert_always(found);
            }
            restore_drop_indexes(index_drop_offsets,
                                 ha_alter_info->index_drop_count);
        }
        if (ctx->compression_changed) {
            // Put the original compression method back on every dictionary
            // (one per key, plus one when there is a hidden primary key).
            uint32_t curr_num_DBs =
                table->s->keys + tokudb_test(hidden_primary_key);
            for (uint32_t i = 0; i < curr_num_DBs; i++) {
                DB *db = share->key_file[i];
                int error = db->change_compression_method(
                    db,
                    ctx->orig_compression_method);
                assert_always(error == 0);
            }
        }
    }
    DBUG_RETURN(result);
}
1055
1056 // Setup the altered table's key and col info.
1057 int ha_tokudb::setup_kc_info(
1058 TABLE* altered_table,
1059 KEY_AND_COL_INFO* altered_kc_info) {
1060
1061 int error = allocate_key_and_col_info(altered_table->s, altered_kc_info);
1062 if (error == 0)
1063 error = initialize_key_and_col_info(
1064 altered_table->s,
1065 altered_table,
1066 altered_kc_info,
1067 hidden_primary_key,
1068 primary_key);
1069 return error;
1070 }
1071
1072 // Expand the variable length fields offsets from 1 to 2 bytes.
1073 int ha_tokudb::alter_table_expand_varchar_offsets(
1074 TABLE* altered_table,
1075 Alter_inplace_info* ha_alter_info) {
1076
1077 int error = 0;
1078 tokudb_alter_ctx* ctx =
1079 static_cast<tokudb_alter_ctx*>(ha_alter_info->handler_ctx);
1080
1081 uint32_t curr_num_DBs = table->s->keys + tokudb_test(hidden_primary_key);
1082 for (uint32_t i = 0; i < curr_num_DBs; i++) {
1083 // change to a new descriptor
1084 DBT row_descriptor; memset(&row_descriptor, 0, sizeof row_descriptor);
1085 error = new_row_descriptor(
1086 altered_table, ha_alter_info, i, &row_descriptor);
1087 if (error)
1088 break;
1089 error = share->key_file[i]->change_descriptor(
1090 share->key_file[i],
1091 ctx->alter_txn,
1092 &row_descriptor,
1093 0);
1094 tokudb::memory::free(row_descriptor.data);
1095 if (error)
1096 break;
1097
1098 // for all trees that have values, make an update variable offsets
1099 // message and broadcast it into the tree
1100 if (i == primary_key || key_is_clustering(&table_share->key_info[i])) {
1101 uint32_t offset_start =
1102 table_share->null_bytes +
1103 share->kc_info.mcp_info[i].fixed_field_size;
1104 uint32_t offset_end =
1105 offset_start +
1106 share->kc_info.mcp_info[i].len_of_offsets;
1107 uint32_t number_of_offsets = offset_end - offset_start;
1108
1109 // make the expand variable offsets message
1110 DBT expand; memset(&expand, 0, sizeof expand);
1111 expand.size =
1112 sizeof(uchar) + sizeof(offset_start) + sizeof(offset_end);
1113 expand.data = tokudb::memory::malloc(expand.size, MYF(MY_WME));
1114 if (!expand.data) {
1115 error = ENOMEM;
1116 break;
1117 }
1118 uchar* expand_ptr = (uchar*)expand.data;
1119 expand_ptr[0] = UPDATE_OP_EXPAND_VARIABLE_OFFSETS;
1120 expand_ptr += sizeof(uchar);
1121
1122 memcpy(expand_ptr, &number_of_offsets, sizeof(number_of_offsets));
1123 expand_ptr += sizeof(number_of_offsets);
1124
1125 memcpy(expand_ptr, &offset_start, sizeof(offset_start));
1126 expand_ptr += sizeof(offset_start);
1127
1128 // and broadcast it into the tree
1129 error = share->key_file[i]->update_broadcast(
1130 share->key_file[i],
1131 ctx->alter_txn,
1132 &expand,
1133 DB_IS_RESETTING_OP);
1134 tokudb::memory::free(expand.data);
1135 if (error)
1136 break;
1137 }
1138 }
1139
1140 return error;
1141 }
1142
1143 // Return true if a field is part of a key
1144 static bool field_in_key(KEY *key, Field *field) {
1145 for (uint i = 0; i < key->user_defined_key_parts; i++) {
1146 KEY_PART_INFO *key_part = &key->key_part[i];
1147 if (strcmp(key_part->field->field_name.str, field->field_name.str) == 0)
1148 return true;
1149 }
1150 return false;
1151 }
1152
1153 // Return true if a field is part of any key
1154 static bool field_in_key_of_table(TABLE *table, Field *field) {
1155 for (uint i = 0; i < table->s->keys; i++) {
1156 if (field_in_key(&table->key_info[i], field))
1157 return true;
1158 }
1159 return false;
1160 }
1161
1162 // Return true if all changed varchar/varbinary field lengths can be changed
1163 // inplace, otherwise return false
1164 static bool change_varchar_length_is_supported(Field* old_field,
1165 Field* new_field,
1166 tokudb_alter_ctx* ctx) {
1167 if (old_field->real_type() != MYSQL_TYPE_VARCHAR ||
1168 new_field->real_type() != MYSQL_TYPE_VARCHAR ||
1169 old_field->binary() != new_field->binary() ||
1170 old_field->charset()->number != new_field->charset()->number ||
1171 old_field->field_length > new_field->field_length)
1172 return false;
1173 if (ctx->table_kc_info->num_offset_bytes >
1174 ctx->altered_table_kc_info->num_offset_bytes)
1175 return false; // shrink is not supported
1176 if (ctx->table_kc_info->num_offset_bytes <
1177 ctx->altered_table_kc_info->num_offset_bytes)
1178 // sum of varchar lengths changed from 1 to 2
1179 ctx->expand_varchar_update_needed = true;
1180 return true;
1181 }
1182
1183 // Return true if all changed field lengths can be changed inplace, otherwise
1184 // return false
1185 static bool change_length_is_supported(TABLE* table,
1186 TABLE* altered_table,
1187 tokudb_alter_ctx* ctx) {
1188 if (table->s->fields != altered_table->s->fields)
1189 return false;
1190 if (table->s->null_bytes != altered_table->s->null_bytes)
1191 return false;
1192 if (ctx->changed_fields.elements() > 1)
1193 return false; // only support one field change
1194 for (DYNAMIC_ARRAY_ELEMENTS_TYPE ai = 0;
1195 ai < ctx->changed_fields.elements();
1196 ai++) {
1197 uint i = ctx->changed_fields.at(ai);
1198 Field *old_field = table->field[i];
1199 Field *new_field = altered_table->field[i];
1200 if (old_field->real_type() != new_field->real_type())
1201 return false; // no type conversions
1202 if (old_field->real_type() != MYSQL_TYPE_VARCHAR)
1203 return false; // only varchar
1204 if (field_in_key_of_table(table, old_field) ||
1205 field_in_key_of_table(altered_table, new_field))
1206 return false; // not in any key
1207 if (!change_varchar_length_is_supported(old_field, new_field, ctx))
1208 return false;
1209 }
1210
1211 return true;
1212 }
1213
1214 // Debug function that ensures that the array is sorted
1215 static bool is_sorted(Dynamic_array<uint> &a) {
1216 bool r = true;
1217 if (a.elements() > 0) {
1218 uint lastelement = a.at(0);
1219 for (DYNAMIC_ARRAY_ELEMENTS_TYPE i = 1; i < a.elements(); i++)
1220 if (lastelement > a.at(i))
1221 r = false;
1222 }
1223 return r;
1224 }
1225
1226 int ha_tokudb::alter_table_expand_columns(
1227 TABLE* altered_table,
1228 Alter_inplace_info* ha_alter_info) {
1229
1230 int error = 0;
1231 tokudb_alter_ctx* ctx =
1232 static_cast<tokudb_alter_ctx*>(ha_alter_info->handler_ctx);
1233 // since we build the changed_fields array in field order, it must be sorted
1234 assert_always(is_sorted(ctx->changed_fields));
1235 for (DYNAMIC_ARRAY_ELEMENTS_TYPE ai = 0;
1236 error == 0 && ai < ctx->changed_fields.elements();
1237 ai++) {
1238 uint expand_field_num = ctx->changed_fields.at(ai);
1239 error = alter_table_expand_one_column(
1240 altered_table,
1241 ha_alter_info,
1242 expand_field_num);
1243 }
1244
1245 return error;
1246 }
1247
1248 // Return true if the field is an unsigned int
1249 static bool is_unsigned(Field *f) {
1250 return (f->flags & UNSIGNED_FLAG) != 0;
1251 }
1252
1253 // Return the starting offset in the value for a particular index (selected by
1254 // idx) of a particular field (selected by expand_field_num)
1255 // TODO: replace this?
1256 static uint32_t alter_table_field_offset(
1257 uint32_t null_bytes,
1258 KEY_AND_COL_INFO* kc_info,
1259 int idx,
1260 int expand_field_num) {
1261
1262 uint32_t offset = null_bytes;
1263 for (int i = 0; i < expand_field_num; i++) {
1264 if (bitmap_is_set(&kc_info->key_filters[idx], i)) // skip key fields
1265 continue;
1266 offset += kc_info->field_lengths[i];
1267 }
1268 return offset;
1269 }
1270
1271 // Send an expand message into all clustered indexes including the primary
1272 int ha_tokudb::alter_table_expand_one_column(
1273 TABLE* altered_table,
1274 Alter_inplace_info* ha_alter_info,
1275 int expand_field_num) {
1276
1277 int error = 0;
1278 tokudb_alter_ctx* ctx =
1279 static_cast<tokudb_alter_ctx*>(ha_alter_info->handler_ctx);
1280
1281 Field *old_field = table->field[expand_field_num];
1282 TOKU_TYPE old_field_type = mysql_to_toku_type(old_field);
1283 Field *new_field = altered_table->field[expand_field_num];
1284 TOKU_TYPE new_field_type = mysql_to_toku_type(new_field);
1285 assert_always(old_field_type == new_field_type);
1286
1287 uchar operation;
1288 uchar pad_char;
1289 switch (old_field_type) {
1290 case toku_type_int:
1291 assert_always(is_unsigned(old_field) == is_unsigned(new_field));
1292 if (is_unsigned(old_field))
1293 operation = UPDATE_OP_EXPAND_UINT;
1294 else
1295 operation = UPDATE_OP_EXPAND_INT;
1296 pad_char = 0;
1297 break;
1298 case toku_type_fixstring:
1299 operation = UPDATE_OP_EXPAND_CHAR;
1300 pad_char = old_field->charset()->pad_char;
1301 break;
1302 case toku_type_fixbinary:
1303 operation = UPDATE_OP_EXPAND_BINARY;
1304 pad_char = 0;
1305 break;
1306 default:
1307 assert_unreachable();
1308 }
1309
1310 uint32_t curr_num_DBs = table->s->keys + tokudb_test(hidden_primary_key);
1311 for (uint32_t i = 0; i < curr_num_DBs; i++) {
1312 // change to a new descriptor
1313 DBT row_descriptor; memset(&row_descriptor, 0, sizeof row_descriptor);
1314 error = new_row_descriptor(
1315 altered_table, ha_alter_info, i, &row_descriptor);
1316 if (error)
1317 break;
1318 error = share->key_file[i]->change_descriptor(
1319 share->key_file[i],
1320 ctx->alter_txn,
1321 &row_descriptor,
1322 0);
1323 tokudb::memory::free(row_descriptor.data);
1324 if (error)
1325 break;
1326
1327 // for all trees that have values, make an expand update message and
1328 // broadcast it into the tree
1329 if (i == primary_key || key_is_clustering(&table_share->key_info[i])) {
1330 uint32_t old_offset = alter_table_field_offset(
1331 table_share->null_bytes,
1332 ctx->table_kc_info,
1333 i,
1334 expand_field_num);
1335 uint32_t new_offset = alter_table_field_offset(
1336 table_share->null_bytes,
1337 ctx->altered_table_kc_info,
1338 i,
1339 expand_field_num);
1340 assert_always(old_offset <= new_offset);
1341
1342 uint32_t old_length =
1343 ctx->table_kc_info->field_lengths[expand_field_num];
1344 assert_always(old_length == old_field->pack_length());
1345
1346 uint32_t new_length =
1347 ctx->altered_table_kc_info->field_lengths[expand_field_num];
1348 assert_always(new_length == new_field->pack_length());
1349
1350 DBT expand; memset(&expand, 0, sizeof(expand));
1351 expand.size =
1352 sizeof(operation) + sizeof(new_offset) +
1353 sizeof(old_length) + sizeof(new_length);
1354 if (operation == UPDATE_OP_EXPAND_CHAR ||
1355 operation == UPDATE_OP_EXPAND_BINARY)
1356 expand.size += sizeof(pad_char);
1357 expand.data = tokudb::memory::malloc(expand.size, MYF(MY_WME));
1358 if (!expand.data) {
1359 error = ENOMEM;
1360 break;
1361 }
1362 uchar *expand_ptr = (uchar *)expand.data;
1363 expand_ptr[0] = operation;
1364 expand_ptr += sizeof operation;
1365
1366 // for the first altered field, old_offset == new_offset.
1367 // for the subsequent altered fields, the new_offset
1368 // should be used as it includes the length changes from the
1369 // previous altered fields.
1370 memcpy(expand_ptr, &new_offset, sizeof(new_offset));
1371 expand_ptr += sizeof(new_offset);
1372
1373 memcpy(expand_ptr, &old_length, sizeof(old_length));
1374 expand_ptr += sizeof(old_length);
1375
1376 memcpy(expand_ptr, &new_length, sizeof(new_length));
1377 expand_ptr += sizeof(new_length);
1378
1379 if (operation == UPDATE_OP_EXPAND_CHAR ||
1380 operation == UPDATE_OP_EXPAND_BINARY) {
1381 memcpy(expand_ptr, &pad_char, sizeof(pad_char));
1382 expand_ptr += sizeof(pad_char);
1383 }
1384
1385 assert_always(expand_ptr == (uchar*)expand.data + expand.size);
1386
1387 // and broadcast it into the tree
1388 error = share->key_file[i]->update_broadcast(
1389 share->key_file[i],
1390 ctx->alter_txn,
1391 &expand,
1392 DB_IS_RESETTING_OP);
1393 tokudb::memory::free(expand.data);
1394 if (error)
1395 break;
1396 }
1397 }
1398
1399 return error;
1400 }
1401
1402 static void marshall_blob_lengths(
1403 tokudb::buffer& b,
1404 uint32_t n,
1405 TABLE* table,
1406 KEY_AND_COL_INFO* kc_info) {
1407
1408 for (uint i = 0; i < n; i++) {
1409 uint blob_field_index = kc_info->blob_fields[i];
1410 assert_always(blob_field_index < table->s->fields);
1411 uint8_t blob_field_length =
1412 table->s->field[blob_field_index]->row_pack_length();
1413 b.append(&blob_field_length, sizeof blob_field_length);
1414 }
1415 }
1416
1417 int ha_tokudb::alter_table_expand_blobs(
1418 TABLE* altered_table,
1419 Alter_inplace_info* ha_alter_info) {
1420
1421 int error = 0;
1422 tokudb_alter_ctx* ctx =
1423 static_cast<tokudb_alter_ctx*>(ha_alter_info->handler_ctx);
1424
1425 uint32_t curr_num_DBs = table->s->keys + tokudb_test(hidden_primary_key);
1426 for (uint32_t i = 0; i < curr_num_DBs; i++) {
1427 // change to a new descriptor
1428 DBT row_descriptor; memset(&row_descriptor, 0, sizeof row_descriptor);
1429 error = new_row_descriptor(
1430 altered_table, ha_alter_info, i, &row_descriptor);
1431 if (error)
1432 break;
1433 error = share->key_file[i]->change_descriptor(
1434 share->key_file[i],
1435 ctx->alter_txn,
1436 &row_descriptor,
1437 0);
1438 tokudb::memory::free(row_descriptor.data);
1439 if (error)
1440 break;
1441
1442 // for all trees that have values, make an update blobs message and
1443 // broadcast it into the tree
1444 if (i == primary_key || key_is_clustering(&table_share->key_info[i])) {
1445 tokudb::buffer b;
1446 uint8_t op = UPDATE_OP_EXPAND_BLOB;
1447 b.append(&op, sizeof op);
1448 b.append_ui<uint32_t>(
1449 table->s->null_bytes +
1450 ctx->table_kc_info->mcp_info[i].fixed_field_size);
1451 uint32_t var_offset_bytes =
1452 ctx->table_kc_info->mcp_info[i].len_of_offsets;
1453 b.append_ui<uint32_t>(var_offset_bytes);
1454 b.append_ui<uint32_t>(
1455 var_offset_bytes == 0 ? 0 :
1456 ctx->table_kc_info->num_offset_bytes);
1457
1458 // add blobs info
1459 uint32_t num_blobs = ctx->table_kc_info->num_blobs;
1460 b.append_ui<uint32_t>(num_blobs);
1461 marshall_blob_lengths(b, num_blobs, table, ctx->table_kc_info);
1462 marshall_blob_lengths(
1463 b,
1464 num_blobs,
1465 altered_table,
1466 ctx->altered_table_kc_info);
1467
1468 // and broadcast it into the tree
1469 DBT expand; memset(&expand, 0, sizeof expand);
1470 expand.data = b.data();
1471 expand.size = b.size();
1472 error = share->key_file[i]->update_broadcast(
1473 share->key_file[i],
1474 ctx->alter_txn,
1475 &expand,
1476 DB_IS_RESETTING_OP);
1477 if (error)
1478 break;
1479 }
1480 }
1481
1482 return error;
1483 }
1484
1485 // Return true if two fixed length fields can be changed inplace
1486 static bool change_fixed_length_is_supported(Field* old_field,
1487 Field* new_field,
1488 tokudb_alter_ctx* ctx) {
1489 // no change in size is supported
1490 if (old_field->pack_length() == new_field->pack_length())
1491 return true;
1492 // shrink is not supported
1493 if (old_field->pack_length() > new_field->pack_length())
1494 return false;
1495 ctx->expand_fixed_update_needed = true;
1496 return true;
1497 }
1498
1499 static bool change_blob_length_is_supported(Field* old_field,
1500 Field* new_field,
1501 tokudb_alter_ctx* ctx) {
1502 // blob -> longer or equal length blob
1503 if (old_field->binary() && new_field->binary() &&
1504 old_field->pack_length() <= new_field->pack_length()) {
1505 ctx->expand_blob_update_needed = true;
1506 return true;
1507 }
1508 // text -> longer or equal length text
1509 if (!old_field->binary() && !new_field->binary() &&
1510 old_field->pack_length() <= new_field->pack_length() &&
1511 old_field->charset()->number == new_field->charset()->number) {
1512 ctx->expand_blob_update_needed = true;
1513 return true;
1514 }
1515 return false;
1516 }
1517
1518 // Return true if the MySQL type is an int or unsigned int type
1519 static bool is_int_type(enum_field_types t) {
1520 switch (t) {
1521 case MYSQL_TYPE_TINY:
1522 case MYSQL_TYPE_SHORT:
1523 case MYSQL_TYPE_INT24:
1524 case MYSQL_TYPE_LONG:
1525 case MYSQL_TYPE_LONGLONG:
1526 return true;
1527 default:
1528 return false;
1529 }
1530 }
1531
1532 // Return true if two field types can be changed inplace
1533 static bool change_field_type_is_supported(Field* old_field,
1534 Field* new_field,
1535 tokudb_alter_ctx* ctx) {
1536 enum_field_types old_type = old_field->real_type();
1537 enum_field_types new_type = new_field->real_type();
1538 if (is_int_type(old_type)) {
1539 // int and unsigned int expansion
1540 if (is_int_type(new_type) &&
1541 is_unsigned(old_field) == is_unsigned(new_field))
1542 return change_fixed_length_is_supported(old_field, new_field, ctx);
1543 else
1544 return false;
1545 } else if (old_type == MYSQL_TYPE_STRING) {
1546 // char(X) -> char(Y) and binary(X) -> binary(Y) expansion
1547 if (new_type == MYSQL_TYPE_STRING &&
1548 old_field->binary() == new_field->binary() &&
1549 old_field->charset()->number == new_field->charset()->number)
1550 return change_fixed_length_is_supported(old_field, new_field, ctx);
1551 else
1552 return false;
1553 } else if (old_type == MYSQL_TYPE_VARCHAR) {
1554 // varchar(X) -> varchar(Y) and varbinary(X) -> varbinary(Y) expansion
1555 // where X < 256 <= Y the ALTER_STORED_COLUMN_TYPE handler flag is set for
1556 // these cases
1557 return change_varchar_length_is_supported(old_field, new_field, ctx);
1558 } else if (old_type == MYSQL_TYPE_BLOB && new_type == MYSQL_TYPE_BLOB) {
1559 return change_blob_length_is_supported(old_field, new_field, ctx);
1560 } else
1561 return false;
1562 }
1563
1564 // Return true if all changed field types can be changed inplace
1565 static bool change_type_is_supported(TABLE* table,
1566 TABLE* altered_table,
1567 tokudb_alter_ctx* ctx) {
1568 if (table->s->null_bytes != altered_table->s->null_bytes)
1569 return false;
1570 if (table->s->fields != altered_table->s->fields)
1571 return false;
1572 if (ctx->changed_fields.elements() > 1)
1573 return false; // only support one field change
1574 for (DYNAMIC_ARRAY_ELEMENTS_TYPE ai = 0;
1575 ai < ctx->changed_fields.elements();
1576 ai++) {
1577 uint i = ctx->changed_fields.at(ai);
1578 Field *old_field = table->field[i];
1579 Field *new_field = altered_table->field[i];
1580 if (field_in_key_of_table(table, old_field) ||
1581 field_in_key_of_table(altered_table, new_field))
1582 return false;
1583 if (!change_field_type_is_supported(old_field, new_field, ctx))
1584 return false;
1585 }
1586 return true;
1587 }
1588
1589 // Allocate and initialize a new descriptor for a dictionary in the altered
1590 // table identified with idx.
1591 // Return the new descriptor in the row_descriptor DBT.
1592 // Return non-zero on error.
1593 int ha_tokudb::new_row_descriptor(TABLE* altered_table,
1594 Alter_inplace_info* ha_alter_info,
1595 uint32_t idx,
1596 DBT* row_descriptor) {
1597 int error = 0;
1598 tokudb_alter_ctx* ctx =
1599 static_cast<tokudb_alter_ctx*>(ha_alter_info->handler_ctx);
1600 row_descriptor->size =
1601 get_max_desc_size(ctx->altered_table_kc_info, altered_table);
1602 row_descriptor->data =
1603 (uchar*)tokudb::memory::malloc(row_descriptor->size, MYF(MY_WME));
1604 if (row_descriptor->data == NULL) {
1605 error = ENOMEM;
1606 } else {
1607 KEY* prim_key =
1608 hidden_primary_key ? NULL :
1609 &altered_table->s->key_info[primary_key];
1610 if (idx == primary_key) {
1611 row_descriptor->size = create_main_key_descriptor(
1612 (uchar*)row_descriptor->data,
1613 prim_key,
1614 hidden_primary_key,
1615 primary_key,
1616 altered_table,
1617 ctx->altered_table_kc_info);
1618 } else {
1619 row_descriptor->size = create_secondary_key_descriptor(
1620 (uchar*)row_descriptor->data,
1621 &altered_table->key_info[idx],
1622 prim_key,
1623 hidden_primary_key,
1624 altered_table,
1625 primary_key,
1626 idx,
1627 ctx->altered_table_kc_info);
1628 }
1629 error = 0;
1630 }
1631 return error;
1632 }
1633
1634 #endif
1635