1 /* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */
2 // vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4:
3 #ident "$Id$"
4 /*======
5 This file is part of TokuDB
6 
7 
8 Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved.
9 
    TokuDB is free software: you can redistribute it and/or modify
11     it under the terms of the GNU General Public License, version 2,
12     as published by the Free Software Foundation.
13 
14     TokuDB is distributed in the hope that it will be useful,
15     but WITHOUT ANY WARRANTY; without even the implied warranty of
16     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
17     GNU General Public License for more details.
18 
19     You should have received a copy of the GNU General Public License
20     along with TokuDB.  If not, see <http://www.gnu.org/licenses/>.
21 
22 ======= */
23 
24 #ident "Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved."
25 
26 #if TOKU_INCLUDE_ALTER_56
27 
28 #if 100000 <= MYSQL_VERSION_ID && MYSQL_VERSION_ID <= 101099
29 #define TOKU_ALTER_RENAME ALTER_RENAME
30 #define DYNAMIC_ARRAY_ELEMENTS_TYPE size_t
31 #elif (50600 <= MYSQL_VERSION_ID && MYSQL_VERSION_ID <= 50699) || \
32       (50700 <= MYSQL_VERSION_ID && MYSQL_VERSION_ID <= 50799)
33 #define TOKU_ALTER_RENAME ALTER_RENAME
34 #define DYNAMIC_ARRAY_ELEMENTS_TYPE int
35 #elif 50500 <= MYSQL_VERSION_ID && MYSQL_VERSION_ID <= 50599
36 #define TOKU_ALTER_RENAME ALTER_RENAME_56
37 #define DYNAMIC_ARRAY_ELEMENTS_TYPE int
38 #else
39 #error
40 #endif
41 
42 #include "ha_tokudb_alter_common.cc"
43 #include <sql_array.h>
44 #include <sql_base.h>
45 
46 // The tokudb alter context contains the alter state that is set in the check if supported method and used
47 // later when the alter operation is executed.
48 class tokudb_alter_ctx : public inplace_alter_handler_ctx {
49 public:
tokudb_alter_ctx()50     tokudb_alter_ctx() :
51         handler_flags(0),
52         alter_txn(NULL),
53         add_index_changed(false),
54         drop_index_changed(false),
55         reset_card(false),
56         compression_changed(false),
57         expand_varchar_update_needed(false),
58         expand_fixed_update_needed(false),
59         expand_blob_update_needed(false),
60         optimize_needed(false),
61         table_kc_info(NULL),
62         altered_table_kc_info(NULL) {
63     }
~tokudb_alter_ctx()64     ~tokudb_alter_ctx() {
65         if (altered_table_kc_info)
66             free_key_and_col_info(altered_table_kc_info);
67     }
68 public:
69     ulong handler_flags;
70     DB_TXN* alter_txn;
71     bool add_index_changed;
72     bool incremented_num_DBs, modified_DBs;
73     bool drop_index_changed;
74     bool reset_card;
75     bool compression_changed;
76     enum toku_compression_method orig_compression_method;
77     bool expand_varchar_update_needed;
78     bool expand_fixed_update_needed;
79     bool expand_blob_update_needed;
80     bool optimize_needed;
81     Dynamic_array<uint> changed_fields;
82     KEY_AND_COL_INFO* table_kc_info;
83     KEY_AND_COL_INFO* altered_table_kc_info;
84     KEY_AND_COL_INFO altered_table_kc_info_base;
85 };
86 
87 // Debug function to print out an alter table operation
print_alter_info(TABLE * altered_table,Alter_inplace_info * ha_alter_info)88 void ha_tokudb::print_alter_info(
89     TABLE* altered_table,
90     Alter_inplace_info* ha_alter_info) {
91 
92     TOKUDB_TRACE(
93         "***are keys of two tables same? %d",
94         tables_have_same_keys(table, altered_table, false, false));
95     if (ha_alter_info->handler_flags) {
96         TOKUDB_TRACE("***alter flags set ***");
97         for (int i = 0; i < 32; i++) {
98             if (ha_alter_info->handler_flags & (1 << i))
99                 TOKUDB_TRACE("%d", i);
100         }
101     }
102 
103     // everyone calculates data by doing some default_values - record[0], but
104     // I do not see why that is necessary
105     TOKUDB_TRACE("******");
106     TOKUDB_TRACE("***orig table***");
107     for (uint i = 0; i < table->s->fields; i++) {
108       //
109       // make sure to use table->field, and NOT table->s->field
110       //
111       Field* curr_field = table->field[i];
112       uint null_offset = get_null_offset(table, curr_field);
113       TOKUDB_TRACE(
114         "name: %s, types: %u %u, nullable: %d, null_offset: %d, is_null_field: "
115         "%d, is_null %d, pack_length %u",
116         curr_field->field_name.str,
117         curr_field->real_type(),
118         mysql_to_toku_type(curr_field),
119         curr_field->null_bit,
120         null_offset,
121         curr_field->real_maybe_null(),
122         curr_field->real_maybe_null() ?
123             table->s->default_values[null_offset] & curr_field->null_bit :
124             0xffffffff,
125         curr_field->pack_length());
126     }
127     TOKUDB_TRACE("******");
128     TOKUDB_TRACE("***altered table***");
129     for (uint i = 0; i < altered_table->s->fields; i++) {
130       Field* curr_field = altered_table->field[i];
131       uint null_offset = get_null_offset(altered_table, curr_field);
132       TOKUDB_TRACE(
133             "name: %s, types: %u %u, nullable: %d, null_offset: %d, "
134             "is_null_field: %d, is_null %d, pack_length %u",
135             curr_field->field_name.str,
136             curr_field->real_type(),
137             mysql_to_toku_type(curr_field),
138             curr_field->null_bit,
139             null_offset,
140             curr_field->real_maybe_null(),
141             curr_field->real_maybe_null() ?
142                 altered_table->s->default_values[null_offset] &
143                 curr_field->null_bit : 0xffffffff,
144             curr_field->pack_length());
145     }
146     TOKUDB_TRACE("******");
147 }
148 
149 // Given two tables with equal number of fields, find all of the fields with
150 // different types and return the indexes of the different fields in the
151 // changed_fields array. This function ignores field name differences.
find_changed_fields(TABLE * table_a,TABLE * table_b,Dynamic_array<uint> & changed_fields)152 static int find_changed_fields(
153     TABLE* table_a,
154     TABLE* table_b,
155     Dynamic_array<uint>& changed_fields) {
156 
157     for (uint i = 0; i < table_a->s->fields; i++) {
158         Field* field_a = table_a->field[i];
159         Field* field_b = table_b->field[i];
160         if (!fields_are_same_type(field_a, field_b))
161             changed_fields.append(i);
162     }
163     return changed_fields.elements();
164 }
165 
166 static bool change_length_is_supported(TABLE* table,
167                                        TABLE* altered_table,
168                                        tokudb_alter_ctx* ctx);
169 
170 static bool change_type_is_supported(TABLE* table,
171                                      TABLE* altered_table,
172                                      tokudb_alter_ctx* ctx);
173 
174 // The ha_alter_info->handler_flags can not be trusted.
175 // This function maps the bogus handler flags to something we like.
fix_handler_flags(THD * thd,TABLE * table,TABLE * altered_table,Alter_inplace_info * ha_alter_info)176 static ulong fix_handler_flags(
177     THD* thd,
178     TABLE* table,
179     TABLE* altered_table,
180     Alter_inplace_info* ha_alter_info) {
181 
182     ulong handler_flags = ha_alter_info->handler_flags;
183 
184 #if 100000 <= MYSQL_VERSION_ID
185     // This is automatically supported, hide the flag from later checks
186     handler_flags &= ~ALTER_PARTITIONED;
187 #endif
188 
189     // workaround for fill_alter_inplace_info bug (#5193)
190     // the function erroneously sets the ADD_INDEX and DROP_INDEX flags for a
191     // column addition that does not change the keys.
192     // the following code turns the ADD_INDEX and DROP_INDEX flags so that
193     // we can do hot column addition later.
194     if (handler_flags &
195         (ALTER_ADD_COLUMN + ALTER_DROP_COLUMN)) {
196         if (handler_flags &
197             (ALTER_ADD_NON_UNIQUE_NON_PRIM_INDEX + ALTER_DROP_NON_UNIQUE_NON_PRIM_INDEX)) {
198             if (tables_have_same_keys(
199                     table,
200                     altered_table,
201                     tokudb::sysvars::alter_print_error(thd) != 0, false)) {
202                 handler_flags &=
203                     ~(ALTER_ADD_NON_UNIQUE_NON_PRIM_INDEX +
204                       ALTER_DROP_NON_UNIQUE_NON_PRIM_INDEX);
205             }
206         }
207     }
208 
209     // always allow rename table + any other operation, so turn off the
210     // rename flag
211     handler_flags &= ~ALTER_RENAME;
212 
213     // ALTER_STORED_COLUMN_TYPE may be set when no columns have been changed,
214     // so turn off the flag
215     if (handler_flags & ALTER_STORED_COLUMN_TYPE) {
216         if (all_fields_are_same_type(table, altered_table)) {
217             handler_flags &= ~ALTER_STORED_COLUMN_TYPE;
218         }
219     }
220 
221     return handler_flags;
222 }
223 
224 // Require that there is no intersection of add and drop names.
is_disjoint_add_drop(Alter_inplace_info * ha_alter_info)225 static bool is_disjoint_add_drop(Alter_inplace_info *ha_alter_info) {
226     for (uint d = 0; d < ha_alter_info->index_drop_count; d++) {
227         KEY* drop_key = ha_alter_info->index_drop_buffer[d];
228         for (uint a = 0; a < ha_alter_info->index_add_count; a++) {
229             KEY* add_key =
230                 &ha_alter_info->key_info_buffer[ha_alter_info->index_add_buffer[a]];
231             if (strcmp(drop_key->name.str, add_key->name.str) == 0) {
232                 return false;
233             }
234         }
235     }
236     return true;
237 }
238 
// Tell whether bits is a non-empty subset of mask: true when at least one bit
// of mask is set in bits and bits has nothing set outside mask, false
// otherwise.
static bool only_flags(ulong bits, ulong mask) {
    if (bits & ~mask) {
        // something outside the mask is set
        return false;
    }
    return (bits & mask) != 0;
}
244 
// Table create options that should be ignored by TokuDB.
// There are 25 total create options defined by the mysql server (see
// handler.h), and only 4 of them touch engine data, either by rebuilding it
// or by just updating meta info:
//   1. HA_CREATE_USED_AUTO        update auto_inc info
//   2. HA_CREATE_USED_CHARSET     rebuild table if it contains character columns
//   3. HA_CREATE_USED_ENGINE      rebuild table
//   4. HA_CREATE_USED_ROW_FORMAT  update compression method info
//
// All the others are either not supported by TokuDB or do not need to touch
// engine data. check_if_supported_inplace_alter() accepts a create-option
// change in place when only bits from this mask are set.
static constexpr uint32_t TOKUDB_IGNORED_ALTER_CREATE_OPTION_FIELDS =
    HA_CREATE_USED_RAID |              // deprecated field
    HA_CREATE_USED_UNION |             // for MERGE table
    HA_CREATE_USED_INSERT_METHOD |     // for MERGE table
    HA_CREATE_USED_MIN_ROWS |          // for MEMORY table
    HA_CREATE_USED_MAX_ROWS |          // for NDB table
    HA_CREATE_USED_AVG_ROW_LENGTH |    // for MyISAM table
    HA_CREATE_USED_PACK_KEYS |         // for MyISAM table
    HA_CREATE_USED_DEFAULT_CHARSET |   // no need to rebuild
    HA_CREATE_USED_DATADIR |           // ignored by alter
    HA_CREATE_USED_INDEXDIR |          // ignored by alter
    HA_CREATE_USED_CHECKSUM |          // for MyISAM table
    HA_CREATE_USED_DELAY_KEY_WRITE |   // for MyISAM table
    HA_CREATE_USED_COMMENT |           // no need to rebuild
    HA_CREATE_USED_PASSWORD |          // not supported by community version
    HA_CREATE_USED_CONNECTION |        // for FEDERATED table
    HA_CREATE_USED_KEY_BLOCK_SIZE |    // not supported by TokuDB
    HA_CREATE_USED_TRANSACTIONAL |     // unused
    HA_CREATE_USED_PAGE_CHECKSUM |     // unused
    HA_CREATE_USED_STATS_PERSISTENT |  // not supported by TokuDB
    HA_CREATE_USED_STATS_AUTO_RECALC | // not supported by TokuDB
    HA_CREATE_USED_STATS_SAMPLE_PAGES; // not supported by TokuDB
278 
// Check if an alter table operation on this table and described by the alter
// table parameters is supported inplace and if so, what type of locking is
// needed to execute it.
//
// As a side effect a fresh tokudb_alter_ctx is created, filled in and stored
// in ha_alter_info->handler_ctx for the later alter phases.
//
// Possible return values of the enum_alter_inplace_result type:
//
// HA_ALTER_INPLACE_NOT_SUPPORTED: alter operation is not supported as an
//  inplace operation, a table copy is required
//
// HA_ALTER_ERROR: the alter table operation should fail
//
// HA_ALTER_INPLACE_EXCLUSIVE_LOCK: prepare and alter runs with MDL X
//
// HA_ALTER_INPLACE_COPY_LOCK: prepare runs with MDL X,
//  alter runs with MDL SNW
//
// HA_ALTER_INPLACE_SHARED_LOCK: prepare and alter methods called with MDL SNW,
//  concurrent reads, no writes
//
// HA_ALTER_INPLACE_COPY_NO_LOCK: prepare runs with MDL X,
//  alter runs with MDL SW
//
// HA_ALTER_INPLACE_NO_LOCK: prepare and alter methods called with MDL SW,
//  concurrent reads, writes.
//  must set WRITE_ALLOW_WRITE lock type in the external lock method to avoid
//  deadlocks with the MDL lock and the table lock
enum_alter_inplace_result ha_tokudb::check_if_supported_inplace_alter(
    TABLE* altered_table,
    Alter_inplace_info* ha_alter_info) {

    TOKUDB_HANDLER_DBUG_ENTER("");

    if (TOKUDB_UNLIKELY(TOKUDB_DEBUG_FLAGS(TOKUDB_DEBUG_ALTER_TABLE))) {
        print_alter_info(altered_table, ha_alter_info);
    }

    // default is NOT inplace
    enum_alter_inplace_result result = HA_ALTER_INPLACE_NOT_SUPPORTED;
    THD* thd = ha_thd();

    // setup context: it is stored in ha_alter_info so that
    // prepare_inplace_alter_table() and inplace_alter_table() can pick it up.
    // All decisions below are taken on the cleaned-up copy of the flags.
    tokudb_alter_ctx* ctx = new tokudb_alter_ctx;
    ha_alter_info->handler_ctx = ctx;
    ctx->handler_flags =
        fix_handler_flags(thd, table, altered_table, ha_alter_info);
    ctx->table_kc_info = &share->kc_info;
    // point the altered-table info at the storage embedded in the context and
    // clear it; the branches that need it fill it in through setup_kc_info()
    ctx->altered_table_kc_info = &ctx->altered_table_kc_info_base;
    memset(ctx->altered_table_kc_info, 0, sizeof (KEY_AND_COL_INFO));

    // Exactly one of the following branches is taken. Note that several of
    // the conditions have side effects (find_changed_fields() appends to
    // ctx->changed_fields, setup_kc_info() fills ctx->altered_table_kc_info),
    // so the order of the tests inside each condition matters.
    if (tokudb::sysvars::disable_hot_alter(thd)) {
        ; // do nothing
    } else if (only_flags(
                    ctx->handler_flags,
                    ALTER_DROP_NON_UNIQUE_NON_PRIM_INDEX +
                    ALTER_DROP_UNIQUE_INDEX +
                    ALTER_ADD_NON_UNIQUE_NON_PRIM_INDEX +
                    ALTER_ADD_UNIQUE_INDEX)) {
        // add or drop index
        if (table->s->null_bytes == altered_table->s->null_bytes &&
            (ha_alter_info->index_add_count > 0 ||
             ha_alter_info->index_drop_count > 0) &&
            !tables_have_same_keys(
                table,
                altered_table,
                tokudb::sysvars::alter_print_error(thd) != 0, false) &&
            is_disjoint_add_drop(ha_alter_info)) {

            if (ctx->handler_flags &
                (ALTER_DROP_NON_UNIQUE_NON_PRIM_INDEX +
                 ALTER_DROP_UNIQUE_INDEX)) {
                // the fractal tree can not handle dropping an index concurrent
                // with querying with the index.
                // we grab an exclusive MDL for the drop index.
                result = HA_ALTER_INPLACE_EXCLUSIVE_LOCK;
            } else {
                /* FIXME: MDEV-16099 Use alter algorithm=nocopy
                or algorithm=instant for non-InnoDB engine */
                result = HA_ALTER_INPLACE_COPY_LOCK;

                // someday, allow multiple hot indexes via alter table add key.
                // don't forget to change the store_lock function.
                // for now, hot indexing is only supported via session variable
                // with the create index sql command
                if (ha_alter_info->index_add_count == 1 &&
                    // only one add or drop
                    ha_alter_info->index_drop_count == 0 &&
                    // must be add index not add unique index
                    ctx->handler_flags == ALTER_ADD_NON_UNIQUE_NON_PRIM_INDEX &&
                    // must be a create index command
                    thd_sql_command(thd) == SQLCOM_CREATE_INDEX &&
                    // must be enabled
                    tokudb::sysvars::create_index_online(thd)) {
                    // external_lock set WRITE_ALLOW_WRITE which allows writes
                    // concurrent with the index creation
                    /* FIXME: MDEV-16099 Use alter algorithm=nocopy
                    or algorithm=instant for non-InnoDB engine */
                    result = HA_ALTER_INPLACE_COPY_NO_LOCK;
                }
            }
        }
    } else if (only_flags(
                    ctx->handler_flags,
                    ALTER_COLUMN_DEFAULT)) {
        // column default
        if (table->s->null_bytes == altered_table->s->null_bytes)
            result = HA_ALTER_INPLACE_EXCLUSIVE_LOCK;
    } else if (ctx->handler_flags & ALTER_COLUMN_NAME &&
               only_flags(
                    ctx->handler_flags,
                    ALTER_COLUMN_NAME |
                    ALTER_COLUMN_DEFAULT)) {
        // column rename
        // we have identified a possible column rename,
        // but let's do some more checks

        // we will only allow an hcr if there are no changes
        // in column positions (ALTER_STORED_COLUMN_ORDER is not set)

        // now need to verify that one and only one column
        // has changed only its name. If we find anything to
        // the contrary, we don't allow it, also check indexes
        if (table->s->null_bytes == altered_table->s->null_bytes) {
            // NOTE(review): the only_flags() test above admits just the name
            // and default flags, so the third argument looks like it is
            // always false on this path - confirm against the flag values.
            bool cr_supported =
                column_rename_supported(
                    table,
                    altered_table,
                    (ctx->handler_flags &
                    ALTER_STORED_COLUMN_ORDER) != 0);
            if (cr_supported)
                result = HA_ALTER_INPLACE_EXCLUSIVE_LOCK;
        }
    } else if (ctx->handler_flags & ALTER_ADD_COLUMN &&
               only_flags(
                    ctx->handler_flags,
                    ALTER_ADD_COLUMN |
                    ALTER_STORED_COLUMN_ORDER) &&
               setup_kc_info(altered_table, ctx->altered_table_kc_info) == 0) {

        // add column
        // scratch list of added column positions, one slot per field of the
        // altered table (variable-length array, a compiler extension in C++)
        uint32_t added_columns[altered_table->s->fields];
        uint32_t num_added_columns = 0;
        int r =
            find_changed_columns(
                added_columns,
                &num_added_columns,
                table,
                altered_table);
        if (r == 0) {
            if (TOKUDB_UNLIKELY(TOKUDB_DEBUG_FLAGS(TOKUDB_DEBUG_ALTER_TABLE))) {
                for (uint32_t i = 0; i < num_added_columns; i++) {
                    uint32_t curr_added_index = added_columns[i];
                    Field* curr_added_field =
                        altered_table->field[curr_added_index];
                    TOKUDB_TRACE(
                        "Added column: index %d, name %s",
                        curr_added_index,
                        curr_added_field->field_name.str);
                }
            }
            result = HA_ALTER_INPLACE_EXCLUSIVE_LOCK;
        }
    } else if (ctx->handler_flags & ALTER_DROP_COLUMN &&
               only_flags(
                    ctx->handler_flags,
                    ALTER_DROP_COLUMN |
                    ALTER_STORED_COLUMN_ORDER) &&
               setup_kc_info(altered_table, ctx->altered_table_kc_info) == 0) {

        // drop column
        // same scan as for add column with the two tables swapped, so the
        // reported positions refer to the original table
        uint32_t dropped_columns[table->s->fields];
        uint32_t num_dropped_columns = 0;
        int r =
            find_changed_columns(
                dropped_columns,
                &num_dropped_columns,
                altered_table,
                table);
        if (r == 0) {
            if (TOKUDB_UNLIKELY(TOKUDB_DEBUG_FLAGS(TOKUDB_DEBUG_ALTER_TABLE))) {
                for (uint32_t i = 0; i < num_dropped_columns; i++) {
                    uint32_t curr_dropped_index = dropped_columns[i];
                    Field* curr_dropped_field = table->field[curr_dropped_index];
                    TOKUDB_TRACE(
                        "Dropped column: index %d, name %s",
                        curr_dropped_index,
                        curr_dropped_field->field_name.str);
                }
            }
            result = HA_ALTER_INPLACE_EXCLUSIVE_LOCK;
        }
    } else if ((ctx->handler_flags &
                ALTER_COLUMN_EQUAL_PACK_LENGTH) &&
                only_flags(
                    ctx->handler_flags,
                    ALTER_COLUMN_EQUAL_PACK_LENGTH |
                    ALTER_COLUMN_DEFAULT) &&
                table->s->fields == altered_table->s->fields &&
                find_changed_fields(
                    table,
                    altered_table,
                    ctx->changed_fields) > 0 &&
                setup_kc_info(altered_table, ctx->altered_table_kc_info) == 0) {

        // change column length
        if (change_length_is_supported(table, altered_table, ctx)) {
            result = HA_ALTER_INPLACE_EXCLUSIVE_LOCK;
        }
    } else if ((ctx->handler_flags & ALTER_STORED_COLUMN_TYPE) &&
                only_flags(
                    ctx->handler_flags,
                    ALTER_STORED_COLUMN_TYPE |
                    ALTER_COLUMN_DEFAULT) &&
                table->s->fields == altered_table->s->fields &&
                find_changed_fields(
                    table,
                    altered_table,
                    ctx->changed_fields) > 0 &&
                setup_kc_info(altered_table, ctx->altered_table_kc_info) == 0) {

        // change column type
        if (change_type_is_supported(table, altered_table, ctx)) {
            result = HA_ALTER_INPLACE_EXCLUSIVE_LOCK;
        }
    } else if (only_flags(
                    ctx->handler_flags,
                    ALTER_CHANGE_CREATE_OPTION)) {

        // create option change: only auto_increment, row_format, or a set of
        // options TokuDB ignores can be handled in place
        HA_CREATE_INFO* create_info = ha_alter_info->create_info;
#if TOKU_INCLUDE_OPTION_STRUCTS
        // set the USED_ROW_FORMAT flag for use later in this file for changes in the table's
        // compression
        if (create_info->option_struct->row_format !=
            table_share->option_struct->row_format)
            create_info->used_fields |= HA_CREATE_USED_ROW_FORMAT;
#endif
        // alter auto_increment
        if (only_flags(create_info->used_fields, HA_CREATE_USED_AUTO)) {
            // do a sanity check that the table is what we think it is
            if (tables_have_same_keys_and_columns(
                    table,
                    altered_table,
                    tokudb::sysvars::alter_print_error(thd) != 0)) {
                result = HA_ALTER_INPLACE_EXCLUSIVE_LOCK;
            }
        } else if (only_flags(
                        create_info->used_fields,
                        HA_CREATE_USED_ROW_FORMAT)) {
            // alter row_format
            // do a sanity check that the table is what we think it is
            if (tables_have_same_keys_and_columns(
                    table,
                    altered_table,
                    tokudb::sysvars::alter_print_error(thd) != 0)) {
                result = HA_ALTER_INPLACE_EXCLUSIVE_LOCK;
            }
        } else if (only_flags(
                        create_info->used_fields,
                        TOKUDB_IGNORED_ALTER_CREATE_OPTION_FIELDS)) {
            // only options that TokuDB ignores were changed
            result = HA_ALTER_INPLACE_COPY_NO_LOCK;
        }
    }
#if TOKU_OPTIMIZE_WITH_RECREATE
    else if (only_flags(
                ctx->handler_flags,
                ALTER_RECREATE_TABLE |
                ALTER_COLUMN_DEFAULT)) {
        // a table recreate is carried out as an optimize in
        // inplace_alter_table()
        ctx->optimize_needed = true;
        /* FIXME: MDEV-16099 Use alter algorithm=nocopy
        or algorithm=instant for non-InnoDB engine */
        result = HA_ALTER_INPLACE_COPY_NO_LOCK;
    }
#endif

    // debug only: trace the query when an in-place alter was accepted even
    // though the number of null bytes differs between the two tables
    if (TOKUDB_UNLIKELY(TOKUDB_DEBUG_FLAGS(TOKUDB_DEBUG_ALTER_TABLE)) &&
        result != HA_ALTER_INPLACE_NOT_SUPPORTED &&
        table->s->null_bytes != altered_table->s->null_bytes) {

        TOKUDB_HANDLER_TRACE("q %s", thd->query());
        TOKUDB_HANDLER_TRACE(
            "null bytes %u -> %u",
            table->s->null_bytes,
            altered_table->s->null_bytes);
    }

    // turn a not supported result into an error if the slow alter table
    // (copy) is disabled
    if (result == HA_ALTER_INPLACE_NOT_SUPPORTED &&
        tokudb::sysvars::disable_slow_alter(thd)) {
        print_error(HA_ERR_UNSUPPORTED, MYF(0));
        result = HA_ALTER_ERROR;
    }

    DBUG_RETURN(result);
}
571 
572 // Prepare for the alter operations
prepare_inplace_alter_table(TOKUDB_UNUSED (TABLE * altered_table),Alter_inplace_info * ha_alter_info)573 bool ha_tokudb::prepare_inplace_alter_table(TOKUDB_UNUSED(TABLE* altered_table),
574                                             Alter_inplace_info* ha_alter_info) {
575     TOKUDB_HANDLER_DBUG_ENTER("");
576     tokudb_alter_ctx* ctx =
577         static_cast<tokudb_alter_ctx*>(ha_alter_info->handler_ctx);
578     assert_always(transaction); // transaction must exist after table is locked
579     ctx->alter_txn = transaction;
580     bool result = false; // success
581     DBUG_RETURN(result);
582 }
583 
584 // Execute the alter operations.
inplace_alter_table(TABLE * altered_table,Alter_inplace_info * ha_alter_info)585 bool ha_tokudb::inplace_alter_table(
586     TABLE* altered_table,
587     Alter_inplace_info* ha_alter_info) {
588 
589     TOKUDB_HANDLER_DBUG_ENTER("");
590 
591     int error = 0;
592     tokudb_alter_ctx* ctx =
593         static_cast<tokudb_alter_ctx*>(ha_alter_info->handler_ctx);
594     HA_CREATE_INFO* create_info = ha_alter_info->create_info;
595 
596     // this should be enough to handle locking as the higher level MDL
597     // on this table should prevent any new analyze tasks.
598     share->cancel_background_jobs();
599 
600     if (error == 0 &&
601         (ctx->handler_flags &
602             (ALTER_DROP_NON_UNIQUE_NON_PRIM_INDEX |
603              ALTER_DROP_UNIQUE_INDEX))) {
604         error = alter_table_drop_index(ha_alter_info);
605     }
606     if (error == 0 &&
607         (ctx->handler_flags &
608             (ALTER_ADD_NON_UNIQUE_NON_PRIM_INDEX +
609              ALTER_ADD_UNIQUE_INDEX))) {
610         error = alter_table_add_index(ha_alter_info);
611     }
612     if (error == 0 &&
613         (ctx->handler_flags &
614             (ALTER_ADD_COLUMN |
615              ALTER_DROP_COLUMN))) {
616         error = alter_table_add_or_drop_column(altered_table, ha_alter_info);
617     }
618     if (error == 0 &&
619         (ctx->handler_flags & ALTER_CHANGE_CREATE_OPTION) &&
620         (create_info->used_fields & HA_CREATE_USED_AUTO)) {
621         error = write_auto_inc_create(
622             share->status_block,
623             create_info->auto_increment_value,
624             ctx->alter_txn);
625     }
626     if (error == 0 &&
627         (ctx->handler_flags & ALTER_CHANGE_CREATE_OPTION) &&
628         (create_info->used_fields & HA_CREATE_USED_ROW_FORMAT)) {
629         // Get the current compression
630         DB *db = share->key_file[0];
631         error = db->get_compression_method(db, &ctx->orig_compression_method);
632         assert_always(error == 0);
633 
634         // Set the new compression
635 #if TOKU_INCLUDE_OPTION_STRUCTS
636         toku_compression_method method =
637             row_format_to_toku_compression_method(
638                 (tokudb::sysvars::row_format_t)create_info->option_struct->row_format);
639 #else
640         toku_compression_method method =
641             row_type_to_toku_compression_method(create_info->row_type);
642 #endif
643         uint32_t curr_num_DBs = table->s->keys + tokudb_test(hidden_primary_key);
644         for (uint32_t i = 0; i < curr_num_DBs; i++) {
645             db = share->key_file[i];
646             error = db->change_compression_method(db, method);
647             if (error)
648                 break;
649             ctx->compression_changed = true;
650         }
651     }
652 
653     // note: only one column expansion is allowed
654 
655     if (error == 0 && ctx->expand_fixed_update_needed)
656         error = alter_table_expand_columns(altered_table, ha_alter_info);
657 
658     if (error == 0 && ctx->expand_varchar_update_needed)
659         error = alter_table_expand_varchar_offsets(
660             altered_table,
661             ha_alter_info);
662 
663     if (error == 0 && ctx->expand_blob_update_needed)
664         error = alter_table_expand_blobs(altered_table, ha_alter_info);
665 
666     if (error == 0 && ctx->reset_card) {
667         error = tokudb::alter_card(
668             share->status_block,
669             ctx->alter_txn,
670             table->s,
671             altered_table->s);
672     }
673     if (error == 0 && ctx->optimize_needed) {
674         error = do_optimize(ha_thd());
675     }
676 
677 
678 #if defined(TOKU_INCLUDE_WRITE_FRM_DATA) && TOKU_INCLUDE_WRITE_FRM_DATA
679 #if (50600 <= MYSQL_VERSION_ID && MYSQL_VERSION_ID <= 50699) || \
680     (50700 <= MYSQL_VERSION_ID && MYSQL_VERSION_ID <= 50799)
681 #if defined(WITH_PARTITION_STORAGE_ENGINE) && WITH_PARTITION_STORAGE_ENGINE
682     if (error == 0 &&
683         (TOKU_PARTITION_WRITE_FRM_DATA || altered_table->part_info == NULL)) {
684 #else
685     if (error == 0) {
686 #endif  // defined(WITH_PARTITION_STORAGE_ENGINE) && WITH_PARTITION_STORAGE_ENGINE
687         error = write_frm_data(
688             share->status_block,
689             ctx->alter_txn,
690             altered_table->s->path.str);
691     }
692 #endif  // (50600 <= MYSQL_VERSION_ID && MYSQL_VERSION_ID <= 50699) ||
693         // (50700 <= MYSQL_VERSION_ID && MYSQL_VERSION_ID <= 50799)
694 #endif  // defined(TOKU_INCLUDE_WRITE_FRM_DATA) && TOKU_INCLUDE_WRITE_FRM_DATA
695 
696     bool result = false; // success
697     if (error) {
698         print_error(error, MYF(0));
699         result = true;  // failure
700     }
701 
702     DBUG_RETURN(result);
703 }
704 
705 int ha_tokudb::alter_table_add_index(Alter_inplace_info* ha_alter_info) {
706 
707     // sort keys in add index order
708     KEY* key_info = (KEY*)tokudb::memory::malloc(
709         sizeof(KEY) * ha_alter_info->index_add_count,
710         MYF(MY_WME));
711     for (uint i = 0; i < ha_alter_info->index_add_count; i++) {
712         KEY *key = &key_info[i];
713         *key = ha_alter_info->key_info_buffer[ha_alter_info->index_add_buffer[i]];
714         for (KEY_PART_INFO* key_part = key->key_part;
715              key_part < key->key_part + key->user_defined_key_parts;
716              key_part++) {
717             key_part->field = table->field[key_part->fieldnr];
718         }
719     }
720 
721     tokudb_alter_ctx* ctx =
722         static_cast<tokudb_alter_ctx*>(ha_alter_info->handler_ctx);
723     ctx->add_index_changed = true;
724     int error = tokudb_add_index(
725         table,
726         key_info,
727         ha_alter_info->index_add_count,
728         ctx->alter_txn,
729         &ctx->incremented_num_DBs,
730         &ctx->modified_DBs);
731     if (error == HA_ERR_FOUND_DUPP_KEY) {
732         // hack for now, in case of duplicate key error,
733         // because at the moment we cannot display the right key
734         // information to the user, so that he knows potentially what went
735         // wrong.
736         last_dup_key = MAX_KEY;
737     }
738 
739     tokudb::memory::free(key_info);
740 
741     if (error == 0)
742         ctx->reset_card = true;
743 
744     return error;
745 }
746 
747 static bool find_index_of_key(
748     const char* key_name,
749     TABLE* table,
750     uint* index_offset_ptr) {
751 
752     for (uint i = 0; i < table->s->keys; i++) {
753         if (strcmp(key_name, table->key_info[i].name.str) == 0) {
754             *index_offset_ptr = i;
755             return true;
756         }
757     }
758     return false;
759 }
760 
761 static bool find_index_of_key(
762     const char* key_name,
763     KEY* key_info,
764     uint key_count,
765     uint* index_offset_ptr) {
766 
767     for (uint i = 0; i < key_count; i++) {
768         if (strcmp(key_name, key_info[i].name.str) == 0) {
769             *index_offset_ptr = i;
770             return true;
771         }
772     }
773     return false;
774 }
775 
776 int ha_tokudb::alter_table_drop_index(Alter_inplace_info* ha_alter_info) {
777 
778     KEY *key_info = table->key_info;
779     // translate key names to indexes into the key_info array
780     uint index_drop_offsets[ha_alter_info->index_drop_count];
781     for (uint i = 0; i < ha_alter_info->index_drop_count; i++) {
782         bool found;
783         found = find_index_of_key(
784             ha_alter_info->index_drop_buffer[i]->name.str,
785             table,
786             &index_drop_offsets[i]);
787         if (!found) {
788             // undo of add key in partition engine
789             found = find_index_of_key(
790                 ha_alter_info->index_drop_buffer[i]->name.str,
791                 ha_alter_info->key_info_buffer,
792                 ha_alter_info->key_count,
793                 &index_drop_offsets[i]);
794             assert_always(found);
795             key_info = ha_alter_info->key_info_buffer;
796         }
797     }
798 
799     // drop indexes
800     tokudb_alter_ctx* ctx =
801         static_cast<tokudb_alter_ctx*>(ha_alter_info->handler_ctx);
802     ctx->drop_index_changed = true;
803 
804     int error = drop_indexes(index_drop_offsets,
805                              ha_alter_info->index_drop_count,
806                              key_info,
807                              ctx->alter_txn);
808 
809     if (error == 0)
810         ctx->reset_card = true;
811 
812     return error;
813 }
814 
815 int ha_tokudb::alter_table_add_or_drop_column(
816     TABLE* altered_table,
817     Alter_inplace_info* ha_alter_info) {
818 
819     tokudb_alter_ctx* ctx =
820         static_cast<tokudb_alter_ctx*>(ha_alter_info->handler_ctx);
821     int error;
822     uchar *column_extra = NULL;
823     uint32_t max_column_extra_size;
824     uint32_t num_column_extra;
825     uint32_t num_columns = 0;
826     uint32_t curr_num_DBs = table->s->keys + tokudb_test(hidden_primary_key);
827     // set size such that we know it is big enough for both cases
828     uint32_t columns[table->s->fields + altered_table->s->fields];
829     memset(columns, 0, sizeof(columns));
830 
831     // generate the array of columns
832     if (ha_alter_info->handler_flags & ALTER_DROP_COLUMN) {
833         find_changed_columns(
834             columns,
835             &num_columns,
836             altered_table,
837             table);
838     } else if (ha_alter_info->handler_flags & ALTER_ADD_COLUMN) {
839         find_changed_columns(
840             columns,
841             &num_columns,
842             table,
843             altered_table);
844     } else {
845         assert_unreachable();
846     }
847     max_column_extra_size =
848         // max static row_mutator
849         STATIC_ROW_MUTATOR_SIZE +
850         // max dynamic row_mutator
851         4 + num_columns*(1+1+4+1+1+4) + altered_table->s->reclength +
852         // max static blob size
853         (4 + share->kc_info.num_blobs) +
854         // max dynamic blob size
855         (num_columns*(1+4+1+4));
856     column_extra = (uchar*)tokudb::memory::malloc(
857         max_column_extra_size,
858         MYF(MY_WME));
859     if (column_extra == NULL) {
860         error = ENOMEM;
861         goto cleanup;
862     }
863 
864     for (uint32_t i = 0; i < curr_num_DBs; i++) {
865         // change to a new descriptor
866         DBT row_descriptor; memset(&row_descriptor, 0, sizeof row_descriptor);
867         error = new_row_descriptor(
868             altered_table, ha_alter_info, i, &row_descriptor);
869         if (error)
870             goto cleanup;
871         error = share->key_file[i]->change_descriptor(
872             share->key_file[i],
873             ctx->alter_txn,
874             &row_descriptor,
875             0);
876         tokudb::memory::free(row_descriptor.data);
877         if (error)
878             goto cleanup;
879 
880         if (i == primary_key || key_is_clustering(&table_share->key_info[i])) {
881             num_column_extra = fill_row_mutator(
882                 column_extra,
883                 columns,
884                 num_columns,
885                 altered_table,
886                 ctx->altered_table_kc_info,
887                 i,
888                 // true if adding columns, otherwise is a drop
889                 (ha_alter_info->handler_flags &
890                  ALTER_ADD_COLUMN) != 0);
891 
892             DBT column_dbt; memset(&column_dbt, 0, sizeof column_dbt);
893             column_dbt.data = column_extra;
894             column_dbt.size = num_column_extra;
895             DBUG_ASSERT(num_column_extra <= max_column_extra_size);
896             error = share->key_file[i]->update_broadcast(
897                 share->key_file[i],
898                 ctx->alter_txn,
899                 &column_dbt,
900                 DB_IS_RESETTING_OP);
901             if (error) {
902                 goto cleanup;
903             }
904         }
905     }
906 
907     error = 0;
908  cleanup:
909     tokudb::memory::free(column_extra);
910     return error;
911 }
912 
// Commit or abort the alter operations.
// If commit then write the new frm data to the status using the alter
//    transaction.
// If abort then abort the alter transaction and try to rollback the
//    non-transactional changes.
//
// altered_table: the table definition after the alter; it is only referenced
//    by the version-conditional code that writes the frm data.
// ha_alter_info: describes the alter; its handler_ctx is the tokudb_alter_ctx
//    that carries the alter transaction and the flags recording which
//    non-transactional changes were made.
// commit: true to commit, false to abort.  A failure to write the frm data
//    turns a commit into an abort.
// Returns false on success and true on failure; failure is only reported
//    when writing the frm data fails.
bool ha_tokudb::commit_inplace_alter_table(TOKUDB_UNUSED(TABLE* altered_table),
                                           Alter_inplace_info* ha_alter_info,
                                           bool commit) {
    TOKUDB_HANDLER_DBUG_ENTER("");

    tokudb_alter_ctx* ctx =
        static_cast<tokudb_alter_ctx*>(ha_alter_info->handler_ctx);
    bool result = false; // success
    THD *thd = ha_thd();

    if (commit) {
#if (50613 <= MYSQL_VERSION_ID && MYSQL_VERSION_ID <= 50699) || \
    (50700 <= MYSQL_VERSION_ID && MYSQL_VERSION_ID <= 50799) || \
    (100000 <= MYSQL_VERSION_ID)
        // NOTE(review): clearing group_commit_ctx presumably tells the server
        // that this handler commits on its own rather than as part of a
        // group -- confirm against the server's handler API
        if (ha_alter_info->group_commit_ctx) {
            ha_alter_info->group_commit_ctx = NULL;
        }
#endif  // server versions that have Alter_inplace_info::group_commit_ctx
#if defined(TOKU_INCLUDE_WRITE_FRM_DATA) && TOKU_INCLUDE_WRITE_FRM_DATA
#if (50500 <= MYSQL_VERSION_ID && MYSQL_VERSION_ID <= 50599) || \
    (100000 <= MYSQL_VERSION_ID)
#if defined(WITH_PARTITION_STORAGE_ENGINE) && WITH_PARTITION_STORAGE_ENGINE
        if (TOKU_PARTITION_WRITE_FRM_DATA || altered_table->part_info == NULL) {
#else
        if (true) {
#endif  // defined(WITH_PARTITION_STORAGE_ENGINE) && WITH_PARTITION_STORAGE_ENGINE
            // write the frm data of the altered table into the status
            // dictionary inside the alter transaction
            int error = write_frm_data(
                share->status_block,
                ctx->alter_txn,
                altered_table->s->path.str);
            if (error) {
                // fall through to the abort path below and report failure
                commit = false;
                result = true;
                print_error(error, MYF(0));
            }
        }
#endif  // (50500 <= MYSQL_VERSION_ID && MYSQL_VERSION_ID <= 50599) ||
        // (100000 <= MYSQL_VERSION_ID)
#endif  // defined(TOKU_INCLUDE_WRITE_FRM_DATA) && TOKU_INCLUDE_WRITE_FRM_DATA
    }

    if (!commit) {
        // an exclusive metadata lock is only acquired here when it is not
        // already held and an index or compression change was recorded in
        // the alter context
        if (table->mdl_ticket->get_type() != MDL_EXCLUSIVE &&
            (ctx->add_index_changed || ctx->drop_index_changed ||
             ctx->compression_changed)) {

            // get exclusive lock no matter what
            // the kill state is cleared while waiting and the wait is retried
            // once per second for as long as wait_while_table_is_used()
            // returns true.  the saved kill state is put back afterwards
            // unless the thread's kill state is no longer NOT_KILLED.
            // the loop counter i is not used for anything else.
#if defined(MARIADB_BASE_VERSION)
            killed_state saved_killed_state = thd->killed;
            thd->killed = NOT_KILLED;
            for (volatile uint i = 0;
                 wait_while_table_is_used(thd, table, HA_EXTRA_NOT_USED);
                 i++) {
                if (thd->killed != NOT_KILLED)
                    thd->killed = NOT_KILLED;
                sleep(1);
            }
            assert_always(table->mdl_ticket->get_type() == MDL_EXCLUSIVE);
            if (thd->killed == NOT_KILLED)
                thd->killed = saved_killed_state;
#else
            THD::killed_state saved_killed_state = thd->killed;
            thd->killed = THD::NOT_KILLED;
            // MySQL does not handle HA_EXTRA_NOT_USED so we use
            // HA_EXTRA_PREPARE_FOR_RENAME since it is passed through
            // the partition storage engine and is treated as a NOP by tokudb
            for (volatile uint i = 0;
                 wait_while_table_is_used(
                    thd,
                    table,
                    HA_EXTRA_PREPARE_FOR_RENAME);
                 i++) {
                if (thd->killed != THD::NOT_KILLED)
                    thd->killed = THD::NOT_KILLED;
                sleep(1);
            }
            assert_always(table->mdl_ticket->get_type() == MDL_EXCLUSIVE);
            if (thd->killed == THD::NOT_KILLED)
                thd->killed = saved_killed_state;
#endif
        }

        // abort the alter transaction NOW so that any alters are rolled back.
        // this allows the following restores to work.
        tokudb_trx_data* trx =
            (tokudb_trx_data*)thd_get_ha_data(thd, tokudb_hton);
        assert_always(ctx->alter_txn == trx->stmt);
        assert_always(trx->tokudb_lock_count > 0);
        // for partitioned tables, we use a single transaction to do all of the
        // partition changes.  the tokudb_lock_count is a reference count for
        // each of the handlers to the same transaction.  obviously, we want
        // to only abort once.
        if (trx->tokudb_lock_count > 0) {
            // drop this handler's reference; the transaction is aborted once
            // the count is no longer above create_lock_count
            if (--trx->tokudb_lock_count <= trx->create_lock_count) {
                trx->create_lock_count = 0;
                abort_txn(ctx->alter_txn);
                ctx->alter_txn = NULL;
                trx->stmt = NULL;
                trx->sub_sp_level = NULL;
            }
            // this handler no longer refers to a transaction
            transaction = NULL;
        }

        // undo the changes recorded in the alter context
        if (ctx->add_index_changed) {
            restore_add_index(
                table,
                ha_alter_info->index_add_count,
                ctx->incremented_num_DBs,
                ctx->modified_DBs);
        }
        if (ctx->drop_index_changed) {
            // translate key names to indexes into the key_info array
            uint index_drop_offsets[ha_alter_info->index_drop_count];
            for (uint i = 0; i < ha_alter_info->index_drop_count; i++) {
                bool found = find_index_of_key(
                    ha_alter_info->index_drop_buffer[i]->name.str,
                    table,
                    &index_drop_offsets[i]);
                assert_always(found);
            }
            restore_drop_indexes(index_drop_offsets,
                                 ha_alter_info->index_drop_count);
        }
        if (ctx->compression_changed) {
            // put the original compression method back on every dictionary
            // (each key plus the hidden primary key, if any)
            uint32_t curr_num_DBs =
                table->s->keys + tokudb_test(hidden_primary_key);
            for (uint32_t i = 0; i < curr_num_DBs; i++) {
                DB *db = share->key_file[i];
                int error = db->change_compression_method(
                    db,
                    ctx->orig_compression_method);
                assert_always(error == 0);
            }
        }
    }
    DBUG_RETURN(result);
}
1055 
1056 // Setup the altered table's key and col info.
1057 int ha_tokudb::setup_kc_info(
1058     TABLE* altered_table,
1059     KEY_AND_COL_INFO* altered_kc_info) {
1060 
1061     int error = allocate_key_and_col_info(altered_table->s, altered_kc_info);
1062     if (error == 0)
1063         error = initialize_key_and_col_info(
1064             altered_table->s,
1065             altered_table,
1066             altered_kc_info,
1067             hidden_primary_key,
1068             primary_key);
1069     return error;
1070 }
1071 
1072 // Expand the variable length fields offsets from 1 to 2 bytes.
1073 int ha_tokudb::alter_table_expand_varchar_offsets(
1074     TABLE* altered_table,
1075     Alter_inplace_info* ha_alter_info) {
1076 
1077     int error = 0;
1078     tokudb_alter_ctx* ctx =
1079         static_cast<tokudb_alter_ctx*>(ha_alter_info->handler_ctx);
1080 
1081     uint32_t curr_num_DBs = table->s->keys + tokudb_test(hidden_primary_key);
1082     for (uint32_t i = 0; i < curr_num_DBs; i++) {
1083         // change to a new descriptor
1084         DBT row_descriptor; memset(&row_descriptor, 0, sizeof row_descriptor);
1085         error = new_row_descriptor(
1086             altered_table, ha_alter_info, i, &row_descriptor);
1087         if (error)
1088             break;
1089         error = share->key_file[i]->change_descriptor(
1090             share->key_file[i],
1091             ctx->alter_txn,
1092             &row_descriptor,
1093             0);
1094         tokudb::memory::free(row_descriptor.data);
1095         if (error)
1096             break;
1097 
1098         // for all trees that have values, make an update variable offsets
1099         // message and broadcast it into the tree
1100         if (i == primary_key || key_is_clustering(&table_share->key_info[i])) {
1101             uint32_t offset_start =
1102                 table_share->null_bytes +
1103                 share->kc_info.mcp_info[i].fixed_field_size;
1104             uint32_t offset_end =
1105                 offset_start +
1106                 share->kc_info.mcp_info[i].len_of_offsets;
1107             uint32_t number_of_offsets = offset_end - offset_start;
1108 
1109             // make the expand variable offsets message
1110             DBT expand; memset(&expand, 0, sizeof expand);
1111             expand.size =
1112                 sizeof(uchar) + sizeof(offset_start) + sizeof(offset_end);
1113             expand.data = tokudb::memory::malloc(expand.size, MYF(MY_WME));
1114             if (!expand.data) {
1115                 error = ENOMEM;
1116                 break;
1117             }
1118             uchar* expand_ptr = (uchar*)expand.data;
1119             expand_ptr[0] = UPDATE_OP_EXPAND_VARIABLE_OFFSETS;
1120             expand_ptr += sizeof(uchar);
1121 
1122             memcpy(expand_ptr, &number_of_offsets, sizeof(number_of_offsets));
1123             expand_ptr += sizeof(number_of_offsets);
1124 
1125             memcpy(expand_ptr, &offset_start, sizeof(offset_start));
1126             expand_ptr += sizeof(offset_start);
1127 
1128             // and broadcast it into the tree
1129             error = share->key_file[i]->update_broadcast(
1130                 share->key_file[i],
1131                 ctx->alter_txn,
1132                 &expand,
1133                 DB_IS_RESETTING_OP);
1134             tokudb::memory::free(expand.data);
1135             if (error)
1136                 break;
1137         }
1138     }
1139 
1140     return error;
1141 }
1142 
1143 // Return true if a field is part of a key
1144 static bool field_in_key(KEY *key, Field *field) {
1145     for (uint i = 0; i < key->user_defined_key_parts; i++) {
1146         KEY_PART_INFO *key_part = &key->key_part[i];
1147         if (strcmp(key_part->field->field_name.str, field->field_name.str) == 0)
1148             return true;
1149     }
1150     return false;
1151 }
1152 
1153 // Return true if a field is part of any key
1154 static bool field_in_key_of_table(TABLE *table, Field *field) {
1155     for (uint i = 0; i < table->s->keys; i++) {
1156         if (field_in_key(&table->key_info[i], field))
1157             return true;
1158     }
1159     return false;
1160 }
1161 
1162 // Return true if all changed varchar/varbinary field lengths can be changed
1163 // inplace, otherwise return false
1164 static bool change_varchar_length_is_supported(Field* old_field,
1165                                                Field* new_field,
1166                                                tokudb_alter_ctx* ctx) {
1167     if (old_field->real_type() != MYSQL_TYPE_VARCHAR ||
1168         new_field->real_type() != MYSQL_TYPE_VARCHAR ||
1169         old_field->binary() != new_field->binary() ||
1170         old_field->charset()->number != new_field->charset()->number ||
1171         old_field->field_length > new_field->field_length)
1172         return false;
1173     if (ctx->table_kc_info->num_offset_bytes >
1174         ctx->altered_table_kc_info->num_offset_bytes)
1175         return false; // shrink is not supported
1176     if (ctx->table_kc_info->num_offset_bytes <
1177         ctx->altered_table_kc_info->num_offset_bytes)
1178         // sum of varchar lengths changed from 1 to 2
1179         ctx->expand_varchar_update_needed = true;
1180     return true;
1181 }
1182 
1183 // Return true if all changed field lengths can be changed inplace, otherwise
1184 // return false
1185 static bool change_length_is_supported(TABLE* table,
1186                                        TABLE* altered_table,
1187                                        tokudb_alter_ctx* ctx) {
1188     if (table->s->fields != altered_table->s->fields)
1189         return false;
1190     if (table->s->null_bytes != altered_table->s->null_bytes)
1191         return false;
1192     if (ctx->changed_fields.elements() > 1)
1193         return false; // only support one field change
1194     for (DYNAMIC_ARRAY_ELEMENTS_TYPE ai = 0;
1195          ai < ctx->changed_fields.elements();
1196          ai++) {
1197         uint i = ctx->changed_fields.at(ai);
1198         Field *old_field = table->field[i];
1199         Field *new_field = altered_table->field[i];
1200         if (old_field->real_type() != new_field->real_type())
1201             return false; // no type conversions
1202         if (old_field->real_type() != MYSQL_TYPE_VARCHAR)
1203             return false; // only varchar
1204         if (field_in_key_of_table(table, old_field) ||
1205             field_in_key_of_table(altered_table, new_field))
1206             return false; // not in any key
1207         if (!change_varchar_length_is_supported(old_field, new_field, ctx))
1208             return false;
1209     }
1210 
1211     return true;
1212 }
1213 
1214 // Debug function that ensures that the array is sorted
1215 static bool is_sorted(Dynamic_array<uint> &a) {
1216     bool r = true;
1217     if (a.elements() > 0) {
1218         uint lastelement = a.at(0);
1219         for (DYNAMIC_ARRAY_ELEMENTS_TYPE i = 1; i < a.elements(); i++)
1220             if (lastelement > a.at(i))
1221                 r = false;
1222     }
1223     return r;
1224 }
1225 
1226 int ha_tokudb::alter_table_expand_columns(
1227     TABLE* altered_table,
1228     Alter_inplace_info* ha_alter_info) {
1229 
1230     int error = 0;
1231     tokudb_alter_ctx* ctx =
1232         static_cast<tokudb_alter_ctx*>(ha_alter_info->handler_ctx);
1233     // since we build the changed_fields array in field order, it must be sorted
1234     assert_always(is_sorted(ctx->changed_fields));
1235     for (DYNAMIC_ARRAY_ELEMENTS_TYPE ai = 0;
1236          error == 0 && ai < ctx->changed_fields.elements();
1237          ai++) {
1238         uint expand_field_num = ctx->changed_fields.at(ai);
1239         error = alter_table_expand_one_column(
1240             altered_table,
1241             ha_alter_info,
1242             expand_field_num);
1243     }
1244 
1245     return error;
1246 }
1247 
1248 // Return true if the field is an unsigned int
1249 static bool is_unsigned(Field *f) {
1250     return (f->flags & UNSIGNED_FLAG) != 0;
1251 }
1252 
1253 // Return the starting offset in the value for a particular index (selected by
1254 // idx) of a particular field (selected by expand_field_num)
1255 // TODO: replace this?
1256 static uint32_t alter_table_field_offset(
1257     uint32_t null_bytes,
1258     KEY_AND_COL_INFO* kc_info,
1259     int idx,
1260     int expand_field_num) {
1261 
1262     uint32_t offset = null_bytes;
1263     for (int i = 0; i < expand_field_num; i++) {
1264         if (bitmap_is_set(&kc_info->key_filters[idx], i)) // skip key fields
1265             continue;
1266         offset += kc_info->field_lengths[i];
1267     }
1268     return offset;
1269 }
1270 
1271 // Send an expand message into all clustered indexes including the primary
1272 int ha_tokudb::alter_table_expand_one_column(
1273     TABLE* altered_table,
1274     Alter_inplace_info* ha_alter_info,
1275     int expand_field_num) {
1276 
1277     int error = 0;
1278     tokudb_alter_ctx* ctx =
1279         static_cast<tokudb_alter_ctx*>(ha_alter_info->handler_ctx);
1280 
1281     Field *old_field = table->field[expand_field_num];
1282     TOKU_TYPE old_field_type = mysql_to_toku_type(old_field);
1283     Field *new_field = altered_table->field[expand_field_num];
1284     TOKU_TYPE new_field_type = mysql_to_toku_type(new_field);
1285     assert_always(old_field_type == new_field_type);
1286 
1287     uchar operation;
1288     uchar pad_char;
1289     switch (old_field_type) {
1290     case toku_type_int:
1291         assert_always(is_unsigned(old_field) == is_unsigned(new_field));
1292         if (is_unsigned(old_field))
1293             operation = UPDATE_OP_EXPAND_UINT;
1294         else
1295             operation = UPDATE_OP_EXPAND_INT;
1296         pad_char = 0;
1297         break;
1298     case toku_type_fixstring:
1299         operation = UPDATE_OP_EXPAND_CHAR;
1300         pad_char = old_field->charset()->pad_char;
1301         break;
1302     case toku_type_fixbinary:
1303         operation = UPDATE_OP_EXPAND_BINARY;
1304         pad_char = 0;
1305         break;
1306     default:
1307         assert_unreachable();
1308     }
1309 
1310     uint32_t curr_num_DBs = table->s->keys + tokudb_test(hidden_primary_key);
1311     for (uint32_t i = 0; i < curr_num_DBs; i++) {
1312         // change to a new descriptor
1313         DBT row_descriptor; memset(&row_descriptor, 0, sizeof row_descriptor);
1314         error = new_row_descriptor(
1315             altered_table, ha_alter_info, i, &row_descriptor);
1316         if (error)
1317             break;
1318         error = share->key_file[i]->change_descriptor(
1319             share->key_file[i],
1320             ctx->alter_txn,
1321             &row_descriptor,
1322             0);
1323         tokudb::memory::free(row_descriptor.data);
1324         if (error)
1325             break;
1326 
1327         // for all trees that have values, make an expand update message and
1328         // broadcast it into the tree
1329         if (i == primary_key || key_is_clustering(&table_share->key_info[i])) {
1330             uint32_t old_offset = alter_table_field_offset(
1331                 table_share->null_bytes,
1332                 ctx->table_kc_info,
1333                 i,
1334                 expand_field_num);
1335             uint32_t new_offset = alter_table_field_offset(
1336                 table_share->null_bytes,
1337                 ctx->altered_table_kc_info,
1338                 i,
1339                 expand_field_num);
1340             assert_always(old_offset <= new_offset);
1341 
1342             uint32_t old_length =
1343                 ctx->table_kc_info->field_lengths[expand_field_num];
1344             assert_always(old_length == old_field->pack_length());
1345 
1346             uint32_t new_length =
1347                 ctx->altered_table_kc_info->field_lengths[expand_field_num];
1348             assert_always(new_length == new_field->pack_length());
1349 
1350             DBT expand; memset(&expand, 0, sizeof(expand));
1351             expand.size =
1352                 sizeof(operation) + sizeof(new_offset) +
1353                 sizeof(old_length) + sizeof(new_length);
1354             if (operation == UPDATE_OP_EXPAND_CHAR ||
1355                 operation == UPDATE_OP_EXPAND_BINARY)
1356                 expand.size += sizeof(pad_char);
1357             expand.data = tokudb::memory::malloc(expand.size, MYF(MY_WME));
1358             if (!expand.data) {
1359                 error = ENOMEM;
1360                 break;
1361             }
1362             uchar *expand_ptr = (uchar *)expand.data;
1363             expand_ptr[0] = operation;
1364             expand_ptr += sizeof operation;
1365 
1366             // for the first altered field, old_offset == new_offset.
1367             // for the subsequent altered fields, the new_offset
1368             // should be used as it includes the length changes from the
1369             // previous altered fields.
1370             memcpy(expand_ptr, &new_offset, sizeof(new_offset));
1371             expand_ptr += sizeof(new_offset);
1372 
1373             memcpy(expand_ptr, &old_length, sizeof(old_length));
1374             expand_ptr += sizeof(old_length);
1375 
1376             memcpy(expand_ptr, &new_length, sizeof(new_length));
1377             expand_ptr += sizeof(new_length);
1378 
1379             if (operation == UPDATE_OP_EXPAND_CHAR ||
1380                 operation == UPDATE_OP_EXPAND_BINARY) {
1381                 memcpy(expand_ptr, &pad_char, sizeof(pad_char));
1382                 expand_ptr += sizeof(pad_char);
1383             }
1384 
1385             assert_always(expand_ptr == (uchar*)expand.data + expand.size);
1386 
1387             // and broadcast it into the tree
1388             error = share->key_file[i]->update_broadcast(
1389                 share->key_file[i],
1390                 ctx->alter_txn,
1391                 &expand,
1392                 DB_IS_RESETTING_OP);
1393             tokudb::memory::free(expand.data);
1394             if (error)
1395                 break;
1396         }
1397     }
1398 
1399     return error;
1400 }
1401 
1402 static void marshall_blob_lengths(
1403     tokudb::buffer& b,
1404     uint32_t n,
1405     TABLE* table,
1406     KEY_AND_COL_INFO* kc_info) {
1407 
1408     for (uint i = 0; i < n; i++) {
1409         uint blob_field_index = kc_info->blob_fields[i];
1410         assert_always(blob_field_index < table->s->fields);
1411         uint8_t blob_field_length =
1412             table->s->field[blob_field_index]->row_pack_length();
1413         b.append(&blob_field_length, sizeof blob_field_length);
1414     }
1415 }
1416 
1417 int ha_tokudb::alter_table_expand_blobs(
1418     TABLE* altered_table,
1419     Alter_inplace_info* ha_alter_info) {
1420 
1421     int error = 0;
1422     tokudb_alter_ctx* ctx =
1423         static_cast<tokudb_alter_ctx*>(ha_alter_info->handler_ctx);
1424 
1425     uint32_t curr_num_DBs = table->s->keys + tokudb_test(hidden_primary_key);
1426     for (uint32_t i = 0; i < curr_num_DBs; i++) {
1427         // change to a new descriptor
1428         DBT row_descriptor; memset(&row_descriptor, 0, sizeof row_descriptor);
1429         error = new_row_descriptor(
1430             altered_table, ha_alter_info, i, &row_descriptor);
1431         if (error)
1432             break;
1433         error = share->key_file[i]->change_descriptor(
1434             share->key_file[i],
1435             ctx->alter_txn,
1436             &row_descriptor,
1437             0);
1438         tokudb::memory::free(row_descriptor.data);
1439         if (error)
1440             break;
1441 
1442         // for all trees that have values, make an update blobs message and
1443         // broadcast it into the tree
1444         if (i == primary_key || key_is_clustering(&table_share->key_info[i])) {
1445             tokudb::buffer b;
1446             uint8_t op = UPDATE_OP_EXPAND_BLOB;
1447             b.append(&op, sizeof op);
1448             b.append_ui<uint32_t>(
1449                 table->s->null_bytes +
1450                 ctx->table_kc_info->mcp_info[i].fixed_field_size);
1451             uint32_t var_offset_bytes =
1452                 ctx->table_kc_info->mcp_info[i].len_of_offsets;
1453             b.append_ui<uint32_t>(var_offset_bytes);
1454             b.append_ui<uint32_t>(
1455                 var_offset_bytes == 0 ? 0 :
1456                 ctx->table_kc_info->num_offset_bytes);
1457 
1458             // add blobs info
1459             uint32_t num_blobs = ctx->table_kc_info->num_blobs;
1460             b.append_ui<uint32_t>(num_blobs);
1461             marshall_blob_lengths(b, num_blobs, table, ctx->table_kc_info);
1462             marshall_blob_lengths(
1463                 b,
1464                 num_blobs,
1465                 altered_table,
1466                 ctx->altered_table_kc_info);
1467 
1468             // and broadcast it into the tree
1469             DBT expand; memset(&expand, 0, sizeof expand);
1470             expand.data = b.data();
1471             expand.size = b.size();
1472             error = share->key_file[i]->update_broadcast(
1473                 share->key_file[i],
1474                 ctx->alter_txn,
1475                 &expand,
1476                 DB_IS_RESETTING_OP);
1477             if (error)
1478                 break;
1479         }
1480     }
1481 
1482     return error;
1483 }
1484 
1485 // Return true if two fixed length fields can be changed inplace
1486 static bool change_fixed_length_is_supported(Field* old_field,
1487                                              Field* new_field,
1488                                              tokudb_alter_ctx* ctx) {
1489     // no change in size is supported
1490     if (old_field->pack_length() == new_field->pack_length())
1491         return true;
1492     // shrink is not supported
1493     if (old_field->pack_length() > new_field->pack_length())
1494         return false;
1495     ctx->expand_fixed_update_needed = true;
1496     return true;
1497 }
1498 
1499 static bool change_blob_length_is_supported(Field* old_field,
1500                                             Field* new_field,
1501                                             tokudb_alter_ctx* ctx) {
1502     // blob -> longer or equal length blob
1503     if (old_field->binary() && new_field->binary() &&
1504         old_field->pack_length() <= new_field->pack_length()) {
1505         ctx->expand_blob_update_needed = true;
1506         return true;
1507     }
1508     // text -> longer or equal length text
1509     if (!old_field->binary() && !new_field->binary() &&
1510         old_field->pack_length() <= new_field->pack_length() &&
1511         old_field->charset()->number == new_field->charset()->number) {
1512         ctx->expand_blob_update_needed = true;
1513         return true;
1514     }
1515     return false;
1516 }
1517 
1518 // Return true if the MySQL type is an int or unsigned int type
1519 static bool is_int_type(enum_field_types t) {
1520     switch (t) {
1521     case MYSQL_TYPE_TINY:
1522     case MYSQL_TYPE_SHORT:
1523     case MYSQL_TYPE_INT24:
1524     case MYSQL_TYPE_LONG:
1525     case MYSQL_TYPE_LONGLONG:
1526         return true;
1527     default:
1528         return false;
1529     }
1530 }
1531 
1532 // Return true if two field types can be changed inplace
1533 static bool change_field_type_is_supported(Field* old_field,
1534                                            Field* new_field,
1535                                            tokudb_alter_ctx* ctx) {
1536     enum_field_types old_type = old_field->real_type();
1537     enum_field_types new_type = new_field->real_type();
1538     if (is_int_type(old_type)) {
1539         // int and unsigned int expansion
1540         if (is_int_type(new_type) &&
1541             is_unsigned(old_field) == is_unsigned(new_field))
1542             return change_fixed_length_is_supported(old_field, new_field, ctx);
1543         else
1544             return false;
1545     } else if (old_type == MYSQL_TYPE_STRING) {
1546         // char(X) -> char(Y) and binary(X) -> binary(Y) expansion
1547         if (new_type == MYSQL_TYPE_STRING &&
1548             old_field->binary() == new_field->binary() &&
1549             old_field->charset()->number == new_field->charset()->number)
1550             return change_fixed_length_is_supported(old_field, new_field, ctx);
1551         else
1552             return false;
1553     } else if (old_type == MYSQL_TYPE_VARCHAR) {
1554         // varchar(X) -> varchar(Y) and varbinary(X) -> varbinary(Y) expansion
1555         // where X < 256 <= Y the ALTER_STORED_COLUMN_TYPE handler flag is set for
1556         // these cases
1557         return change_varchar_length_is_supported(old_field, new_field, ctx);
1558     } else if (old_type == MYSQL_TYPE_BLOB && new_type == MYSQL_TYPE_BLOB) {
1559         return change_blob_length_is_supported(old_field, new_field, ctx);
1560     } else
1561         return false;
1562 }
1563 
1564 // Return true if all changed field types can be changed inplace
1565 static bool change_type_is_supported(TABLE* table,
1566                                      TABLE* altered_table,
1567                                      tokudb_alter_ctx* ctx) {
1568     if (table->s->null_bytes != altered_table->s->null_bytes)
1569         return false;
1570     if (table->s->fields != altered_table->s->fields)
1571         return false;
1572     if (ctx->changed_fields.elements() > 1)
1573         return false; // only support one field change
1574     for (DYNAMIC_ARRAY_ELEMENTS_TYPE ai = 0;
1575          ai < ctx->changed_fields.elements();
1576          ai++) {
1577         uint i = ctx->changed_fields.at(ai);
1578         Field *old_field = table->field[i];
1579         Field *new_field = altered_table->field[i];
1580         if (field_in_key_of_table(table, old_field) ||
1581             field_in_key_of_table(altered_table, new_field))
1582             return false;
1583         if (!change_field_type_is_supported(old_field, new_field, ctx))
1584             return false;
1585     }
1586     return true;
1587 }
1588 
1589 // Allocate and initialize a new descriptor for a dictionary in the altered
1590 // table identified with idx.
1591 // Return the new descriptor in the row_descriptor DBT.
1592 // Return non-zero on error.
1593 int ha_tokudb::new_row_descriptor(TABLE* altered_table,
1594                                   Alter_inplace_info* ha_alter_info,
1595                                   uint32_t idx,
1596                                   DBT* row_descriptor) {
1597     int error = 0;
1598     tokudb_alter_ctx* ctx =
1599         static_cast<tokudb_alter_ctx*>(ha_alter_info->handler_ctx);
1600     row_descriptor->size =
1601         get_max_desc_size(ctx->altered_table_kc_info, altered_table);
1602     row_descriptor->data =
1603         (uchar*)tokudb::memory::malloc(row_descriptor->size, MYF(MY_WME));
1604     if (row_descriptor->data == NULL) {
1605         error = ENOMEM;
1606     } else {
1607         KEY* prim_key =
1608             hidden_primary_key ? NULL :
1609             &altered_table->s->key_info[primary_key];
1610         if (idx == primary_key) {
1611             row_descriptor->size = create_main_key_descriptor(
1612                 (uchar*)row_descriptor->data,
1613                 prim_key,
1614                 hidden_primary_key,
1615                 primary_key,
1616                 altered_table,
1617                 ctx->altered_table_kc_info);
1618         } else {
1619             row_descriptor->size = create_secondary_key_descriptor(
1620                 (uchar*)row_descriptor->data,
1621                 &altered_table->key_info[idx],
1622                 prim_key,
1623                 hidden_primary_key,
1624                 altered_table,
1625                 primary_key,
1626                 idx,
1627                 ctx->altered_table_kc_info);
1628         }
1629         error = 0;
1630     }
1631     return error;
1632 }
1633 
1634 #endif
1635