1 /* Copyright (c) 2000, 2016, Oracle and/or its affiliates.
2 Copyright (c) 2009, 2021, MariaDB Corporation.
3
4 This program is free software; you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation; version 2 of the License.
7
8 This program is distributed in the hope that it will be useful,
9 but WITHOUT ANY WARRANTY; without even the implied warranty of
10 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11 GNU General Public License for more details.
12
13 You should have received a copy of the GNU General Public License
14 along with this program; if not, write to the Free Software Foundation,
15 Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335 USA */
16
17 /** @file handler.cc
18
19 @brief
20 Handler-calling-functions
21 */
22
23 #include "mariadb.h"
24 #include <inttypes.h>
25 #include "sql_priv.h"
26 #include "unireg.h"
27 #include "rpl_rli.h"
28 #include "sql_cache.h" // query_cache, query_cache_*
29 #include "sql_connect.h" // global_table_stats
30 #include "key.h" // key_copy, key_unpack, key_cmp_if_same, key_cmp
31 #include "sql_table.h" // build_table_filename
32 #include "sql_parse.h" // check_stack_overrun
33 #include "sql_base.h" // TDC_element
34 #include "discover.h" // extension_based_table_discovery, etc
35 #include "log_event.h" // *_rows_log_event
36 #include "create_options.h"
37 #include <myisampack.h>
38 #include "transaction.h"
39 #include "myisam.h"
40 #include "probes_mysql.h"
41 #include <mysql/psi/mysql_table.h>
42 #include <pfs_transaction_provider.h>
43 #include <mysql/psi/mysql_transaction.h>
44 #include "debug_sync.h" // DEBUG_SYNC
45 #include "sql_audit.h"
46 #include "ha_sequence.h"
47 #include "rowid_filter.h"
48 #include "mysys_err.h"
49
50 #ifdef WITH_PARTITION_STORAGE_ENGINE
51 #include "ha_partition.h"
52 #endif
53
54 #ifdef WITH_ARIA_STORAGE_ENGINE
55 #include "../storage/maria/ha_maria.h"
56 #endif
57 #include "semisync_master.h"
58
59 #include "wsrep_mysqld.h"
60 #ifdef WITH_WSREP
61 #include "wsrep_binlog.h"
62 #include "wsrep_xid.h"
63 #include "wsrep_thd.h"
64 #include "wsrep_trans_observer.h" /* wsrep transaction hooks */
65 #include "wsrep_var.h" /* wsrep_hton_check() */
66 #endif /* WITH_WSREP */
67
/**
  @def MYSQL_TABLE_LOCK_WAIT
  Instrumentation helper for table io_waits.
  Starts a PSI table-lock wait, runs PAYLOAD, then ends the wait.
  When the table has no PSI instrumentation (m_psi == NULL), PAYLOAD
  runs without any instrumentation overhead.
  @param OP the table operation to be performed
  @param FLAGS per table operation flags.
  @param PAYLOAD the code to instrument.
  @sa MYSQL_END_TABLE_WAIT.
*/
#ifdef HAVE_PSI_TABLE_INTERFACE
#define MYSQL_TABLE_LOCK_WAIT(OP, FLAGS, PAYLOAD)    \
  {                                                  \
    if (m_psi != NULL)                               \
    {                                                \
      PSI_table_locker *locker;                      \
      PSI_table_locker_state state;                  \
      locker= PSI_TABLE_CALL(start_table_lock_wait)  \
        (& state, m_psi, OP, FLAGS,                  \
        __FILE__, __LINE__);                         \
      PAYLOAD                                        \
      /* locker may be NULL if the wait was not instrumented */ \
      if (locker != NULL)                            \
        PSI_TABLE_CALL(end_table_lock_wait)(locker); \
    }                                                \
    else                                             \
    {                                                \
      PAYLOAD                                        \
    }                                                \
  }
#else
#define MYSQL_TABLE_LOCK_WAIT(OP, FLAGS, PAYLOAD) \
  PAYLOAD
#endif
99
100
/*
  While we have legacy_db_type, we have this array to
  check for dups and to find handlerton from legacy_db_type.
  Remove when legacy_db_type is finally gone
*/
st_plugin_int *hton2plugin[MAX_HA];

/* Maps a legacy_db_type code to its installed handlerton (NULL if free). */
static handlerton *installed_htons[128];

#define BITMAP_STACKBUF_SIZE (128/8)

/* Defaults used when CREATE TABLE/INDEX gives no explicit key options. */
KEY_CREATE_INFO default_key_create_info=
{ HA_KEY_ALG_UNDEF, 0, 0, {NullS, 0}, {NullS, 0}, true };

/* number of entries in handlertons[] */
ulong total_ha= 0;
/* number of storage engines (from handlertons[]) that support 2pc */
ulong total_ha_2pc= 0;
#ifdef DBUG_ASSERT_EXISTS
/*
  Number of non-mandatory 2pc handlertons whose initialization failed,
  used to estimate the total_ha_2pc value under the supposition that
  the failures had not occurred.
*/
ulong failed_ha_2pc= 0;
#endif
/* size of savepoint storage area (see ha_init) */
ulong savepoint_alloc_size= 0;

/*
  Historical engine-name aliases, stored as (alias, canonical-name) pairs
  and terminated by {NullS, 0}; see ha_resolve_by_name().
*/
static const LEX_CSTRING sys_table_aliases[]=
{
  { STRING_WITH_LEN("INNOBASE") }, { STRING_WITH_LEN("INNODB") },
  { STRING_WITH_LEN("HEAP") }, { STRING_WITH_LEN("MEMORY") },
  { STRING_WITH_LEN("MERGE") }, { STRING_WITH_LEN("MRG_MYISAM") },
  { STRING_WITH_LEN("Maria") }, { STRING_WITH_LEN("Aria") },
  {NullS, 0}
};

/* Names for ROW_FORMAT values, indexed by enum row_type. */
const char *ha_row_type[] = {
  "", "FIXED", "DYNAMIC", "COMPRESSED", "REDUNDANT", "COMPACT", "PAGE"
};

const char *tx_isolation_names[] =
{ "READ-UNCOMMITTED", "READ-COMMITTED", "REPEATABLE-READ", "SERIALIZABLE",
  NullS};
TYPELIB tx_isolation_typelib= {array_elements(tx_isolation_names)-1,"",
                               tx_isolation_names, NULL};

/* Lazily-populated list of known table file extensions. */
static TYPELIB known_extensions= {0,"known_exts", NULL, NULL};
uint known_extensions_id= 0;
151
152
153 class Table_exists_error_handler : public Internal_error_handler
154 {
155 public:
Table_exists_error_handler()156 Table_exists_error_handler()
157 : m_handled_errors(0), m_unhandled_errors(0)
158 {}
159
handle_condition(THD * thd,uint sql_errno,const char * sqlstate,Sql_condition::enum_warning_level * level,const char * msg,Sql_condition ** cond_hdl)160 bool handle_condition(THD *thd,
161 uint sql_errno,
162 const char* sqlstate,
163 Sql_condition::enum_warning_level *level,
164 const char* msg,
165 Sql_condition ** cond_hdl)
166 {
167 *cond_hdl= NULL;
168 if (non_existing_table_error(sql_errno))
169 {
170 m_handled_errors++;
171 return TRUE;
172 }
173
174 if (*level == Sql_condition::WARN_LEVEL_ERROR)
175 m_unhandled_errors++;
176 return FALSE;
177 }
178
safely_trapped_errors()179 bool safely_trapped_errors()
180 {
181 return ((m_handled_errors > 0) && (m_unhandled_errors == 0));
182 }
183
184 private:
185 int m_handled_errors;
186 int m_unhandled_errors;
187 };
188
189
/* Forward declaration; shared commit tail defined later in this file. */
static int commit_one_phase_2(THD *thd, bool all, THD_TRANS *trans,
                              bool is_real_trans);
192
193
ha_default_plugin(THD * thd)194 static plugin_ref ha_default_plugin(THD *thd)
195 {
196 if (thd->variables.table_plugin)
197 return thd->variables.table_plugin;
198 return my_plugin_lock(thd, global_system_variables.table_plugin);
199 }
200
ha_default_tmp_plugin(THD * thd)201 static plugin_ref ha_default_tmp_plugin(THD *thd)
202 {
203 if (thd->variables.tmp_table_plugin)
204 return thd->variables.tmp_table_plugin;
205 if (global_system_variables.tmp_table_plugin)
206 return my_plugin_lock(thd, global_system_variables.tmp_table_plugin);
207 return ha_default_plugin(thd);
208 }
209
210
211 /** @brief
212 Return the default storage engine handlerton for thread
213
214 SYNOPSIS
215 ha_default_handlerton(thd)
216 thd current thread
217
218 RETURN
219 pointer to handlerton
220 */
ha_default_handlerton(THD * thd)221 handlerton *ha_default_handlerton(THD *thd)
222 {
223 plugin_ref plugin= ha_default_plugin(thd);
224 DBUG_ASSERT(plugin);
225 handlerton *hton= plugin_hton(plugin);
226 DBUG_ASSERT(hton);
227 return hton;
228 }
229
230
ha_default_tmp_handlerton(THD * thd)231 handlerton *ha_default_tmp_handlerton(THD *thd)
232 {
233 plugin_ref plugin= ha_default_tmp_plugin(thd);
234 DBUG_ASSERT(plugin);
235 handlerton *hton= plugin_hton(plugin);
236 DBUG_ASSERT(hton);
237 return hton;
238 }
239
240
241 /** @brief
242 Return the storage engine handlerton for the supplied name
243
244 SYNOPSIS
245 ha_resolve_by_name(thd, name)
246 thd current thread
247 name name of storage engine
248
249 RETURN
250 pointer to storage engine plugin handle
251 */
ha_resolve_by_name(THD * thd,const LEX_CSTRING * name,bool tmp_table)252 plugin_ref ha_resolve_by_name(THD *thd, const LEX_CSTRING *name,
253 bool tmp_table)
254 {
255 const LEX_CSTRING *table_alias;
256 plugin_ref plugin;
257
258 redo:
259 if (thd && !my_charset_latin1.strnncoll(
260 (const uchar *)name->str, name->length,
261 (const uchar *)STRING_WITH_LEN("DEFAULT"), 0))
262 return tmp_table ? ha_default_tmp_plugin(thd) : ha_default_plugin(thd);
263
264 if ((plugin= my_plugin_lock_by_name(thd, name, MYSQL_STORAGE_ENGINE_PLUGIN)))
265 {
266 handlerton *hton= plugin_hton(plugin);
267 if (hton && !(hton->flags & HTON_NOT_USER_SELECTABLE))
268 return plugin;
269
270 /*
271 unlocking plugin immediately after locking is relatively low cost.
272 */
273 plugin_unlock(thd, plugin);
274 }
275
276 /*
277 We check for the historical aliases.
278 */
279 for (table_alias= sys_table_aliases; table_alias->str; table_alias+= 2)
280 {
281 if (!my_charset_latin1.strnncoll(
282 (const uchar *)name->str, name->length,
283 (const uchar *)table_alias->str, table_alias->length))
284 {
285 name= table_alias + 1;
286 goto redo;
287 }
288 }
289
290 return NULL;
291 }
292
293
294 bool
resolve_storage_engine_with_error(THD * thd,handlerton ** ha,bool tmp_table)295 Storage_engine_name::resolve_storage_engine_with_error(THD *thd,
296 handlerton **ha,
297 bool tmp_table)
298 {
299 if (plugin_ref plugin= ha_resolve_by_name(thd, &m_storage_engine_name,
300 tmp_table))
301 {
302 *ha= plugin_hton(plugin);
303 return false;
304 }
305
306 *ha= NULL;
307 if (thd->variables.sql_mode & MODE_NO_ENGINE_SUBSTITUTION)
308 {
309 my_error(ER_UNKNOWN_STORAGE_ENGINE, MYF(0), m_storage_engine_name.str);
310 return true;
311 }
312 push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN,
313 ER_UNKNOWN_STORAGE_ENGINE,
314 ER_THD(thd, ER_UNKNOWN_STORAGE_ENGINE),
315 m_storage_engine_name.str);
316 return false;
317 }
318
319
ha_lock_engine(THD * thd,const handlerton * hton)320 plugin_ref ha_lock_engine(THD *thd, const handlerton *hton)
321 {
322 if (hton)
323 {
324 st_plugin_int *plugin= hton2plugin[hton->slot];
325 return my_plugin_lock(thd, plugin_int_to_ref(plugin));
326 }
327 return NULL;
328 }
329
330
ha_resolve_by_legacy_type(THD * thd,enum legacy_db_type db_type)331 handlerton *ha_resolve_by_legacy_type(THD *thd, enum legacy_db_type db_type)
332 {
333 plugin_ref plugin;
334 switch (db_type) {
335 case DB_TYPE_DEFAULT:
336 return ha_default_handlerton(thd);
337 default:
338 if (db_type > DB_TYPE_UNKNOWN && db_type < DB_TYPE_DEFAULT &&
339 (plugin= ha_lock_engine(thd, installed_htons[db_type])))
340 return plugin_hton(plugin);
341 /* fall through */
342 case DB_TYPE_UNKNOWN:
343 return NULL;
344 }
345 }
346
347
/**
  Use other database handler if databasehandler is not compiled in.

  @param thd            current thread
  @param hton           requested storage engine
  @param no_substitute  if true, return NULL instead of substituting

  @return hton when the engine is enabled; NULL when disabled and
          no_substitute is set; otherwise the default engine's handlerton.
*/
handlerton *ha_checktype(THD *thd, handlerton *hton, bool no_substitute)
{
  if (ha_storage_engine_is_enabled(hton))
    return hton;

  if (no_substitute)
    return NULL;
#ifdef WITH_WSREP
  /*
    NOTE(review): this unconditionally rolls back the wsrep transaction
    before substituting the default engine — confirm this is intended
    here; upstream ha_checktype has no wsrep call on this path.
  */
  (void)wsrep_after_rollback(thd, false);
#endif /* WITH_WSREP */

  return ha_default_handlerton(thd);
} /* ha_checktype */
364
365
get_new_handler(TABLE_SHARE * share,MEM_ROOT * alloc,handlerton * db_type)366 handler *get_new_handler(TABLE_SHARE *share, MEM_ROOT *alloc,
367 handlerton *db_type)
368 {
369 handler *file;
370 DBUG_ENTER("get_new_handler");
371 DBUG_PRINT("enter", ("alloc: %p", alloc));
372
373 if (ha_storage_engine_is_enabled(db_type))
374 {
375 if ((file= db_type->create(db_type, share, alloc)))
376 file->init();
377 DBUG_RETURN(file);
378 }
379 /*
380 Try the default table type
381 Here the call to current_thd() is ok as we call this function a lot of
382 times but we enter this branch very seldom.
383 */
384 file= get_new_handler(share, alloc, ha_default_handlerton(current_thd));
385 DBUG_RETURN(file);
386 }
387
388
#ifdef WITH_PARTITION_STORAGE_ENGINE
/** Create and initialize a ha_partition handler for `part_info`. */
handler *get_ha_partition(partition_info *part_info)
{
  DBUG_ENTER("get_ha_partition");
  ha_partition *part= new ha_partition(partition_hton, part_info);
  if (!part)
  {
    my_error(ER_OUTOFMEMORY, MYF(ME_FATAL),
             static_cast<int>(sizeof(ha_partition)));
  }
  else if (part->initialize_partition(current_thd->mem_root))
  {
    /* Initialization failed: destroy the half-built handler. */
    delete part;
    part= 0;
  }
  else
    part->init();
  DBUG_RETURN((handler*) part);
}
#endif
412
/* Message table registered with my_error() for HA_ERR_* codes. */
static const char **handler_errmsgs;

C_MODE_START
/* my_error() callback: returns the handler error-message array. */
static const char **get_handler_errmsgs(int nr)
{
  return handler_errmsgs;
}
C_MODE_END
421
422
/**
  Register handler error messages for use with my_error().

  Allocates the HA_ERR_* message table and registers it via
  my_error_register(); ha_finish_errors() undoes this.

  @retval
    0           OK
  @retval
    !=0         Error (allocation failure)
*/

int ha_init_errors(void)
{
  /* Index messages by HA_ERR_* code relative to HA_ERR_FIRST. */
#define SETMSG(nr, msg) handler_errmsgs[(nr) - HA_ERR_FIRST]= (msg)

  /* Allocate a pointer array for the error message strings. */
  /* Zerofill it to avoid uninitialized gaps. */
  if (! (handler_errmsgs= (const char**) my_malloc(key_memory_handler_errmsgs,
                                                   HA_ERR_ERRORS * sizeof(char*),
                                                   MYF(MY_WME | MY_ZEROFILL))))
    return 1;

  /* Set the dedicated error messages. */
  SETMSG(HA_ERR_KEY_NOT_FOUND,          ER_DEFAULT(ER_KEY_NOT_FOUND));
  SETMSG(HA_ERR_FOUND_DUPP_KEY,         ER_DEFAULT(ER_DUP_KEY));
  SETMSG(HA_ERR_RECORD_CHANGED,         "Update which is recoverable");
  SETMSG(HA_ERR_WRONG_INDEX,            "Wrong index given to function");
  SETMSG(HA_ERR_CRASHED,                ER_DEFAULT(ER_NOT_KEYFILE));
  SETMSG(HA_ERR_WRONG_IN_RECORD,        ER_DEFAULT(ER_CRASHED_ON_USAGE));
  SETMSG(HA_ERR_OUT_OF_MEM,             "Table handler out of memory");
  SETMSG(HA_ERR_NOT_A_TABLE,            "Incorrect file format '%.64s'");
  SETMSG(HA_ERR_WRONG_COMMAND,          "Command not supported");
  SETMSG(HA_ERR_OLD_FILE,               ER_DEFAULT(ER_OLD_KEYFILE));
  SETMSG(HA_ERR_NO_ACTIVE_RECORD,       "No record read in update");
  SETMSG(HA_ERR_RECORD_DELETED,         "Intern record deleted");
  SETMSG(HA_ERR_RECORD_FILE_FULL,       ER_DEFAULT(ER_RECORD_FILE_FULL));
  SETMSG(HA_ERR_INDEX_FILE_FULL,        "No more room in index file '%.64s'");
  SETMSG(HA_ERR_END_OF_FILE,            "End in next/prev/first/last");
  SETMSG(HA_ERR_UNSUPPORTED,            ER_DEFAULT(ER_ILLEGAL_HA));
  SETMSG(HA_ERR_TO_BIG_ROW,             "Too big row");
  SETMSG(HA_WRONG_CREATE_OPTION,        "Wrong create option");
  SETMSG(HA_ERR_FOUND_DUPP_UNIQUE,      ER_DEFAULT(ER_DUP_UNIQUE));
  SETMSG(HA_ERR_UNKNOWN_CHARSET,        "Can't open charset");
  SETMSG(HA_ERR_WRONG_MRG_TABLE_DEF,    ER_DEFAULT(ER_WRONG_MRG_TABLE));
  SETMSG(HA_ERR_CRASHED_ON_REPAIR,      ER_DEFAULT(ER_CRASHED_ON_REPAIR));
  SETMSG(HA_ERR_CRASHED_ON_USAGE,       ER_DEFAULT(ER_CRASHED_ON_USAGE));
  SETMSG(HA_ERR_LOCK_WAIT_TIMEOUT,      ER_DEFAULT(ER_LOCK_WAIT_TIMEOUT));
  SETMSG(HA_ERR_LOCK_TABLE_FULL,        ER_DEFAULT(ER_LOCK_TABLE_FULL));
  SETMSG(HA_ERR_READ_ONLY_TRANSACTION,  ER_DEFAULT(ER_READ_ONLY_TRANSACTION));
  SETMSG(HA_ERR_LOCK_DEADLOCK,          ER_DEFAULT(ER_LOCK_DEADLOCK));
  SETMSG(HA_ERR_CANNOT_ADD_FOREIGN,     ER_DEFAULT(ER_CANNOT_ADD_FOREIGN));
  SETMSG(HA_ERR_NO_REFERENCED_ROW,      ER_DEFAULT(ER_NO_REFERENCED_ROW_2));
  SETMSG(HA_ERR_ROW_IS_REFERENCED,      ER_DEFAULT(ER_ROW_IS_REFERENCED_2));
  SETMSG(HA_ERR_NO_SAVEPOINT,           "No savepoint with that name");
  SETMSG(HA_ERR_NON_UNIQUE_BLOCK_SIZE,  "Non unique key block size");
  SETMSG(HA_ERR_NO_SUCH_TABLE,          "No such table: '%.64s'");
  SETMSG(HA_ERR_TABLE_EXIST,            ER_DEFAULT(ER_TABLE_EXISTS_ERROR));
  SETMSG(HA_ERR_NO_CONNECTION,          "Could not connect to storage engine");
  SETMSG(HA_ERR_TABLE_DEF_CHANGED,      ER_DEFAULT(ER_TABLE_DEF_CHANGED));
  SETMSG(HA_ERR_FOREIGN_DUPLICATE_KEY,  "FK constraint would lead to duplicate key");
  SETMSG(HA_ERR_TABLE_NEEDS_UPGRADE,    ER_DEFAULT(ER_TABLE_NEEDS_UPGRADE));
  SETMSG(HA_ERR_TABLE_READONLY,         ER_DEFAULT(ER_OPEN_AS_READONLY));
  SETMSG(HA_ERR_AUTOINC_READ_FAILED,    ER_DEFAULT(ER_AUTOINC_READ_FAILED));
  SETMSG(HA_ERR_AUTOINC_ERANGE,         ER_DEFAULT(ER_WARN_DATA_OUT_OF_RANGE));
  SETMSG(HA_ERR_TOO_MANY_CONCURRENT_TRXS, ER_DEFAULT(ER_TOO_MANY_CONCURRENT_TRXS));
  SETMSG(HA_ERR_INDEX_COL_TOO_LONG,     ER_DEFAULT(ER_INDEX_COLUMN_TOO_LONG));
  SETMSG(HA_ERR_INDEX_CORRUPT,          ER_DEFAULT(ER_INDEX_CORRUPT));
  SETMSG(HA_FTS_INVALID_DOCID,          "Invalid InnoDB FTS Doc ID");
  SETMSG(HA_ERR_TABLE_IN_FK_CHECK,      ER_DEFAULT(ER_TABLE_IN_FK_CHECK));
  SETMSG(HA_ERR_DISK_FULL,              ER_DEFAULT(ER_DISK_FULL));
  SETMSG(HA_ERR_FTS_TOO_MANY_WORDS_IN_PHRASE, "Too many words in a FTS phrase or proximity search");
  SETMSG(HA_ERR_FK_DEPTH_EXCEEDED,      "Foreign key cascade delete/update exceeds");
  SETMSG(HA_ERR_TABLESPACE_MISSING,     ER_DEFAULT(ER_TABLESPACE_MISSING));

  /* Register the error messages for use with my_error(). */
  return my_error_register(get_handler_errmsgs, HA_ERR_FIRST, HA_ERR_LAST);
}
498
499
500 /**
501 Unregister handler error messages.
502
503 @retval
504 0 OK
505 @retval
506 !=0 Error
507 */
ha_finish_errors(void)508 static int ha_finish_errors(void)
509 {
510 /* Allocate a pointer array for the error message strings. */
511 my_error_unregister(HA_ERR_FIRST, HA_ERR_LAST);
512 my_free(handler_errmsgs);
513 handler_errmsgs= 0;
514 return 0;
515 }
516
/* Counters of loaded engines needing each table-discovery feature. */
static Atomic_counter<int32> need_full_discover_for_existence(0);
static Atomic_counter<int32> engines_with_discover_file_names(0);
static Atomic_counter<int32> engines_with_discover(0);

/*
  Marker implementations of discover_table_existence; compared by
  address in update_discovery_counters(). Both are no-ops.
*/
static int full_discover_for_existence(handlerton *, const char *, const char *)
{ return 0; }

static int ext_based_existence(handlerton *, const char *, const char *)
{ return 0; }
526
hton_ext_based_table_discovery(handlerton * hton,LEX_CSTRING * db,MY_DIR * dir,handlerton::discovered_list * result)527 static int hton_ext_based_table_discovery(handlerton *hton, LEX_CSTRING *db,
528 MY_DIR *dir, handlerton::discovered_list *result)
529 {
530 /*
531 tablefile_extensions[0] is the metadata file, see
532 the comment above tablefile_extensions declaration
533 */
534 return extension_based_table_discovery(dir, hton->tablefile_extensions[0],
535 result);
536 }
537
update_discovery_counters(handlerton * hton,int val)538 static void update_discovery_counters(handlerton *hton, int val)
539 {
540 if (hton->discover_table_existence == full_discover_for_existence)
541 need_full_discover_for_existence+= val;
542
543 if (hton->discover_table_names && hton->tablefile_extensions[0])
544 engines_with_discover_file_names+= val;
545
546 if (hton->discover_table)
547 engines_with_discover+= val;
548 }
549
ha_drop_table(THD * thd,handlerton * hton,const char * path)550 int ha_drop_table(THD *thd, handlerton *hton, const char *path)
551 {
552 if (ha_check_if_updates_are_ignored(thd, hton, "DROP"))
553 return 0; // Simulate dropped
554 return hton->drop_table(hton, path);
555 }
556
hton_drop_table(handlerton * hton,const char * path)557 static int hton_drop_table(handlerton *hton, const char *path)
558 {
559 char tmp_path[FN_REFLEN];
560 handler *file= get_new_handler(nullptr, current_thd->mem_root, hton);
561 if (!file)
562 return ENOMEM;
563 path= get_canonical_filename(file, path, tmp_path);
564 int error= file->delete_table(path);
565 delete file;
566 return error;
567 }
568
569
/**
  Tear down a storage engine's handlerton when its plugin is uninstalled.

  Releases the legacy_db_type code and hton2plugin slot for reuse,
  closes the engine (panic), runs the plugin's deinit, and frees the
  handlerton.

  @param plugin  storage engine plugin being finalized
  @return 0 always (deinit failures are only logged)
*/
int ha_finalize_handlerton(st_plugin_int *plugin)
{
  handlerton *hton= (handlerton *)plugin->data;
  DBUG_ENTER("ha_finalize_handlerton");

  /* hton can be NULL here, if ha_initialize_handlerton() failed. */
  if (!hton)
    goto end;

  /* Free the legacy_db_type code for a later install. */
  if (installed_htons[hton->db_type] == hton)
    installed_htons[hton->db_type]= NULL;

  if (hton->panic)
    hton->panic(hton, HA_PANIC_CLOSE);

  if (plugin->plugin->deinit)
  {
    /*
      Today we have no defined/special behavior for uninstalling
      engine plugins.
    */
    DBUG_PRINT("info", ("Deinitializing plugin: '%s'", plugin->name.str));
    if (plugin->plugin->deinit(NULL))
    {
      DBUG_PRINT("warning", ("Plugin '%s' deinit function returned error.",
                             plugin->name.str));
    }
  }

  free_sysvar_table_options(hton);
  update_discovery_counters(hton, -1);

  /*
    In case a plugin is uninstalled and re-installed later, it should
    reuse an array slot. Otherwise the number of uninstall/install
    cycles would be limited.
  */
  if (hton->slot != HA_SLOT_UNDEF)
  {
    /* Make sure we are not unplugging another plugin */
    DBUG_ASSERT(hton2plugin[hton->slot] == plugin);
    DBUG_ASSERT(hton->slot < MAX_HA);
    hton2plugin[hton->slot]= NULL;
  }

  my_free(hton);

 end:
  DBUG_RETURN(0);
}
620
621
/**
  Build and register a handlerton for a storage engine plugin.

  Allocates a zero-filled handlerton, installs default discovery and
  drop-table hooks, runs the plugin's init, assigns a legacy_db_type
  code and a hton2plugin slot, accounts for 2pc support and savepoint
  storage, and finally publishes the engine.

  @param plugin  storage engine plugin being installed
  @retval 0 success
  @retval 1 failure (plugin->data is reset to NULL)
*/
int ha_initialize_handlerton(st_plugin_int *plugin)
{
  handlerton *hton;
  static const char *no_exts[]= { 0 };
  DBUG_ENTER("ha_initialize_handlerton");
  DBUG_PRINT("plugin", ("initialize plugin: '%s'", plugin->name.str));

  /* MY_ZEROFILL: all hooks the plugin does not set stay NULL. */
  hton= (handlerton *)my_malloc(key_memory_handlerton, sizeof(handlerton),
                                MYF(MY_WME | MY_ZEROFILL));
  if (hton == NULL)
  {
    sql_print_error("Unable to allocate memory for plugin '%s' handlerton.",
                    plugin->name.str);
    goto err_no_hton_memory;
  }

  /* Defaults the plugin's init() may override. */
  hton->tablefile_extensions= no_exts;
  hton->discover_table_names= hton_ext_based_table_discovery;
  hton->drop_table= hton_drop_table;

  hton->slot= HA_SLOT_UNDEF;
  /* Historical Requirement */
  plugin->data= hton; // shortcut for the future
  if (plugin->plugin->init && plugin->plugin->init(hton))
  {
    sql_print_error("Plugin '%s' init function returned error.",
                    plugin->name.str);
    goto err;
  }

  // hton_ext_based_table_discovery() works only when discovery
  // is supported and the engine is file-based.
  if (hton->discover_table_names == hton_ext_based_table_discovery &&
      (!hton->discover_table || !hton->tablefile_extensions[0]))
    hton->discover_table_names= NULL;

  // default discover_table_existence implementation
  if (!hton->discover_table_existence && hton->discover_table)
  {
    if (hton->tablefile_extensions[0])
      hton->discover_table_existence= ext_based_existence;
    else
      hton->discover_table_existence= full_discover_for_existence;
  }

  uint tmp;
  ulong fslot;

  /* Debug hook: force a fresh typecode to exercise the conflict path. */
  DBUG_EXECUTE_IF("unstable_db_type", {
                    static int i= (int) DB_TYPE_FIRST_DYNAMIC;
                    hton->db_type= (enum legacy_db_type)++i;
                  });

  /* now check the db_type for conflict */
  if (hton->db_type <= DB_TYPE_UNKNOWN ||
      hton->db_type >= DB_TYPE_DEFAULT ||
      installed_htons[hton->db_type])
  {
    /* Find a free code in the dynamic range. */
    int idx= (int) DB_TYPE_FIRST_DYNAMIC;

    while (idx < (int) DB_TYPE_DEFAULT && installed_htons[idx])
      idx++;

    if (idx == (int) DB_TYPE_DEFAULT)
    {
      sql_print_warning("Too many storage engines!");
      goto err_deinit;
    }
    if (hton->db_type != DB_TYPE_UNKNOWN)
      sql_print_warning("Storage engine '%s' has conflicting typecode. "
                        "Assigning value %d.", plugin->plugin->name, idx);
    hton->db_type= (enum legacy_db_type) idx;
  }

  /*
    In case a plugin is uninstalled and re-installed later, it should
    reuse an array slot. Otherwise the number of uninstall/install
    cycles would be limited. So look for a free slot.
  */
  DBUG_PRINT("plugin", ("total_ha: %lu", total_ha));
  for (fslot= 0; fslot < total_ha; fslot++)
  {
    if (!hton2plugin[fslot])
      break;
  }
  if (fslot < total_ha)
    hton->slot= fslot;
  else
  {
    if (total_ha >= MAX_HA)
    {
      sql_print_error("Too many plugins loaded. Limit is %lu. "
                      "Failed on '%s'", (ulong) MAX_HA, plugin->name.str);
      goto err_deinit;
    }
    hton->slot= total_ha++;
  }
  installed_htons[hton->db_type]= hton;
  /* Reserve this engine's savepoint area inside the shared buffer. */
  tmp= hton->savepoint_offset;
  hton->savepoint_offset= savepoint_alloc_size;
  savepoint_alloc_size+= tmp;
  hton2plugin[hton->slot]=plugin;
  if (hton->prepare)
  {
    total_ha_2pc++;
    /* XA cannot be enabled after the tc-log implementation is fixed. */
    if (tc_log && tc_log != get_tc_log_implementation())
    {
      total_ha_2pc--;
      hton->prepare= 0;
      push_warning_printf(current_thd, Sql_condition::WARN_LEVEL_WARN,
                          ER_UNKNOWN_ERROR,
                          "Cannot enable tc-log at run-time. "
                          "XA features of %s are disabled",
                          plugin->name.str);
    }
  }

  /*
    This is entirely for legacy. We will create a new "disk based" hton and a
    "memory" hton which will be configurable longterm. We should be able to
    remove partition.
  */
  switch (hton->db_type) {
  case DB_TYPE_HEAP:
    heap_hton= hton;
    break;
  case DB_TYPE_MYISAM:
    myisam_hton= hton;
    break;
  case DB_TYPE_PARTITION_DB:
    partition_hton= hton;
    break;
  case DB_TYPE_SEQUENCE:
    sql_sequence_hton= hton;
    break;
  default:
    break;
  };

  resolve_sysvar_table_options(hton);
  update_discovery_counters(hton, 1);

  DBUG_RETURN(0);

err_deinit:
  /*
    Let plugin do its inner deinitialization as plugin->init()
    was successfully called before.
  */
  if (plugin->plugin->deinit)
    (void) plugin->plugin->deinit(NULL);

err:
#ifdef DBUG_ASSERT_EXISTS
  if (hton->prepare)
    failed_ha_2pc++;
#endif
  my_free(hton);
err_no_hton_memory:
  plugin->data= NULL;
  DBUG_RETURN(1);
}
784
ha_init()785 int ha_init()
786 {
787 int error= 0;
788 DBUG_ENTER("ha_init");
789
790 DBUG_ASSERT(total_ha < MAX_HA);
791 /*
792 Check if there is a transaction-capable storage engine besides the
793 binary log (which is considered a transaction-capable storage engine in
794 counting total_ha)
795 */
796 opt_using_transactions= total_ha > (ulong) opt_bin_log;
797 savepoint_alloc_size+= sizeof(SAVEPOINT);
798 DBUG_RETURN(error);
799 }
800
ha_end()801 int ha_end()
802 {
803 int error= 0;
804 DBUG_ENTER("ha_end");
805
806 /*
807 This should be eventually based on the graceful shutdown flag.
808 So if flag is equal to HA_PANIC_CLOSE, the deallocate
809 the errors.
810 */
811 if (unlikely(ha_finish_errors()))
812 error= 1;
813
814 DBUG_RETURN(error);
815 }
816
dropdb_handlerton(THD * unused1,plugin_ref plugin,void * path)817 static my_bool dropdb_handlerton(THD *unused1, plugin_ref plugin,
818 void *path)
819 {
820 handlerton *hton= plugin_hton(plugin);
821 if (hton->drop_database)
822 hton->drop_database(hton, (char *)path);
823 return FALSE;
824 }
825
826
ha_drop_database(char * path)827 void ha_drop_database(char* path)
828 {
829 plugin_foreach(NULL, dropdb_handlerton, MYSQL_STORAGE_ENGINE_PLUGIN, path);
830 }
831
832
checkpoint_state_handlerton(THD * unused1,plugin_ref plugin,void * disable)833 static my_bool checkpoint_state_handlerton(THD *unused1, plugin_ref plugin,
834 void *disable)
835 {
836 handlerton *hton= plugin_hton(plugin);
837 if (hton->checkpoint_state)
838 hton->checkpoint_state(hton, (int) *(bool*) disable);
839 return FALSE;
840 }
841
842
ha_checkpoint_state(bool disable)843 void ha_checkpoint_state(bool disable)
844 {
845 plugin_foreach(NULL, checkpoint_state_handlerton, MYSQL_STORAGE_ENGINE_PLUGIN, &disable);
846 }
847
848
/*
  Argument pair carried through plugin_foreach() to
  commit_checkpoint_request_handlerton().
*/
struct st_commit_checkpoint_request {
  void *cookie;              /* opaque token handed to each engine */
  void (*pre_hook)(void *);  /* optional; called before each engine hook */
};
853
commit_checkpoint_request_handlerton(THD * unused1,plugin_ref plugin,void * data)854 static my_bool commit_checkpoint_request_handlerton(THD *unused1, plugin_ref plugin,
855 void *data)
856 {
857 st_commit_checkpoint_request *st= (st_commit_checkpoint_request *)data;
858 handlerton *hton= plugin_hton(plugin);
859 if (hton->commit_checkpoint_request)
860 {
861 void *cookie= st->cookie;
862 if (st->pre_hook)
863 (*st->pre_hook)(cookie);
864 (*hton->commit_checkpoint_request)(cookie);
865 }
866 return FALSE;
867 }
868
869
870 /*
871 Invoke commit_checkpoint_request() in all storage engines that implement it.
872
873 If pre_hook is non-NULL, the hook will be called prior to each invocation.
874 */
875 void
ha_commit_checkpoint_request(void * cookie,void (* pre_hook)(void *))876 ha_commit_checkpoint_request(void *cookie, void (*pre_hook)(void *))
877 {
878 st_commit_checkpoint_request st;
879 st.cookie= cookie;
880 st.pre_hook= pre_hook;
881 plugin_foreach(NULL, commit_checkpoint_request_handlerton,
882 MYSQL_STORAGE_ENGINE_PLUGIN, &st);
883 }
884
885
886 /**
887 @note
888 don't bother to rollback here, it's done already
889
890 there's no need to rollback here as all transactions must
891 be rolled back already
892 */
ha_close_connection(THD * thd)893 void ha_close_connection(THD* thd)
894 {
895 for (auto i= 0; i < MAX_HA; i++)
896 {
897 if (thd->ha_data[i].lock)
898 {
899 handlerton *hton= plugin_hton(thd->ha_data[i].lock);
900 if (hton->close_connection)
901 hton->close_connection(hton, thd);
902 /* make sure SE didn't reset ha_data in close_connection() */
903 DBUG_ASSERT(thd->ha_data[i].lock);
904 /* make sure ha_data is reset and ha_data_lock is released */
905 thd_set_ha_data(thd, hton, 0);
906 }
907 DBUG_ASSERT(!thd->ha_data[i].ha_ptr);
908 }
909 }
910
kill_handlerton(THD * thd,plugin_ref plugin,void * level)911 static my_bool kill_handlerton(THD *thd, plugin_ref plugin,
912 void *level)
913 {
914 handlerton *hton= plugin_hton(plugin);
915
916 mysql_mutex_assert_owner(&thd->LOCK_thd_kill);
917 if (hton->kill_query && thd_get_ha_data(thd, hton))
918 hton->kill_query(hton, thd, *(enum thd_kill_levels *) level);
919 return FALSE;
920 }
921
ha_kill_query(THD * thd,enum thd_kill_levels level)922 void ha_kill_query(THD* thd, enum thd_kill_levels level)
923 {
924 DBUG_ENTER("ha_kill_query");
925 plugin_foreach(thd, kill_handlerton, MYSQL_STORAGE_ENGINE_PLUGIN, &level);
926 DBUG_VOID_RETURN;
927 }
928
929
930 /*****************************************************************************
931 Backup functions
932 ******************************************************************************/
933
plugin_prepare_for_backup(THD * unused1,plugin_ref plugin,void * not_used)934 static my_bool plugin_prepare_for_backup(THD *unused1, plugin_ref plugin,
935 void *not_used)
936 {
937 handlerton *hton= plugin_hton(plugin);
938 if (hton->prepare_for_backup)
939 hton->prepare_for_backup();
940 return FALSE;
941 }
942
ha_prepare_for_backup()943 void ha_prepare_for_backup()
944 {
945 plugin_foreach_with_mask(0, plugin_prepare_for_backup,
946 MYSQL_STORAGE_ENGINE_PLUGIN,
947 PLUGIN_IS_DELETED|PLUGIN_IS_READY, 0);
948 }
949
plugin_end_backup(THD * unused1,plugin_ref plugin,void * not_used)950 static my_bool plugin_end_backup(THD *unused1, plugin_ref plugin,
951 void *not_used)
952 {
953 handlerton *hton= plugin_hton(plugin);
954 if (hton->end_backup)
955 hton->end_backup();
956 return FALSE;
957 }
958
ha_end_backup()959 void ha_end_backup()
960 {
961 plugin_foreach_with_mask(0, plugin_end_backup,
962 MYSQL_STORAGE_ENGINE_PLUGIN,
963 PLUGIN_IS_DELETED|PLUGIN_IS_READY, 0);
964 }
965
966
967 /*
968 Inform plugin of the server shutdown.
969 Called after all connections are down.
970
971 Under some circumstances, storage engine might need to
972 so some work, before deinit() can be safely called.
973 (an example is Innodb purge that might call into server
974 to calculate virtual columns, which might potentially also
975 invoke other plugins, such as audit
976 */
plugin_pre_shutdown(THD *,plugin_ref plugin,void *)977 static my_bool plugin_pre_shutdown(THD *, plugin_ref plugin, void *)
978 {
979 handlerton *hton= plugin_hton(plugin);
980 if (hton->pre_shutdown)
981 hton->pre_shutdown();
982 return FALSE;
983 }
984
985
ha_pre_shutdown()986 void ha_pre_shutdown()
987 {
988 plugin_foreach_with_mask(0, plugin_pre_shutdown,
989 MYSQL_STORAGE_ENGINE_PLUGIN,
990 PLUGIN_IS_DELETED | PLUGIN_IS_READY, 0);
991 }
992
993
994 /* ========================================================================
995 ======================= TRANSACTIONS ===================================*/
996
997 /**
998 Transaction handling in the server
999 ==================================
1000
1001 In each client connection, MySQL maintains two transactional
1002 states:
1003 - a statement transaction,
1004 - a standard, also called normal transaction.
1005
1006 Historical note
1007 ---------------
1008 "Statement transaction" is a non-standard term that comes
1009 from the times when MySQL supported BerkeleyDB storage engine.
1010
1011 First of all, it should be said that in BerkeleyDB auto-commit
1012 mode auto-commits operations that are atomic to the storage
1013 engine itself, such as a write of a record, and are too
1014 high-granular to be atomic from the application perspective
1015 (MySQL). One SQL statement could involve many BerkeleyDB
1016 auto-committed operations and thus BerkeleyDB auto-commit was of
1017 little use to MySQL.
1018
1019 Secondly, instead of SQL standard savepoints, BerkeleyDB
1020 provided the concept of "nested transactions". In a nutshell,
1021 transactions could be arbitrarily nested, but when the parent
1022 transaction was committed or aborted, all its child (nested)
  transactions were committed or aborted as well.
1024 Commit of a nested transaction, in turn, made its changes
1025 visible, but not durable: it destroyed the nested transaction,
1026 all its changes would become available to the parent and
1027 currently active nested transactions of this parent.
1028
1029 So the mechanism of nested transactions was employed to
1030 provide "all or nothing" guarantee of SQL statements
1031 required by the standard.
1032 A nested transaction would be created at start of each SQL
1033 statement, and destroyed (committed or aborted) at statement
1034 end. Such nested transaction was internally referred to as
1035 a "statement transaction" and gave birth to the term.
1036
1037 (Historical note ends)
1038
1039 Since then a statement transaction is started for each statement
1040 that accesses transactional tables or uses the binary log. If
1041 the statement succeeds, the statement transaction is committed.
1042 If the statement fails, the transaction is rolled back. Commits
1043 of statement transactions are not durable -- each such
1044 transaction is nested in the normal transaction, and if the
1045 normal transaction is rolled back, the effects of all enclosed
1046 statement transactions are undone as well. Technically,
1047 a statement transaction can be viewed as a savepoint which is
1048 maintained automatically in order to make effects of one
1049 statement atomic.
1050
1051 The normal transaction is started by the user and is ended
1052 usually upon a user request as well. The normal transaction
1053 encloses transactions of all statements issued between
1054 its beginning and its end.
1055 In autocommit mode, the normal transaction is equivalent
1056 to the statement transaction.
1057
1058 Since MySQL supports PSEA (pluggable storage engine
1059 architecture), more than one transactional engine can be
1060 active at a time. Hence transactions, from the server
1061 point of view, are always distributed. In particular,
1062 transactional state is maintained independently for each
1063 engine. In order to commit a transaction the two phase
1064 commit protocol is employed.
1065
1066 Not all statements are executed in context of a transaction.
1067 Administrative and status information statements do not modify
1068 engine data, and thus do not start a statement transaction and
1069 also have no effect on the normal transaction. Examples of such
1070 statements are SHOW STATUS and RESET SLAVE.
1071
1072 Similarly DDL statements are not transactional,
1073 and therefore a transaction is [almost] never started for a DDL
1074 statement. The difference between a DDL statement and a purely
1075 administrative statement though is that a DDL statement always
1076 commits the current transaction before proceeding, if there is
1077 any.
1078
1079 At last, SQL statements that work with non-transactional
1080 engines also have no effect on the transaction state of the
1081 connection. Even though they are written to the binary log,
1082 and the binary log is, overall, transactional, the writes
1083 are done in "write-through" mode, directly to the binlog
  file, followed by an OS cache sync, in other words,
1085 bypassing the binlog undo log (translog).
1086 They do not commit the current normal transaction.
1087 A failure of a statement that uses non-transactional tables
1088 would cause a rollback of the statement transaction, but
  but in case no non-transactional tables are used,
1090 no statement transaction is started.
1091
1092 Data layout
1093 -----------
1094
1095 The server stores its transaction-related data in
1096 thd->transaction. This structure has two members of type
1097 THD_TRANS. These members correspond to the statement and
1098 normal transactions respectively:
1099
1100 - thd->transaction.stmt contains a list of engines
1101 that are participating in the given statement
1102 - thd->transaction.all contains a list of engines that
1103 have participated in any of the statement transactions started
1104 within the context of the normal transaction.
1105 Each element of the list contains a pointer to the storage
1106 engine, engine-specific transactional data, and engine-specific
1107 transaction flags.
1108
1109 In autocommit mode thd->transaction.all is empty.
1110 Instead, data of thd->transaction.stmt is
1111 used to commit/rollback the normal transaction.
1112
1113 The list of registered engines has a few important properties:
1114 - no engine is registered in the list twice
1115 - engines are present in the list a reverse temporal order --
1116 new participants are always added to the beginning of the list.
1117
1118 Transaction life cycle
1119 ----------------------
1120
1121 When a new connection is established, thd->transaction
1122 members are initialized to an empty state.
1123 If a statement uses any tables, all affected engines
1124 are registered in the statement engine list. In
1125 non-autocommit mode, the same engines are registered in
1126 the normal transaction list.
1127 At the end of the statement, the server issues a commit
1128 or a roll back for all engines in the statement list.
1129 At this point transaction flags of an engine, if any, are
1130 propagated from the statement list to the list of the normal
1131 transaction.
1132 When commit/rollback is finished, the statement list is
1133 cleared. It will be filled in again by the next statement,
1134 and emptied again at the next statement's end.
1135
1136 The normal transaction is committed in a similar way
1137 (by going over all engines in thd->transaction.all list)
1138 but at different times:
1139 - upon COMMIT SQL statement is issued by the user
1140 - implicitly, by the server, at the beginning of a DDL statement
1141 or SET AUTOCOMMIT={0|1} statement.
1142
1143 The normal transaction can be rolled back as well:
1144 - if the user has requested so, by issuing ROLLBACK SQL
1145 statement
1146 - if one of the storage engines requested a rollback
1147 by setting thd->transaction_rollback_request. This may
1148 happen in case, e.g., when the transaction in the engine was
1149 chosen a victim of the internal deadlock resolution algorithm
1150 and rolled back internally. When such a situation happens, there
1151 is little the server can do and the only option is to rollback
1152 transactions in all other participating engines. In this case
1153 the rollback is accompanied by an error sent to the user.
1154
1155 As follows from the use cases above, the normal transaction
1156 is never committed when there is an outstanding statement
1157 transaction. In most cases there is no conflict, since
1158 commits of the normal transaction are issued by a stand-alone
1159 administrative or DDL statement, thus no outstanding statement
1160 transaction of the previous statement exists. Besides,
1161 all statements that manipulate with the normal transaction
1162 are prohibited in stored functions and triggers, therefore
1163 no conflicting situation can occur in a sub-statement either.
1164 The remaining rare cases when the server explicitly has
1165 to commit the statement transaction prior to committing the normal
1166 one cover error-handling scenarios (see for example
1167 SQLCOM_LOCK_TABLES).
1168
1169 When committing a statement or a normal transaction, the server
1170 either uses the two-phase commit protocol, or issues a commit
1171 in each engine independently. The two-phase commit protocol
1172 is used only if:
1173 - all participating engines support two-phase commit (provide
1174 handlerton::prepare PSEA API call) and
1175 - transactions in at least two engines modify data (i.e. are
1176 not read-only).
1177
1178 Note that the two phase commit is used for
1179 statement transactions, even though they are not durable anyway.
1180 This is done to ensure logical consistency of data in a multiple-
1181 engine transaction.
1182 For example, imagine that some day MySQL supports unique
1183 constraint checks deferred till the end of statement. In such
1184 case a commit in one of the engines may yield ER_DUP_KEY,
1185 and MySQL should be able to gracefully abort statement
1186 transactions of other participants.
1187
1188 After the normal transaction has been committed,
1189 thd->transaction.all list is cleared.
1190
1191 When a connection is closed, the current normal transaction, if
1192 any, is rolled back.
1193
1194 Roles and responsibilities
1195 --------------------------
1196
1197 The server has no way to know that an engine participates in
1198 the statement and a transaction has been started
1199 in it unless the engine says so. Thus, in order to be
1200 a part of a transaction, the engine must "register" itself.
1201 This is done by invoking trans_register_ha() server call.
1202 Normally the engine registers itself whenever handler::external_lock()
1203 is called. trans_register_ha() can be invoked many times: if
1204 an engine is already registered, the call does nothing.
1205 In case autocommit is not set, the engine must register itself
1206 twice -- both in the statement list and in the normal transaction
1207 list.
1208 In which list to register is a parameter of trans_register_ha().
1209
1210 Note, that although the registration interface in itself is
1211 fairly clear, the current usage practice often leads to undesired
1212 effects. E.g. since a call to trans_register_ha() in most engines
1213 is embedded into implementation of handler::external_lock(), some
1214 DDL statements start a transaction (at least from the server
1215 point of view) even though they are not expected to. E.g.
1216 CREATE TABLE does not start a transaction, since
1217 handler::external_lock() is never called during CREATE TABLE. But
1218 CREATE TABLE ... SELECT does, since handler::external_lock() is
1219 called for the table that is being selected from. This has no
1220 practical effects currently, but must be kept in mind
1221 nevertheless.
1222
1223 Once an engine is registered, the server will do the rest
1224 of the work.
1225
1226 During statement execution, whenever any of data-modifying
1227 PSEA API methods is used, e.g. handler::write_row() or
1228 handler::update_row(), the read-write flag is raised in the
1229 statement transaction for the involved engine.
  Currently all PSEA calls are "traced", and the data can not be
1231 changed in a way other than issuing a PSEA call. Important:
1232 unless this invariant is preserved the server will not know that
1233 a transaction in a given engine is read-write and will not
1234 involve the two-phase commit protocol!
1235
  At the end of a statement, the server call trans_commit_stmt() is
  invoked. This call in turn invokes handlerton::prepare()
  for every involved engine. Prepare is followed by a call
  to handlerton::commit_one_phase(). If a one-phase
  commit will suffice, handlerton::prepare() is not invoked and
  the server only calls handlerton::commit_one_phase().
1242 At statement commit, the statement-related read-write
1243 engine flag is propagated to the corresponding flag in the
1244 normal transaction. When the commit is complete, the list
1245 of registered engines is cleared.
1246
1247 Rollback is handled in a similar fashion.
1248
1249 Additional notes on DDL and the normal transaction.
1250 ---------------------------------------------------
1251
1252 DDLs and operations with non-transactional engines
1253 do not "register" in thd->transaction lists, and thus do not
1254 modify the transaction state. Besides, each DDL in
1255 MySQL is prefixed with an implicit normal transaction commit
1256 (a call to trans_commit_implicit()), and thus leaves nothing
1257 to modify.
1258 However, as it has been pointed out with CREATE TABLE .. SELECT,
1259 some DDL statements can start a *new* transaction.
1260
1261 Behaviour of the server in this case is currently badly
1262 defined.
1263 DDL statements use a form of "semantic" logging
1264 to maintain atomicity: if CREATE TABLE .. SELECT failed,
1265 the newly created table is deleted.
1266 In addition, some DDL statements issue interim transaction
1267 commits: e.g. ALTER TABLE issues a commit after data is copied
1268 from the original table to the internal temporary table. Other
1269 statements, e.g. CREATE TABLE ... SELECT do not always commit
1270 after itself.
1271 And finally there is a group of DDL statements such as
1272 RENAME/DROP TABLE that doesn't start a new transaction
1273 and doesn't commit.
1274
1275 This diversity makes it hard to say what will happen if
1276 by chance a stored function is invoked during a DDL --
1277 whether any modifications it makes will be committed or not
1278 is not clear. Fortunately, SQL grammar of few DDLs allows
1279 invocation of a stored function.
1280
1281 A consistent behaviour is perhaps to always commit the normal
1282 transaction after all DDLs, just like the statement transaction
1283 is always committed at the end of all statements.
1284 */
1285
1286 /**
1287 Register a storage engine for a transaction.
1288
1289 Every storage engine MUST call this function when it starts
1290 a transaction or a statement (that is it must be called both for the
1291 "beginning of transaction" and "beginning of statement").
1292 Only storage engines registered for the transaction/statement
1293 will know when to commit/rollback it.
1294
1295 @note
1296 trans_register_ha is idempotent - storage engine may register many
1297 times per transaction.
1298
1299 */
void trans_register_ha(THD *thd, bool all, handlerton *ht_arg, ulonglong trxid)
{
  THD_TRANS *trans;
  Ha_trx_info *ha_info;
  DBUG_ENTER("trans_register_ha");
  DBUG_PRINT("enter",("%s", all ? "all" : "stmt"));

  if (all)
  {
    /*
      Registering in the normal ("all") transaction: also mark the
      connection as being inside a transaction for the client protocol.
    */
    trans= &thd->transaction->all;
    thd->server_status|= SERVER_STATUS_IN_TRANS;
    if (thd->tx_read_only)
      thd->server_status|= SERVER_STATUS_IN_TRANS_READONLY;
    DBUG_PRINT("info", ("setting SERVER_STATUS_IN_TRANS"));
  }
  else
    trans= &thd->transaction->stmt;

  /* Per-engine slot: index 0 is the statement entry, index 1 the normal one. */
  ha_info= thd->ha_data[ht_arg->slot].ha_info + (all ? 1 : 0);

  if (ha_info->is_started())
    DBUG_VOID_RETURN; /* already registered, return */

  ha_info->register_ha(trans, ht_arg);

  /* An engine without a prepare() hook makes two-phase commit impossible. */
  trans->no_2pc|=(ht_arg->prepare==0);

  /* Set implicit xid even if there's explicit XA, it will be ignored anyway. */
  if (thd->transaction->implicit_xid.is_null())
    thd->transaction->implicit_xid.set(thd->query_id);

  /*
    Register transaction start in performance schema if not done already.
    By doing this, we handle cases when the transaction is started implicitly in
    autocommit=0 mode, and cases when we are in normal autocommit=1 mode and the
    executed statement is a single-statement transaction.

    Explicitly started transactions are handled in trans_begin().

    Do not register transactions in which binary log is the only participating
    transactional storage engine.
  */
  if (thd->m_transaction_psi == NULL && ht_arg->db_type != DB_TYPE_BINLOG)
  {
    thd->m_transaction_psi= MYSQL_START_TRANSACTION(&thd->m_transaction_state,
        thd->get_xid(), trxid, thd->tx_isolation, thd->tx_read_only,
        !thd->in_multi_stmt_transaction_mode());
    DEBUG_SYNC(thd, "after_set_transaction_psi_before_set_transaction_gtid");
    //gtid_set_performance_schema_values(thd);
  }
  DBUG_VOID_RETURN;
}
1352
1353
/**
  Run handlerton::prepare() for one engine, wrapped in the wsrep
  before/after-prepare hooks when they apply to this commit.

  @param ht   storage engine to prepare
  @param thd  connection handle
  @param all  TRUE  - prepare the normal transaction
              FALSE - prepare the statement transaction

  @return 0 on success, non-zero on failure. An engine prepare failure
          has already been reported via my_error(ER_ERROR_DURING_COMMIT).
*/
static int prepare_or_error(handlerton *ht, THD *thd, bool all)
{
#ifdef WITH_WSREP
  const bool run_wsrep_hooks= wsrep_run_commit_hook(thd, all);
  if (run_wsrep_hooks && ht->flags & HTON_WSREP_REPLICATION &&
      wsrep_before_prepare(thd, all))
  {
    return(1);
  }
#endif /* WITH_WSREP */

  int err= ht->prepare(ht, thd, all);
  /* Counted even on failure: the prepare was attempted. */
  status_var_increment(thd->status_var.ha_prepare_count);
  if (err)
  {
    my_error(ER_ERROR_DURING_COMMIT, MYF(0), err);
  }
#ifdef WITH_WSREP
  /* after_prepare hook runs only if the engine prepare succeeded. */
  if (run_wsrep_hooks && !err && ht->flags & HTON_WSREP_REPLICATION &&
      wsrep_after_prepare(thd, all))
  {
    err= 1;
  }
#endif /* WITH_WSREP */

  return err;
}
1381
1382
1383 /**
1384 @retval
1385 0 ok
1386 @retval
1387 1 error, transaction was rolled back
1388 */
int ha_prepare(THD *thd)
{
  /* Always operates on the normal ("all") transaction. */
  int error=0, all=1;
  THD_TRANS *trans=all ? &thd->transaction->all : &thd->transaction->stmt;
  Ha_trx_info *ha_info= trans->ha_list;
  DBUG_ENTER("ha_prepare");

  if (ha_info)
  {
    for (; ha_info; ha_info= ha_info->next())
    {
      handlerton *ht= ha_info->ht();
      if (ht->prepare)
      {
        /* Any prepare failure rolls back the whole transaction. */
        if (unlikely(prepare_or_error(ht, thd, all)))
        {
          ha_rollback_trans(thd, all);
          error=1;
          break;
        }
      }
      else
      {
        /*
          Engine cannot do two-phase commit; warn instead of failing so
          the remaining engines are still prepared.
        */
        push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN,
                            ER_GET_ERRNO, ER_THD(thd, ER_GET_ERRNO),
                            HA_ERR_WRONG_COMMAND,
                            ha_resolve_storage_engine_name(ht));

      }
    }

    DEBUG_SYNC(thd, "at_unlog_xa_prepare");

    /* Record the prepared state in the transaction coordinator log. */
    if (tc_log->unlog_xa_prepare(thd, all))
    {
      ha_rollback_trans(thd, all);
      error=1;
    }
  }

  DBUG_RETURN(error);
}
1431
1432 /*
1433 Like ha_check_and_coalesce_trx_read_only to return counted number of
1434 read-write transaction participants limited to two, but works in the 'all'
1435 context.
1436 Also returns the last found rw ha_info through the 2nd argument.
1437 */
ha_count_rw_all(THD * thd,Ha_trx_info ** ptr_ha_info)1438 uint ha_count_rw_all(THD *thd, Ha_trx_info **ptr_ha_info)
1439 {
1440 unsigned rw_ha_count= 0;
1441
1442 for (auto ha_info= thd->transaction->all.ha_list; ha_info;
1443 ha_info= ha_info->next())
1444 {
1445 if (ha_info->is_trx_read_write())
1446 {
1447 *ptr_ha_info= ha_info;
1448 if (++rw_ha_count > 1)
1449 break;
1450 }
1451 }
1452 return rw_ha_count;
1453 }
1454
1455 /**
1456 Check if we can skip the two-phase commit.
1457
1458 A helper function to evaluate if two-phase commit is mandatory.
1459 As a side effect, propagates the read-only/read-write flags
1460 of the statement transaction to its enclosing normal transaction.
1461
1462 If we have at least two engines with read-write changes we must
1463 run a two-phase commit. Otherwise we can run several independent
1464 commits as the only transactional engine has read-write changes
1465 and others are read-only.
1466
1467 @retval 0 All engines are read-only.
1468 @retval 1 We have the only engine with read-write changes.
1469 @retval >1 More than one engine have read-write changes.
1470 Note: return value might NOT be the exact number of
1471 engines with read-write changes.
1472 */
1473
static
uint
ha_check_and_coalesce_trx_read_only(THD *thd, Ha_trx_info *ha_list,
                                    bool all)
{
  /* The number of storage engines that have actual changes. */
  unsigned rw_ha_count= 0;
  Ha_trx_info *ha_info;

  for (ha_info= ha_list; ha_info; ha_info= ha_info->next())
  {
    if (ha_info->is_trx_read_write())
      ++rw_ha_count;

    if (! all)
    {
      /* Slot 1 of the engine's ha_data is its normal-transaction entry. */
      Ha_trx_info *ha_info_all= &thd->ha_data[ha_info->ht()->slot].ha_info[1];
      DBUG_ASSERT(ha_info != ha_info_all);
      /*
        Merge read-only/read-write information about statement
        transaction to its enclosing normal transaction. Do this
        only if in a real transaction -- that is, if we know
        that ha_info_all is registered in thd->transaction.all.
        Since otherwise we only clutter the normal transaction flags.
      */
      if (ha_info_all->is_started()) /* FALSE if autocommit. */
        ha_info_all->coalesce_trx_with(ha_info);
    }
    else if (rw_ha_count > 1)
    {
      /*
        It is a normal transaction, so we don't need to merge read/write
        information up, and the need for two-phase commit has been
        already established. Break the loop prematurely.
      */
      break;
    }
  }
  /*
    Note: when 'all' is true the count may stop early, so the return
    value is "0, 1, or more than 1", not an exact count.
  */
  return rw_ha_count;
}
1514
1515
1516 /**
1517 @retval
1518 0 ok
1519 @retval
1520 1 transaction was rolled back
1521 @retval
1522 2 error during commit, data may be inconsistent
1523
1524 @todo
1525 Since we don't support nested statement transactions in 5.0,
1526 we can't commit or rollback stmt transactions while we are inside
1527 stored functions or triggers. So we simply do nothing now.
1528 TODO: This should be fixed in later ( >= 5.1) releases.
1529 */
ha_commit_trans(THD * thd,bool all)1530 int ha_commit_trans(THD *thd, bool all)
1531 {
1532 int error= 0, cookie;
1533 /*
1534 'all' means that this is either an explicit commit issued by
1535 user, or an implicit commit issued by a DDL.
1536 */
1537 THD_TRANS *trans= all ? &thd->transaction->all : &thd->transaction->stmt;
1538 /*
1539 "real" is a nick name for a transaction for which a commit will
1540 make persistent changes. E.g. a 'stmt' transaction inside an 'all'
1541 transaction is not 'real': even though it's possible to commit it,
1542 the changes are not durable as they might be rolled back if the
1543 enclosing 'all' transaction is rolled back.
1544 */
1545 bool is_real_trans= ((all || thd->transaction->all.ha_list == 0) &&
1546 !(thd->variables.option_bits & OPTION_GTID_BEGIN));
1547 Ha_trx_info *ha_info= trans->ha_list;
1548 bool need_prepare_ordered, need_commit_ordered;
1549 my_xid xid;
1550 #ifdef WITH_WSREP
1551 const bool run_wsrep_hooks= wsrep_run_commit_hook(thd, all);
1552 #endif /* WITH_WSREP */
1553 DBUG_ENTER("ha_commit_trans");
1554 DBUG_PRINT("info",("thd: %p option_bits: %lu all: %d",
1555 thd, (ulong) thd->variables.option_bits, all));
1556
1557 /* Just a random warning to test warnings pushed during autocommit. */
1558 DBUG_EXECUTE_IF("warn_during_ha_commit_trans",
1559 push_warning(thd, Sql_condition::WARN_LEVEL_WARN,
1560 ER_WARNING_NOT_COMPLETE_ROLLBACK,
1561 ER_THD(thd, ER_WARNING_NOT_COMPLETE_ROLLBACK)););
1562
1563 DBUG_PRINT("info",
1564 ("all: %d thd->in_sub_stmt: %d ha_info: %p is_real_trans: %d",
1565 all, thd->in_sub_stmt, ha_info, is_real_trans));
1566 /*
1567 We must not commit the normal transaction if a statement
1568 transaction is pending. Otherwise statement transaction
1569 flags will not get propagated to its normal transaction's
1570 counterpart.
1571 */
1572 DBUG_ASSERT(thd->transaction->stmt.ha_list == NULL ||
1573 trans == &thd->transaction->stmt);
1574
1575 if (thd->in_sub_stmt)
1576 {
1577 DBUG_ASSERT(0);
1578 /*
1579 Since we don't support nested statement transactions in 5.0,
1580 we can't commit or rollback stmt transactions while we are inside
1581 stored functions or triggers. So we simply do nothing now.
1582 TODO: This should be fixed in later ( >= 5.1) releases.
1583 */
1584 if (!all)
1585 DBUG_RETURN(0);
1586 /*
1587 We assume that all statements which commit or rollback main transaction
1588 are prohibited inside of stored functions or triggers. So they should
1589 bail out with error even before ha_commit_trans() call. To be 100% safe
1590 let us throw error in non-debug builds.
1591 */
1592 my_error(ER_COMMIT_NOT_ALLOWED_IN_SF_OR_TRG, MYF(0));
1593 DBUG_RETURN(2);
1594 }
1595
1596 if (!ha_info)
1597 {
1598 /*
1599 Free resources and perform other cleanup even for 'empty' transactions.
1600 */
1601 if (is_real_trans)
1602 {
1603 thd->transaction->cleanup();
1604 MYSQL_COMMIT_TRANSACTION(thd->m_transaction_psi);
1605 thd->m_transaction_psi= NULL;
1606 }
1607 #ifdef WITH_WSREP
1608 if (wsrep_is_active(thd) && is_real_trans && !error)
1609 wsrep_commit_empty(thd, all);
1610 #endif /* WITH_WSREP */
1611
1612 DBUG_RETURN(0);
1613 }
1614
1615 DBUG_EXECUTE_IF("crash_commit_before", DBUG_SUICIDE(););
1616
1617 /* Close all cursors that can not survive COMMIT */
1618 if (is_real_trans) /* not a statement commit */
1619 thd->stmt_map.close_transient_cursors();
1620
1621 uint rw_ha_count= ha_check_and_coalesce_trx_read_only(thd, ha_info, all);
1622 /* rw_trans is TRUE when we in a transaction changing data */
1623 bool rw_trans= is_real_trans &&
1624 (rw_ha_count > (thd->is_current_stmt_binlog_disabled()?0U:1U));
1625 MDL_request mdl_backup;
1626 DBUG_PRINT("info", ("is_real_trans: %d rw_trans: %d rw_ha_count: %d",
1627 is_real_trans, rw_trans, rw_ha_count));
1628
1629 if (rw_trans)
1630 {
1631 /*
1632 Acquire a metadata lock which will ensure that COMMIT is blocked
1633 by an active FLUSH TABLES WITH READ LOCK (and vice versa:
1634 COMMIT in progress blocks FTWRL).
1635
1636 We allow the owner of FTWRL to COMMIT; we assume that it knows
1637 what it does.
1638 */
1639 MDL_REQUEST_INIT(&mdl_backup, MDL_key::BACKUP, "", "", MDL_BACKUP_COMMIT,
1640 MDL_EXPLICIT);
1641
1642 if (!WSREP(thd))
1643 {
1644 if (thd->mdl_context.acquire_lock(&mdl_backup,
1645 thd->variables.lock_wait_timeout))
1646 {
1647 my_error(ER_ERROR_DURING_COMMIT, MYF(0), 1);
1648 ha_rollback_trans(thd, all);
1649 DBUG_RETURN(1);
1650 }
1651 thd->backup_commit_lock= &mdl_backup;
1652 }
1653 DEBUG_SYNC(thd, "ha_commit_trans_after_acquire_commit_lock");
1654 }
1655
1656 if (rw_trans &&
1657 opt_readonly &&
1658 !(thd->security_ctx->master_access & PRIV_IGNORE_READ_ONLY) &&
1659 !thd->slave_thread)
1660 {
1661 my_error(ER_OPTION_PREVENTS_STATEMENT, MYF(0), "--read-only");
1662 goto err;
1663 }
1664
1665 #if 1 // FIXME: This should be done in ha_prepare().
1666 if (rw_trans || (thd->lex->sql_command == SQLCOM_ALTER_TABLE &&
1667 thd->lex->alter_info.flags & ALTER_ADD_SYSTEM_VERSIONING &&
1668 is_real_trans))
1669 {
1670 ulonglong trx_start_id= 0, trx_end_id= 0;
1671 for (Ha_trx_info *ha_info= trans->ha_list; ha_info; ha_info= ha_info->next())
1672 {
1673 if (ha_info->ht()->prepare_commit_versioned)
1674 {
1675 trx_end_id= ha_info->ht()->prepare_commit_versioned(thd, &trx_start_id);
1676 if (trx_end_id)
1677 break; // FIXME: use a common ID for cross-engine transactions
1678 }
1679 }
1680
1681 if (trx_end_id)
1682 {
1683 if (!TR_table::use_transaction_registry)
1684 {
1685 my_error(ER_VERS_TRT_IS_DISABLED, MYF(0));
1686 goto err;
1687 }
1688 DBUG_ASSERT(trx_start_id);
1689 #ifdef WITH_WSREP
1690 bool saved_wsrep_on= thd->variables.wsrep_on;
1691 thd->variables.wsrep_on= false;
1692 #endif
1693 TR_table trt(thd, true);
1694 if (trt.update(trx_start_id, trx_end_id))
1695 {
1696 #ifdef WITH_WSREP
1697 thd->variables.wsrep_on= saved_wsrep_on;
1698 #endif
1699 (void) trans_rollback_stmt(thd);
1700 goto err;
1701 }
1702 // Here, the call will not commit inside InnoDB. It is only working
1703 // around closing thd->transaction.stmt open by TR_table::open().
1704 if (all)
1705 commit_one_phase_2(thd, false, &thd->transaction->stmt, false);
1706 #ifdef WITH_WSREP
1707 thd->variables.wsrep_on= saved_wsrep_on;
1708 #endif
1709 }
1710 }
1711 #endif
1712
1713 if (trans->no_2pc || (rw_ha_count <= 1))
1714 {
1715 #ifdef WITH_WSREP
1716 /*
1717 This commit will not go through log_and_order() where wsrep commit
1718 ordering is normally done. Commit ordering must be done here.
1719 */
1720 if (run_wsrep_hooks)
1721 error= wsrep_before_commit(thd, all);
1722 if (error)
1723 {
1724 ha_rollback_trans(thd, FALSE);
1725 goto wsrep_err;
1726 }
1727 #endif /* WITH_WSREP */
1728 error= ha_commit_one_phase(thd, all);
1729 #ifdef WITH_WSREP
1730 // Here in case of error we must return 2 for inconsistency
1731 if (run_wsrep_hooks && !error)
1732 error= wsrep_after_commit(thd, all) ? 2 : 0;
1733 #endif /* WITH_WSREP */
1734 goto done;
1735 }
1736
1737 need_prepare_ordered= FALSE;
1738 need_commit_ordered= FALSE;
1739
1740 for (Ha_trx_info *hi= ha_info; hi; hi= hi->next())
1741 {
1742 handlerton *ht= hi->ht();
1743 /*
1744 Do not call two-phase commit if this particular
1745 transaction is read-only. This allows for simpler
1746 implementation in engines that are always read-only.
1747 */
1748 if (! hi->is_trx_read_write())
1749 continue;
1750 /*
1751 Sic: we know that prepare() is not NULL since otherwise
1752 trans->no_2pc would have been set.
1753 */
1754 if (unlikely(prepare_or_error(ht, thd, all)))
1755 goto err;
1756
1757 need_prepare_ordered|= (ht->prepare_ordered != NULL);
1758 need_commit_ordered|= (ht->commit_ordered != NULL);
1759 }
1760 DEBUG_SYNC(thd, "ha_commit_trans_after_prepare");
1761 DBUG_EXECUTE_IF("crash_commit_after_prepare", DBUG_SUICIDE(););
1762
1763 if (!is_real_trans)
1764 {
1765 error= commit_one_phase_2(thd, all, trans, is_real_trans);
1766 goto done;
1767 }
1768
1769 DBUG_ASSERT(thd->transaction->implicit_xid.get_my_xid() ==
1770 thd->transaction->implicit_xid.quick_get_my_xid());
1771 DBUG_ASSERT(!thd->transaction->xid_state.is_explicit_XA() ||
1772 thd->lex->xa_opt == XA_ONE_PHASE);
1773 xid= thd->transaction->implicit_xid.quick_get_my_xid();
1774
1775 #ifdef WITH_WSREP
1776 if (run_wsrep_hooks && !error)
1777 {
1778 wsrep::seqno const s= wsrep_xid_seqno(thd->wsrep_xid);
1779 if (!s.is_undefined())
1780 {
1781 // xid was rewritten by wsrep
1782 xid= s.get();
1783 }
1784 }
1785 if (run_wsrep_hooks && (error = wsrep_before_commit(thd, all)))
1786 goto wsrep_err;
1787 #endif /* WITH_WSREP */
1788 DEBUG_SYNC(thd, "ha_commit_trans_before_log_and_order");
1789 cookie= tc_log->log_and_order(thd, xid, all, need_prepare_ordered,
1790 need_commit_ordered);
1791 if (!cookie)
1792 {
1793 WSREP_DEBUG("log_and_order has failed %llu %d", thd->thread_id, cookie);
1794 goto err;
1795 }
1796 DEBUG_SYNC(thd, "ha_commit_trans_after_log_and_order");
1797 DBUG_EXECUTE_IF("crash_commit_after_log", DBUG_SUICIDE(););
1798
1799 error= commit_one_phase_2(thd, all, trans, is_real_trans) ? 2 : 0;
1800 #ifdef WITH_WSREP
1801 if (run_wsrep_hooks &&
1802 (error || (error = wsrep_after_commit(thd, all))))
1803 {
1804 error = 2;
1805 mysql_mutex_lock(&thd->LOCK_thd_data);
1806 if (wsrep_must_abort(thd))
1807 {
1808 mysql_mutex_unlock(&thd->LOCK_thd_data);
1809 (void)tc_log->unlog(cookie, xid);
1810 goto wsrep_err;
1811 }
1812 mysql_mutex_unlock(&thd->LOCK_thd_data);
1813 }
1814 #endif /* WITH_WSREP */
1815 DBUG_EXECUTE_IF("crash_commit_before_unlog", DBUG_SUICIDE(););
1816 if (tc_log->unlog(cookie, xid))
1817 error= 2; /* Error during commit */
1818
1819 done:
1820 if (is_real_trans)
1821 {
1822 MYSQL_COMMIT_TRANSACTION(thd->m_transaction_psi);
1823 thd->m_transaction_psi= NULL;
1824 }
1825
1826 DBUG_EXECUTE_IF("crash_commit_after", DBUG_SUICIDE(););
1827
1828 mysql_mutex_assert_not_owner(&LOCK_prepare_ordered);
1829 mysql_mutex_assert_not_owner(mysql_bin_log.get_log_lock());
1830 mysql_mutex_assert_not_owner(&LOCK_after_binlog_sync);
1831 mysql_mutex_assert_not_owner(&LOCK_commit_ordered);
1832 #ifdef HAVE_REPLICATION
1833 repl_semisync_master.wait_after_commit(thd, all);
1834 DEBUG_SYNC(thd, "after_group_after_commit");
1835 #endif
1836 goto end;
1837
1838 /* Come here if error and we need to rollback. */
1839 #ifdef WITH_WSREP
1840 wsrep_err:
1841 mysql_mutex_lock(&thd->LOCK_thd_data);
1842 if (run_wsrep_hooks && wsrep_must_abort(thd))
1843 {
1844 WSREP_DEBUG("BF abort has happened after prepare & certify");
1845 mysql_mutex_unlock(&thd->LOCK_thd_data);
1846 ha_rollback_trans(thd, TRUE);
1847 }
1848 else
1849 mysql_mutex_unlock(&thd->LOCK_thd_data);
1850
1851 #endif /* WITH_WSREP */
1852 err:
1853 error= 1; /* Transaction was rolled back */
1854 /*
1855 In parallel replication, rollback is delayed, as there is extra replication
1856 book-keeping to be done before rolling back and allowing a conflicting
1857 transaction to continue (MDEV-7458).
1858 */
1859 if (!(thd->rgi_slave && thd->rgi_slave->is_parallel_exec))
1860 ha_rollback_trans(thd, all);
1861 else
1862 {
    /*
      We are not really doing a rollback here, but the code in trans_commit()
      requires that m_transaction_psi is 0 when we return from this function.
    */
1867 MYSQL_ROLLBACK_TRANSACTION(thd->m_transaction_psi);
1868 thd->m_transaction_psi= NULL;
1869 WSREP_DEBUG("rollback skipped %p %d",thd->rgi_slave,
1870 thd->rgi_slave->is_parallel_exec);
1871 }
1872 end:
1873 if (mdl_backup.ticket)
1874 {
1875 /*
1876 We do not always immediately release transactional locks
1877 after ha_commit_trans() (see uses of ha_enable_transaction()),
1878 thus we release the commit blocker lock as soon as it's
1879 not needed.
1880 */
1881 thd->mdl_context.release_lock(mdl_backup.ticket);
1882 }
1883 thd->backup_commit_lock= 0;
1884 #ifdef WITH_WSREP
1885 if (wsrep_is_active(thd) && is_real_trans && !error &&
1886 (rw_ha_count == 0 || all) &&
1887 wsrep_not_committed(thd))
1888 {
1889 wsrep_commit_empty(thd, all);
1890 }
1891 #endif /* WITH_WSREP */
1892
1893 DBUG_RETURN(error);
1894 }
1895
1896 /**
1897 @note
1898 This function does not care about global read lock or backup locks,
1899 the caller should.
1900
1901 @param[in] all Is set in case of explicit commit
1902 (COMMIT statement), or implicit commit
1903 issued by DDL. Is not set when called
1904 at the end of statement, even if
1905 autocommit=1.
1906 */
1907
int ha_commit_one_phase(THD *thd, bool all)
{
  /* Pick the statement or the whole-transaction context depending on 'all'. */
  THD_TRANS *trans=all ? &thd->transaction->all : &thd->transaction->stmt;
  /*
    "real" is a nick name for a transaction for which a commit will
    make persistent changes. E.g. a 'stmt' transaction inside a 'all'
    transaction is not 'real': even though it's possible to commit it,
    the changes are not durable as they might be rolled back if the
    enclosing 'all' transaction is rolled back.
    We establish the value of 'is_real_trans' by checking
    if it's an explicit COMMIT/BEGIN statement, or implicit
    commit issued by DDL (all == TRUE), or if we're running
    in autocommit mode (it's only in the autocommit mode
    ha_commit_one_phase() can be called with an empty
    transaction.all.ha_list, see why in trans_register_ha()).
  */
  bool is_real_trans= ((all || thd->transaction->all.ha_list == 0) &&
                       !(thd->variables.option_bits & OPTION_GTID_BEGIN));
  int res;
  DBUG_ENTER("ha_commit_one_phase");
  if (is_real_trans)
  {
    DEBUG_SYNC(thd, "ha_commit_one_phase");
    /* Wait for any prior commit this one must be ordered after;
       return its error without touching the engines if the wait fails. */
    if ((res= thd->wait_for_prior_commit()))
      DBUG_RETURN(res);
  }
  res= commit_one_phase_2(thd, all, trans, is_real_trans);
  DBUG_RETURN(res);
}
1937
1938
/*
  Second stage of one-phase commit: call commit() in every participating
  engine, reset the per-engine registration list, and clean up the
  transaction context.

  @param thd            current thread
  @param all            true: commit the whole transaction;
                        false: commit only the statement transaction
  @param trans          the transaction context being committed
  @param is_real_trans  true when this commit makes changes durable

  @return 0 on success, 1 if some engine's commit() failed
*/
static int
commit_one_phase_2(THD *thd, bool all, THD_TRANS *trans, bool is_real_trans)
{
  int error= 0;
  uint count= 0;    /* read-write engines (excluding binlog) that committed */
  Ha_trx_info *ha_info= trans->ha_list, *ha_info_next;
  DBUG_ENTER("commit_one_phase_2");
  if (is_real_trans)
    DEBUG_SYNC(thd, "commit_one_phase_2");

  if (ha_info)
  {
    for (; ha_info; ha_info= ha_info_next)
    {
      int err;
      handlerton *ht= ha_info->ht();
      if ((err= ht->commit(ht, thd, all)))
      {
        my_error(ER_ERROR_DURING_COMMIT, MYF(0), err);
        error=1;
      }
      /* Should this be done only if is_real_trans is set ? */
      status_var_increment(thd->status_var.ha_commit_count);
      if (is_real_trans && ht != binlog_hton && ha_info->is_trx_read_write())
        ++count;
      ha_info_next= ha_info->next();
      ha_info->reset(); /* keep it conveniently zero-filled */
    }
    trans->ha_list= 0;
    trans->no_2pc=0;
    if (all)
    {
#ifdef HAVE_QUERY_CACHE
      /* Drop query cache entries for tables changed by this transaction. */
      if (thd->transaction->changed_tables)
        query_cache.invalidate(thd, thd->transaction->changed_tables);
#endif
    }
  }

  /* Free resources and perform other cleanup even for 'empty' transactions. */
  if (is_real_trans)
  {
    thd->has_waiter= false;
    thd->transaction->cleanup();
    /* A durable commit that wrote to two or more engines is multi-engine. */
    if (count >= 2)
      statistic_increment(transactions_multi_engine, LOCK_status);
  }

  DBUG_RETURN(error);
}
1989
1990
/**
  Roll back the statement or the whole transaction in every participating
  storage engine, with wsrep before/after hooks when enabled.

  @param thd  thread handle
  @param all  TRUE  - roll back the normal ('all') transaction
              FALSE - roll back only the statement transaction

  @return 0 on success, 1 if some engine reported an error during rollback
          (or rollback was attempted inside a stored function/trigger)
*/
int ha_rollback_trans(THD *thd, bool all)
{
  int error=0;
  THD_TRANS *trans=all ? &thd->transaction->all : &thd->transaction->stmt;
  Ha_trx_info *ha_info= trans->ha_list, *ha_info_next;
  /*
    "real" is a nick name for a transaction for which a commit will
    make persistent changes. E.g. a 'stmt' transaction inside a 'all'
    transaction is not 'real': even though it's possible to commit it,
    the changes are not durable as they might be rolled back if the
    enclosing 'all' transaction is rolled back.
    We establish the value of 'is_real_trans' by checking
    if it's an explicit COMMIT or BEGIN statement, or implicit
    commit issued by DDL (in these cases all == TRUE),
    or if we're running in autocommit mode (it's only in the autocommit mode
    ha_commit_one_phase() is called with an empty
    transaction.all.ha_list, see why in trans_register_ha()).
  */
  bool is_real_trans=all || thd->transaction->all.ha_list == 0;
  DBUG_ENTER("ha_rollback_trans");

  /*
    We must not rollback the normal transaction if a statement
    transaction is pending.
  */
  DBUG_ASSERT(thd->transaction->stmt.ha_list == NULL ||
              trans == &thd->transaction->stmt);

#ifdef HAVE_REPLICATION
  if (is_real_trans)
  {
    /*
      In parallel replication, if we need to rollback during commit, we must
      first inform following transactions that we are going to abort our commit
      attempt. Otherwise those following transactions can run too early, and
      possibly cause replication to fail. See comments in retry_event_group().

      There were several bugs with this in the past that were very hard to
      track down (MDEV-7458, MDEV-8302). So we add here an assertion for
      rollback without signalling following transactions. And in release
      builds, we explicitly do the signalling before rolling back.
    */
    DBUG_ASSERT(!(thd->rgi_slave && thd->rgi_slave->did_mark_start_commit) ||
                thd->transaction->xid_state.is_explicit_XA());
    if (thd->rgi_slave && thd->rgi_slave->did_mark_start_commit)
      thd->rgi_slave->unmark_start_commit();
  }
#endif

  if (thd->in_sub_stmt)
  {
    DBUG_ASSERT(0);
    /*
      If we are inside stored function or trigger we should not commit or
      rollback current statement transaction. See comment in ha_commit_trans()
      call for more information.
    */
    if (!all)
      DBUG_RETURN(0);
    my_error(ER_COMMIT_NOT_ALLOWED_IN_SF_OR_TRG, MYF(0));
    DBUG_RETURN(1);
  }

#ifdef WITH_WSREP
  /* Notify wsrep before the engines roll back; result is intentionally
     ignored (rollback proceeds regardless). */
  (void) wsrep_before_rollback(thd, all);
#endif /* WITH_WSREP */
  if (ha_info)
  {
    /* Close all cursors that can not survive ROLLBACK */
    if (is_real_trans)                          /* not a statement commit */
      thd->stmt_map.close_transient_cursors();

    for (; ha_info; ha_info= ha_info_next)
    {
      int err;
      handlerton *ht= ha_info->ht();
      if ((err= ht->rollback(ht, thd, all)))
      {
        // cannot happen
        my_error(ER_ERROR_DURING_ROLLBACK, MYF(0), err);
        error=1;
#ifdef WITH_WSREP
        WSREP_WARN("handlerton rollback failed, thd %lld %lld conf %d SQL %s",
                   thd->thread_id, thd->query_id, thd->wsrep_trx().state(),
                   thd->query());
#endif /* WITH_WSREP */
      }
      status_var_increment(thd->status_var.ha_rollback_count);
      ha_info_next= ha_info->next();
      ha_info->reset(); /* keep it conveniently zero-filled */
    }
    trans->ha_list= 0;
    trans->no_2pc=0;
  }

#ifdef WITH_WSREP
  if (thd->is_error())
  {
    WSREP_DEBUG("ha_rollback_trans(%lld, %s) rolled back: %s: %s; is_real %d",
                thd->thread_id, all?"TRUE":"FALSE", wsrep_thd_query(thd),
                thd->get_stmt_da()->message(), is_real_trans);
  }
  (void) wsrep_after_rollback(thd, all);
#endif /* WITH_WSREP */

  /* End the performance-schema transaction event unless a multi-statement
     transaction is still active. */
  if (all || !thd->in_active_multi_stmt_transaction())
  {
    MYSQL_ROLLBACK_TRANSACTION(thd->m_transaction_psi);
    thd->m_transaction_psi= NULL;
  }

  /* Always cleanup. Even if nht==0. There may be savepoints. */
  if (is_real_trans)
  {
    /*
      Thanks to possibility of MDL deadlock rollback request can come even if
      transaction hasn't been started in any transactional storage engine.
    */
    if (thd->transaction_rollback_request &&
        thd->transaction->xid_state.is_explicit_XA())
      thd->transaction->xid_state.set_error(thd->get_stmt_da()->sql_errno());

    thd->has_waiter= false;
    thd->transaction->cleanup();
  }
  if (all)
    thd->transaction_rollback_request= FALSE;

  /*
    If a non-transactional table was updated, warn; don't warn if this is a
    slave thread (because when a slave thread executes a ROLLBACK, it has
    been read from the binary log, so it's 100% sure and normal to produce
    error ER_WARNING_NOT_COMPLETE_ROLLBACK. If we sent the warning to the
    slave SQL thread, it would not stop the thread but just be printed in
    the error log; but we don't want users to wonder why they have this
    message in the error log, so we don't send it.

    We don't have to test for thd->killed == KILL_SYSTEM_THREAD as
    it doesn't matter if a warning is pushed to a system thread or not:
    No one will see it...
  */
  if (is_real_trans && thd->transaction->all.modified_non_trans_table &&
      !thd->slave_thread && thd->killed < KILL_CONNECTION)
    push_warning(thd, Sql_condition::WARN_LEVEL_WARN,
                 ER_WARNING_NOT_COMPLETE_ROLLBACK,
                 ER_THD(thd, ER_WARNING_NOT_COMPLETE_ROLLBACK));
#ifdef HAVE_REPLICATION
  repl_semisync_master.wait_after_rollback(thd, all);
#endif
  DBUG_RETURN(error);
}
2142
2143
/*
  Shared argument/result holder for the xacommit_handlerton() and
  xarollback_handlerton() plugin_foreach() callbacks.
*/
struct xahton_st {
  XID *xid;     /* XID of the prepared transaction to commit or roll back */
  int result;   /* caller sets 1; callbacks set 0 once an XA-capable
                   (recover-supporting) engine has processed the XID */
};
2148
xacommit_handlerton(THD * unused1,plugin_ref plugin,void * arg)2149 static my_bool xacommit_handlerton(THD *unused1, plugin_ref plugin,
2150 void *arg)
2151 {
2152 handlerton *hton= plugin_hton(plugin);
2153 if (hton->recover)
2154 {
2155 hton->commit_by_xid(hton, ((struct xahton_st *)arg)->xid);
2156 ((struct xahton_st *)arg)->result= 0;
2157 }
2158 return FALSE;
2159 }
2160
xarollback_handlerton(THD * unused1,plugin_ref plugin,void * arg)2161 static my_bool xarollback_handlerton(THD *unused1, plugin_ref plugin,
2162 void *arg)
2163 {
2164 handlerton *hton= plugin_hton(plugin);
2165 if (hton->recover)
2166 {
2167 hton->rollback_by_xid(hton, ((struct xahton_st *)arg)->xid);
2168 ((struct xahton_st *)arg)->result= 0;
2169 }
2170 return FALSE;
2171 }
2172
2173
ha_commit_or_rollback_by_xid(XID * xid,bool commit)2174 int ha_commit_or_rollback_by_xid(XID *xid, bool commit)
2175 {
2176 struct xahton_st xaop;
2177 xaop.xid= xid;
2178 xaop.result= 1;
2179
2180 plugin_foreach(NULL, commit ? xacommit_handlerton : xarollback_handlerton,
2181 MYSQL_STORAGE_ENGINE_PLUGIN, &xaop);
2182
2183 return xaop.result;
2184 }
2185
2186
2187 #ifndef DBUG_OFF
2188 /** Converts XID to string.
2189
2190 @param[out] buf output buffer
2191 @param[in] xid XID to convert
2192
2193 @return pointer to converted string
2194
2195 @note This does not need to be multi-byte safe or anything */
xid_to_str(char * buf,const XID & xid)2196 static char *xid_to_str(char *buf, const XID &xid)
2197 {
2198 int i;
2199 char *s=buf;
2200 *s++='\'';
2201 for (i= 0; i < xid.gtrid_length + xid.bqual_length; i++)
2202 {
2203 uchar c= (uchar) xid.data[i];
2204 /* is_next_dig is set if next character is a number */
2205 bool is_next_dig= FALSE;
2206 if (i < XIDDATASIZE)
2207 {
2208 char ch= xid.data[i + 1];
2209 is_next_dig= (ch >= '0' && ch <='9');
2210 }
2211 if (i == xid.gtrid_length)
2212 {
2213 *s++='\'';
2214 if (xid.bqual_length)
2215 {
2216 *s++='.';
2217 *s++='\'';
2218 }
2219 }
2220 if (c < 32 || c > 126)
2221 {
2222 *s++='\\';
2223 /*
2224 If next character is a number, write current character with
2225 3 octal numbers to ensure that the next number is not seen
2226 as part of the octal number
2227 */
2228 if (c > 077 || is_next_dig)
2229 *s++=_dig_vec_lower[c >> 6];
2230 if (c > 007 || is_next_dig)
2231 *s++=_dig_vec_lower[(c >> 3) & 7];
2232 *s++=_dig_vec_lower[c & 7];
2233 }
2234 else
2235 {
2236 if (c == '\'' || c == '\\')
2237 *s++='\\';
2238 *s++=c;
2239 }
2240 }
2241 *s++='\'';
2242 *s=0;
2243 return buf;
2244 }
2245 #endif
2246
/**
  Sort the recovered wsrep XIDs and find the highest seqno that is
  contiguous with the current SE checkpoint position.

  @param list  array of recovered XIDs; sorted in place
  @param len   number of entries in list

  @return highest contiguous wsrep seqno recoverable (0 if none, or when
          built without wsrep support)
*/
static my_xid wsrep_order_and_check_continuity(XID *list, int len)
{
#ifdef WITH_WSREP
  wsrep_sort_xid_array(list, len);
  wsrep::gtid cur_position= wsrep_get_SE_checkpoint<wsrep::gtid>();
  long long cur_seqno= cur_position.seqno().get();
  for (int i= 0; i < len; ++i)
  {
    /* Stop at the first non-wsrep XID or at a gap in the seqno sequence. */
    if (!wsrep_is_wsrep_xid(list + i) ||
        wsrep_xid_seqno(list + i) != cur_seqno + 1)
    {
      WSREP_WARN("Discovered discontinuity in recovered wsrep "
                 "transaction XIDs. Truncating the recovery list to "
                 "%d entries", i);
      break;
    }
    ++cur_seqno;
  }
  WSREP_INFO("Last wsrep seqno to be recovered %lld", cur_seqno);
  return (cur_seqno < 0 ? 0 : cur_seqno);
#else
  return 0;
#endif /* WITH_WSREP */
}
2271 /**
2272 recover() step of xa.
2273
2274 @note
2275 there are three modes of operation:
2276 - automatic recover after a crash
2277 in this case commit_list != 0, tc_heuristic_recover==0
2278 all xids from commit_list are committed, others are rolled back
2279 - manual (heuristic) recover
2280 in this case commit_list==0, tc_heuristic_recover != 0
2281 DBA has explicitly specified that all prepared transactions should
2282 be committed (or rolled back).
2283 - no recovery (MySQL did not detect a crash)
2284 in this case commit_list==0, tc_heuristic_recover == 0
2285 there should be no prepared transactions in this case.
2286 */
/* State shared by ha_recover() and the xarecover_handlerton() callback. */
struct xarecover_st
{
  int len;                  /* capacity of 'list', in XIDs */
  int found_foreign_xids;   /* XIDs generated by an external TM */
  int found_my_xids;        /* our own XIDs found while in dry-run mode */
  XID *list;                /* buffer the engines fill via recover() */
  HASH *commit_list;        /* binlogged XIDs to commit; NULL if none */
  bool dry_run;             /* true: only count, don't commit/rollback */
};
2294
/**
  plugin_foreach() callback: run XA recovery in one storage engine.

  Fetches batches of prepared XIDs via hton->recover() and, for each XID,
  either caches it (foreign XID), counts it (dry run), commits it, or rolls
  it back, depending on the xarecover_st state in arg.

  @return FALSE always, so plugin_foreach() continues with the next engine.
*/
static my_bool xarecover_handlerton(THD *unused, plugin_ref plugin,
                                    void *arg)
{
  handlerton *hton= plugin_hton(plugin);
  struct xarecover_st *info= (struct xarecover_st *) arg;
  int got;

  if (hton->recover)
  {
    while ((got= hton->recover(hton, info->list, info->len)) > 0 )
    {
      sql_print_information("Found %d prepared transaction(s) in %s",
                            got, hton_name(hton)->str);
      /* If wsrep_on=ON, XIDs are first ordered and then the range of
         recovered XIDs is checked for continuity. All the XIDs which
         are in continuous range can be safely committed if binlog
         is off since they have already ordered and certified in the
         cluster.

         The discontinuity of wsrep XIDs may happen because the GTID
         is assigned for transaction in wsrep_before_prepare(), but the
         commit order is entered in wsrep_before_commit(). This means that
         transactions may run prepare step out of order and may
         result in gap in wsrep XIDs. This can be the case for example
         if we have T1 with seqno 1 and T2 with seqno 2 and the server
         crashes after T2 finishes prepare step but before T1 starts
         the prepare.
      */
      my_xid wsrep_limit __attribute__((unused))= 0;

      /* Note that we could call this for binlog also that
         will not have WSREP(thd) but global wsrep on might
         be true.
      */
      if (WSREP_ON)
        wsrep_limit= wsrep_order_and_check_continuity(info->list, got);

      for (int i=0; i < got; i ++)
      {
        my_xid x= IF_WSREP(wsrep_is_wsrep_xid(&info->list[i]) ?
                           wsrep_xid_seqno(&info->list[i]) :
                           info->list[i].get_my_xid(),
                           info->list[i].get_my_xid());
        if (!x) // not "mine" - that is generated by external TM
        {
          DBUG_EXECUTE("info",{
            char buf[XIDDATASIZE*4+6];
            _db_doprnt_("ignore xid %s", xid_to_str(buf, info->list[i]));
            });
          /* Keep foreign XIDs cached for explicit XA COMMIT/ROLLBACK. */
          xid_cache_insert(info->list + i);
          info->found_foreign_xids++;
          continue;
        }
        /* In dry-run mode, only count our XIDs (except wsrep XIDs within
           the safe, continuous range, which may still be committed). */
        if (IF_WSREP(!(wsrep_emulate_bin_log &&
                       wsrep_is_wsrep_xid(info->list + i) &&
                       x <= wsrep_limit) && info->dry_run,
                     info->dry_run))
        {
          info->found_my_xids++;
          continue;
        }
        // recovery mode
        if (IF_WSREP((wsrep_emulate_bin_log &&
                      wsrep_is_wsrep_xid(info->list + i) &&
                      x <= wsrep_limit), false) ||
            (info->commit_list ?
             my_hash_search(info->commit_list, (uchar *)&x, sizeof(x)) != 0 :
             tc_heuristic_recover == TC_HEURISTIC_RECOVER_COMMIT))
        {
          int rc= hton->commit_by_xid(hton, info->list+i);
          if (rc == 0)
          {
            DBUG_EXECUTE("info",{
              char buf[XIDDATASIZE*4+6];
              _db_doprnt_("commit xid %s", xid_to_str(buf, info->list[i]));
              });
          }
        }
        else
        {
          int rc= hton->rollback_by_xid(hton, info->list+i);
          if (rc == 0)
          {
            DBUG_EXECUTE("info",{
              char buf[XIDDATASIZE*4+6];
              _db_doprnt_("rollback xid %s", xid_to_str(buf, info->list[i]));
              });
          }
        }
      }
      /* A batch smaller than the buffer means the engine has no more XIDs. */
      if (got < info->len)
        break;
    }
  }
  return FALSE;
}
2391
/**
  Recover prepared (XA) transactions in all storage engines.

  See the comment above struct xarecover_st for the three modes of
  operation (automatic crash recovery, heuristic recovery, dry run).

  @param commit_list  hash of binlogged XIDs to commit; NULL when the
                      decision comes from --tc-heuristic-recover or when
                      this is a dry run

  @return 0 ok; 1 on error (out of memory, or prepared transactions found
          in dry-run mode)
*/
int ha_recover(HASH *commit_list)
{
  struct xarecover_st info;
  DBUG_ENTER("ha_recover");
  info.found_foreign_xids= info.found_my_xids= 0;
  info.commit_list= commit_list;
  /* Dry run: neither binlog recovery nor heuristic recovery requested. */
  info.dry_run= (info.commit_list==0 && tc_heuristic_recover==0);
  info.list= NULL;

  /* commit_list and tc_heuristic_recover cannot be set both */
  DBUG_ASSERT(info.commit_list==0 || tc_heuristic_recover==0);
  /* if either is set, total_ha_2pc must be set too */
  DBUG_ASSERT(info.dry_run ||
              (failed_ha_2pc + total_ha_2pc) > (ulong)opt_bin_log);

  if (total_ha_2pc <= (ulong)opt_bin_log)
    DBUG_RETURN(0);

  if (info.commit_list)
    sql_print_information("Starting crash recovery...");

  /* Allocate the XID buffer, halving the requested size until one fits. */
  for (info.len= MAX_XID_LIST_SIZE ;
       info.list==0 && info.len > MIN_XID_LIST_SIZE; info.len/=2)
  {
    DBUG_EXECUTE_IF("min_xa_len", info.len = 16;);
    info.list=(XID *)my_malloc(key_memory_XID, info.len*sizeof(XID), MYF(0));
  }
  if (!info.list)
  {
    sql_print_error(ER(ER_OUTOFMEMORY),
                    static_cast<int>(info.len*sizeof(XID)));
    DBUG_RETURN(1);
  }

  /* Visit every storage engine and recover its prepared transactions. */
  plugin_foreach(NULL, xarecover_handlerton,
                 MYSQL_STORAGE_ENGINE_PLUGIN, &info);

  my_free(info.list);
  if (info.found_foreign_xids)
    sql_print_warning("Found %d prepared XA transactions",
                      info.found_foreign_xids);
  if (info.dry_run && info.found_my_xids)
  {
    sql_print_error("Found %d prepared transactions! It means that mysqld was "
                    "not shut down properly last time and critical recovery "
                    "information (last binlog or %s file) was manually deleted "
                    "after a crash. You have to start mysqld with "
                    "--tc-heuristic-recover switch to commit or rollback "
                    "pending transactions.",
                    info.found_my_xids, opt_tc_log_file);
    DBUG_RETURN(1);
  }
  if (info.commit_list)
    sql_print_information("Crash recovery finished.");
  DBUG_RETURN(0);
}
2448
2449
2450 /*
2451 Called by engine to notify TC that a new commit checkpoint has been reached.
2452 See comments on handlerton method commit_checkpoint_request() for details.
2453 */
void commit_checkpoint_notify_ha(void *cookie)
{
  /* cookie identifies the pending checkpoint request being acknowledged. */
  tc_log->commit_checkpoint_notify(cookie);
}
2458
2459
2460 /**
2461 Check if all storage engines used in transaction agree that after
2462 rollback to savepoint it is safe to release MDL locks acquired after
2463 savepoint creation.
2464
2465 @param thd The client thread that executes the transaction.
2466
2467 @return true - It is safe to release MDL locks.
2468 false - If it is not.
2469 */
ha_rollback_to_savepoint_can_release_mdl(THD * thd)2470 bool ha_rollback_to_savepoint_can_release_mdl(THD *thd)
2471 {
2472 Ha_trx_info *ha_info;
2473 THD_TRANS *trans= (thd->in_sub_stmt ? &thd->transaction->stmt :
2474 &thd->transaction->all);
2475
2476 DBUG_ENTER("ha_rollback_to_savepoint_can_release_mdl");
2477
2478 /**
2479 Checking whether it is safe to release metadata locks after rollback to
2480 savepoint in all the storage engines that are part of the transaction.
2481 */
2482 for (ha_info= trans->ha_list; ha_info; ha_info= ha_info->next())
2483 {
2484 handlerton *ht= ha_info->ht();
2485 DBUG_ASSERT(ht);
2486
2487 if (ht->savepoint_rollback_can_release_mdl == 0 ||
2488 ht->savepoint_rollback_can_release_mdl(ht, thd) == false)
2489 DBUG_RETURN(false);
2490 }
2491
2492 DBUG_RETURN(true);
2493 }
2494
ha_rollback_to_savepoint(THD * thd,SAVEPOINT * sv)2495 int ha_rollback_to_savepoint(THD *thd, SAVEPOINT *sv)
2496 {
2497 int error=0;
2498 THD_TRANS *trans= (thd->in_sub_stmt ? &thd->transaction->stmt :
2499 &thd->transaction->all);
2500 Ha_trx_info *ha_info, *ha_info_next;
2501
2502 DBUG_ENTER("ha_rollback_to_savepoint");
2503
2504 trans->no_2pc=0;
2505 /*
2506 rolling back to savepoint in all storage engines that were part of the
2507 transaction when the savepoint was set
2508 */
2509 for (ha_info= sv->ha_list; ha_info; ha_info= ha_info->next())
2510 {
2511 int err;
2512 handlerton *ht= ha_info->ht();
2513 DBUG_ASSERT(ht);
2514 DBUG_ASSERT(ht->savepoint_set != 0);
2515 if ((err= ht->savepoint_rollback(ht, thd,
2516 (uchar *)(sv+1)+ht->savepoint_offset)))
2517 { // cannot happen
2518 my_error(ER_ERROR_DURING_ROLLBACK, MYF(0), err);
2519 error=1;
2520 }
2521 status_var_increment(thd->status_var.ha_savepoint_rollback_count);
2522 trans->no_2pc|= ht->prepare == 0;
2523 }
2524 /*
2525 rolling back the transaction in all storage engines that were not part of
2526 the transaction when the savepoint was set
2527 */
2528 for (ha_info= trans->ha_list; ha_info != sv->ha_list;
2529 ha_info= ha_info_next)
2530 {
2531 int err;
2532 handlerton *ht= ha_info->ht();
2533 #ifdef WITH_WSREP
2534 if (WSREP(thd) && ht->flags & HTON_WSREP_REPLICATION)
2535 {
2536 WSREP_DEBUG("ha_rollback_to_savepoint: run before_rollbackha_rollback_trans hook");
2537 (void) wsrep_before_rollback(thd, !thd->in_sub_stmt);
2538
2539 }
2540 #endif // WITH_WSREP
2541 if ((err= ht->rollback(ht, thd, !thd->in_sub_stmt)))
2542 { // cannot happen
2543 my_error(ER_ERROR_DURING_ROLLBACK, MYF(0), err);
2544 error=1;
2545 }
2546 #ifdef WITH_WSREP
2547 if (WSREP(thd) && ht->flags & HTON_WSREP_REPLICATION)
2548 {
2549 WSREP_DEBUG("ha_rollback_to_savepoint: run after_rollback hook");
2550 (void) wsrep_after_rollback(thd, !thd->in_sub_stmt);
2551 }
2552 #endif // WITH_WSREP
2553 status_var_increment(thd->status_var.ha_rollback_count);
2554 ha_info_next= ha_info->next();
2555 ha_info->reset(); /* keep it conveniently zero-filled */
2556 }
2557 trans->ha_list= sv->ha_list;
2558
2559 if (thd->m_transaction_psi != NULL)
2560 MYSQL_INC_TRANSACTION_ROLLBACK_TO_SAVEPOINT(thd->m_transaction_psi, 1);
2561
2562 DBUG_RETURN(error);
2563 }
2564
2565 /**
2566 @note
2567 according to the sql standard (ISO/IEC 9075-2:2003)
2568 section "4.33.4 SQL-statements and transaction states",
2569 SAVEPOINT is *not* transaction-initiating SQL-statement
2570 */
/**
  Set a savepoint in every storage engine registered in the transaction.

  @param thd  thread handle
  @param sv   savepoint descriptor; per-engine data is stored right after it

  @return 0 on success, 1 if some engine failed or does not support
          savepoints
*/
int ha_savepoint(THD *thd, SAVEPOINT *sv)
{
#ifdef WITH_WSREP
  /*
    Register binlog hton for savepoint processing if wsrep binlog
    emulation is on.
  */
  if (WSREP_EMULATE_BINLOG(thd) && wsrep_thd_is_local(thd))
  {
    wsrep_register_binlog_handler(thd, thd->in_multi_stmt_transaction_mode());
  }
#endif /* WITH_WSREP */
  int error=0;
  THD_TRANS *trans= (thd->in_sub_stmt ? &thd->transaction->stmt :
                                        &thd->transaction->all);
  Ha_trx_info *ha_info= trans->ha_list;
  DBUG_ENTER("ha_savepoint");

  for (; ha_info; ha_info= ha_info->next())
  {
    int err;
    handlerton *ht= ha_info->ht();
    DBUG_ASSERT(ht);
    if (! ht->savepoint_set)
    {
      /* One engine without savepoint support fails the whole SAVEPOINT. */
      my_error(ER_CHECK_NOT_IMPLEMENTED, MYF(0), "SAVEPOINT");
      error=1;
      break;
    }
    /* Engine-private savepoint data is stored after the SAVEPOINT struct,
       at the engine's registered savepoint_offset. */
    if ((err= ht->savepoint_set(ht, thd, (uchar *)(sv+1)+ht->savepoint_offset)))
    { // cannot happen
      my_error(ER_GET_ERRNO, MYF(0), err, hton_name(ht)->str);
      error=1;
    }
    status_var_increment(thd->status_var.ha_savepoint_count);
  }
  /*
    Remember the list of registered storage engines. All new
    engines are prepended to the beginning of the list.
  */
  sv->ha_list= trans->ha_list;

  if (!error && thd->m_transaction_psi != NULL)
    MYSQL_INC_TRANSACTION_SAVEPOINTS(thd->m_transaction_psi, 1);

  DBUG_RETURN(error);
}
2618
ha_release_savepoint(THD * thd,SAVEPOINT * sv)2619 int ha_release_savepoint(THD *thd, SAVEPOINT *sv)
2620 {
2621 int error=0;
2622 Ha_trx_info *ha_info= sv->ha_list;
2623 DBUG_ENTER("ha_release_savepoint");
2624
2625 for (; ha_info; ha_info= ha_info->next())
2626 {
2627 int err;
2628 handlerton *ht= ha_info->ht();
2629 /* Savepoint life time is enclosed into transaction life time. */
2630 DBUG_ASSERT(ht);
2631 if (!ht->savepoint_release)
2632 continue;
2633 if ((err= ht->savepoint_release(ht, thd,
2634 (uchar *)(sv+1) + ht->savepoint_offset)))
2635 { // cannot happen
2636 my_error(ER_GET_ERRNO, MYF(0), err, hton_name(ht)->str);
2637 error=1;
2638 }
2639 }
2640
2641 if (thd->m_transaction_psi != NULL)
2642 MYSQL_INC_TRANSACTION_RELEASE_SAVEPOINT(thd->m_transaction_psi, 1);
2643
2644 DBUG_RETURN(error);
2645 }
2646
2647
snapshot_handlerton(THD * thd,plugin_ref plugin,void * arg)2648 static my_bool snapshot_handlerton(THD *thd, plugin_ref plugin,
2649 void *arg)
2650 {
2651 handlerton *hton= plugin_hton(plugin);
2652 if (hton->start_consistent_snapshot)
2653 {
2654 if (hton->start_consistent_snapshot(hton, thd))
2655 return TRUE;
2656 *((bool *)arg)= false;
2657 }
2658 return FALSE;
2659 }
2660
ha_start_consistent_snapshot(THD * thd)2661 int ha_start_consistent_snapshot(THD *thd)
2662 {
2663 bool err, warn= true;
2664
2665 /*
2666 Holding the LOCK_commit_ordered mutex ensures that we get the same
2667 snapshot for all engines (including the binary log). This allows us
2668 among other things to do backups with
2669 START TRANSACTION WITH CONSISTENT SNAPSHOT and
2670 have a consistent binlog position.
2671 */
2672 mysql_mutex_lock(&LOCK_commit_ordered);
2673 err= plugin_foreach(thd, snapshot_handlerton, MYSQL_STORAGE_ENGINE_PLUGIN, &warn);
2674 mysql_mutex_unlock(&LOCK_commit_ordered);
2675
2676 if (err)
2677 {
2678 ha_rollback_trans(thd, true);
2679 return 1;
2680 }
2681
2682 /*
2683 Same idea as when one wants to CREATE TABLE in one engine which does not
2684 exist:
2685 */
2686 if (warn)
2687 push_warning(thd, Sql_condition::WARN_LEVEL_WARN, ER_UNKNOWN_ERROR,
2688 "This MariaDB server does not support any "
2689 "consistent-read capable storage engine");
2690 return 0;
2691 }
2692
2693
flush_handlerton(THD * thd,plugin_ref plugin,void * arg)2694 static my_bool flush_handlerton(THD *thd, plugin_ref plugin,
2695 void *arg)
2696 {
2697 handlerton *hton= plugin_hton(plugin);
2698 return hton->flush_logs && hton->flush_logs(hton);
2699 }
2700
2701
ha_flush_logs()2702 bool ha_flush_logs()
2703 {
2704 return plugin_foreach(NULL, flush_handlerton,
2705 MYSQL_STORAGE_ENGINE_PLUGIN, 0);
2706 }
2707
2708
2709 /**
2710 @brief make canonical filename
2711
2712 @param[in] file table handler
2713 @param[in] path original path
2714 @param[out] tmp_path buffer for canonized path
2715
2716 @details Lower case db name and table name path parts for
2717 non file based tables when lower_case_table_names
2718 is 2 (store as is, compare in lower case).
2719 Filesystem path prefix (mysql_data_home or tmpdir)
2720 is left intact.
2721
2722 @note tmp_path may be left intact if no conversion was
2723 performed.
2724
2725 @retval canonized path
2726
2727 @todo This may be done more efficiently when table path
2728 gets built. Convert this function to something like
2729 ASSERT_CANONICAL_FILENAME.
2730 */
get_canonical_filename(handler * file,const char * path,char * tmp_path)2731 const char *get_canonical_filename(handler *file, const char *path,
2732 char *tmp_path)
2733 {
2734 uint i;
2735 if (lower_case_table_names != 2 || (file->ha_table_flags() & HA_FILE_BASED))
2736 return path;
2737
2738 for (i= 0; i <= mysql_tmpdir_list.max; i++)
2739 {
2740 if (is_prefix(path, mysql_tmpdir_list.list[i]))
2741 return path;
2742 }
2743
2744 /* Ensure that table handler get path in lower case */
2745 if (tmp_path != path)
2746 strmov(tmp_path, path);
2747
2748 /*
2749 we only should turn into lowercase database/table part
2750 so start the process after homedirectory
2751 */
2752 my_casedn_str(files_charset_info, tmp_path + mysql_data_home_len);
2753 return tmp_path;
2754 }
2755
2756
2757 /**
2758 Delete a table in the engine
2759
2760 @return 0 Table was deleted
2761 @return -1 Table didn't exists, no error given
2762 @return # Error from table handler
2763
2764 @note
2765 ENOENT and HA_ERR_NO_SUCH_TABLE are not considered errors.
2766 The .frm file should be deleted by the caller only if we return <= 0.
2767 */
2768
int ha_delete_table(THD *thd, handlerton *hton, const char *path,
                    const LEX_CSTRING *db, const LEX_CSTRING *alias,
                    bool generate_warning)
{
  int error;
  bool is_error= thd->is_error();    /* remember pre-existing error state */
  DBUG_ENTER("ha_delete_table");

  /* hton is NULL in ALTER TABLE when renaming only .frm files */
  if (hton == NULL || hton == view_pseudo_hton)
    DBUG_RETURN(0);

  if (ha_check_if_updates_are_ignored(thd, hton, "DROP"))
    DBUG_RETURN(0);

  error= hton->drop_table(hton, path);
  if (error > 0)
  {
    /*
      It's not an error if the table doesn't exist in the engine.
      warn the user, but still report DROP being a success
    */
    bool intercept= non_existing_table_error(error);

    if ((!intercept || generate_warning) && ! thd->is_error())
    {
      /*
        Build a throw-away TABLE/TABLE_SHARE pair so that
        handler::print_error() can produce a message that names the table.
      */
      TABLE dummy_table;
      TABLE_SHARE dummy_share;
      handler *file= get_new_handler(nullptr, thd->mem_root, hton);
      if (file) {
        bzero((char*) &dummy_table, sizeof(dummy_table));
        bzero((char*) &dummy_share, sizeof(dummy_share));
        dummy_share.path.str= (char*) path;
        dummy_share.path.length= strlen(path);
        dummy_share.normalized_path= dummy_share.path;
        dummy_share.db= *db;
        dummy_share.table_name= *alias;
        dummy_table.s= &dummy_share;
        dummy_table.alias.set(alias->str, alias->length, table_alias_charset);
        file->change_table_ptr(&dummy_table, &dummy_share);
        /* "table does not exist" is downgraded to a warning. */
        file->print_error(error, MYF(intercept ? ME_WARNING : 0));
        delete file;
      }
    }
    if (intercept)
    {
      /* Clear error if we got it in this function */
      if (!is_error)
        thd->clear_error();
      error= -1;   /* table did not exist in the engine; not an error */
    }
  }
  if (error)
    DBUG_PRINT("exit", ("error: %d", error));
  DBUG_RETURN(error);
}
2825
2826 /****************************************************************************
2827 ** General handler functions
2828 ****************************************************************************/
2829
2830
2831 /**
2832 Clone a handler
2833
2834 @param name name of new table instance
2835 @param mem_root Where 'this->ref' should be allocated. It can't be
2836 in this->table->mem_root as otherwise we will not be
2837 able to reclaim that memory when the clone handler
2838 object is destroyed.
2839 */
2840
clone(const char * name,MEM_ROOT * mem_root)2841 handler *handler::clone(const char *name, MEM_ROOT *mem_root)
2842 {
2843 handler *new_handler= get_new_handler(table->s, mem_root, ht);
2844
2845 if (!new_handler)
2846 return NULL;
2847 if (new_handler->set_ha_share_ref(ha_share))
2848 goto err;
2849
2850 /*
2851 TODO: Implement a more efficient way to have more than one index open for
2852 the same table instance. The ha_open call is not cacheable for clone.
2853
2854 This is not critical as the engines already have the table open
2855 and should be able to use the original instance of the table.
2856 */
2857 if (new_handler->ha_open(table, name, table->db_stat,
2858 HA_OPEN_IGNORE_IF_LOCKED, mem_root))
2859 goto err;
2860
2861 return new_handler;
2862
2863 err:
2864 delete new_handler;
2865 return NULL;
2866 }
2867
2868
2869 /**
2870 clone of current handler.
2871
2872 Creates a clone of handler used for unique hash key and WITHOUT OVERLAPS.
2873 @return error code
2874 */
create_lookup_handler()2875 int handler::create_lookup_handler()
2876 {
2877 handler *tmp;
2878 if (lookup_handler != this)
2879 return 0;
2880 if (!(tmp= clone(table->s->normalized_path.str, table->in_use->mem_root)))
2881 return 1;
2882 lookup_handler= tmp;
2883 return lookup_handler->ha_external_lock(table->in_use, F_RDLCK);
2884 }
2885
/** Return the name of the storage engine (hton) backing this handler. */
LEX_CSTRING *handler::engine_name()
{
  return hton_name(ht);
}
2890
2891
2892 /*
2893 It is assumed that the value of the parameter 'ranges' can be only 0 or 1.
2894 If ranges == 1 then the function returns the cost of index only scan
2895 by index 'keyno' of one range containing 'rows' key entries.
2896 If ranges == 0 then the function returns only the cost of copying
2897 those key entries into the engine buffers.
2898 */
2899
keyread_time(uint index,uint ranges,ha_rows rows)2900 double handler::keyread_time(uint index, uint ranges, ha_rows rows)
2901 {
2902 DBUG_ASSERT(ranges == 0 || ranges == 1);
2903 size_t len= table->key_info[index].key_length + ref_length;
2904 if (table->file->is_clustering_key(index))
2905 len= table->s->stored_rec_length;
2906 double cost= (double)rows*len/(stats.block_size+1)*IDX_BLOCK_COPY_COST;
2907 if (ranges)
2908 {
2909 uint keys_per_block= (uint) (stats.block_size*3/4/len+1);
2910 ulonglong blocks= (rows+ keys_per_block- 1)/keys_per_block;
2911 cost+= blocks;
2912 }
2913 return cost;
2914 }
2915
2916
ha_thd(void) const2917 THD *handler::ha_thd(void) const
2918 {
2919 DBUG_ASSERT(!table || !table->in_use || table->in_use == current_thd);
2920 return (table && table->in_use) ? table->in_use : current_thd;
2921 }
2922
/**
  Detach the PSI table instrumentation from the current thread.
  Counterpart of rebind_psi().
*/
void handler::unbind_psi()
{
  /*
    Notify the instrumentation that this table is not owned
    by this thread any more.
  */
  PSI_CALL_unbind_table(m_psi);
}
2931
/**
  Re-attach the PSI table instrumentation to the current thread,
  refreshing m_psi from the table share. Counterpart of unbind_psi().
*/
void handler::rebind_psi()
{
  /*
    Notify the instrumentation that this table is now owned
    by this thread.
  */
  m_psi= PSI_CALL_rebind_table(ha_table_share_psi(), this, m_psi);
}
2940
2941
/**
  Enter PSI batch mode: per-row I/O accounting is accumulated in
  m_psi_numrows and flushed by end_psi_batch_mode().
  Must not be called while already in batch mode.
*/
void handler::start_psi_batch_mode()
{
#ifdef HAVE_PSI_TABLE_INTERFACE
  DBUG_ASSERT(m_psi_batch_mode == PSI_BATCH_MODE_NONE);
  DBUG_ASSERT(m_psi_locker == NULL);
  m_psi_batch_mode= PSI_BATCH_MODE_STARTING;
  m_psi_numrows= 0;
#endif
}
2951
/**
  Leave PSI batch mode. If a batch locker is active, the accumulated
  row count is reported to the instrumentation in a single call.
*/
void handler::end_psi_batch_mode()
{
#ifdef HAVE_PSI_TABLE_INTERFACE
  DBUG_ASSERT(m_psi_batch_mode != PSI_BATCH_MODE_NONE);
  if (m_psi_locker != NULL)
  {
    /* A locker only exists once the batch actually started */
    DBUG_ASSERT(m_psi_batch_mode == PSI_BATCH_MODE_STARTED);
    PSI_TABLE_CALL(end_table_io_wait)(m_psi_locker, m_psi_numrows);
    m_psi_locker= NULL;
  }
  m_psi_batch_mode= PSI_BATCH_MODE_NONE;
#endif
}
2965
/** Return the PSI table share of the underlying TABLE_SHARE. */
PSI_table_share *handler::ha_table_share_psi() const
{
  return table_share->m_psi;
}
2970
2971 /** @brief
2972 Open database-handler.
2973
2974 IMPLEMENTATION
2975 Try O_RDONLY if cannot open as O_RDWR
2976 Don't wait for locks if not HA_OPEN_WAIT_IF_LOCKED is set
2977 */
int handler::ha_open(TABLE *table_arg, const char *name, int mode,
                     uint test_if_locked, MEM_ROOT *mem_root,
                     List<String> *partitions_to_open)
{
  int error;
  DBUG_ENTER("handler::ha_open");
  DBUG_PRINT("enter",
             ("name: %s db_type: %d db_stat: %d mode: %d lock_test: %d",
              name, ht->db_type, table_arg->db_stat, mode,
              test_if_locked));

  table= table_arg;
  DBUG_ASSERT(table->s == table_share);
  DBUG_ASSERT(m_lock_type == F_UNLCK);
  DBUG_PRINT("info", ("old m_lock_type: %d F_UNLCK %d", m_lock_type, F_UNLCK));
  DBUG_ASSERT(alloc_root_inited(&table->mem_root));

  set_partitions_to_open(partitions_to_open);

  if (unlikely((error=open(name,mode,test_if_locked))))
  {
    /*
      Read-write open failed with a permission/read-only-media error:
      retry read-only if the caller allows it (HA_TRY_READ_ONLY).
    */
    if ((error == EACCES || error == EROFS) && mode == O_RDWR &&
        (table->db_stat & HA_TRY_READ_ONLY))
    {
      table->db_stat|=HA_READ_ONLY;
      error=open(name,O_RDONLY,test_if_locked);
    }
  }
  if (unlikely(error))
  {
    my_errno= error;                            /* Safeguard */
    DBUG_PRINT("error",("error: %d errno: %d",error,errno));
  }
  else
  {
    DBUG_ASSERT(m_psi == NULL);
    DBUG_ASSERT(table_share != NULL);
    /*
      Do not call this for partitions handlers, since it may take too much
      resources.
      So only use the m_psi on table level, not for individual partitions.
    */
    if (!(test_if_locked & HA_OPEN_NO_PSI_CALL))
    {
      m_psi= PSI_CALL_open_table(ha_table_share_psi(), this);
    }

    if (table->s->db_options_in_use & HA_OPTION_READ_ONLY_DATA)
      table->db_stat|=HA_READ_ONLY;
    (void) extra(HA_EXTRA_NO_READCHECK);	// Not needed in SQL

    /*
      Allocate ref in thd or on the table's mem_root.
      The buffer holds two row ids: 'ref' and 'dup_ref' back to back.
    */
    if (!(ref= (uchar*) alloc_root(mem_root ? mem_root : &table->mem_root,
                                   ALIGN_SIZE(ref_length)*2)))
    {
      ha_close();
      error=HA_ERR_OUT_OF_MEM;
    }
    else
      dup_ref=ref+ALIGN_SIZE(ref_length);
    /* Cache the (possibly open-state-dependent) table flags */
    cached_table_flags= table_flags();
  }
  reset_statistics();
  internal_tmp_table= MY_TEST(test_if_locked & HA_OPEN_INTERNAL_TABLE);
  DBUG_RETURN(error);
}
3044
/**
  Close the handler instance.
  Requires that the table is unlocked and no index/rnd scan is active;
  the return value comes from the engine-specific close().
*/
int handler::ha_close(void)
{
  DBUG_ENTER("ha_close");
  /*
    Increment global statistics for temporary tables.
    In_use is 0 for tables that were closed from the table cache.
  */
  if (table->in_use)
    status_var_add(table->in_use->status_var.rows_tmp_read, rows_tmp_read);
  PSI_CALL_close_table(table_share, m_psi);
  m_psi= NULL; /* instrumentation handle, invalid after close_table() */
  DBUG_ASSERT(m_psi_batch_mode == PSI_BATCH_MODE_NONE);
  DBUG_ASSERT(m_psi_locker == NULL);

  /* Detach from ANALYZE tracker */
  tracker= NULL;
  /* We use ref as way to check that open succeeded */
  ref= 0;

  DBUG_ASSERT(m_lock_type == F_UNLCK);
  DBUG_ASSERT(inited == NONE);
  DBUG_RETURN(close());
}
3068
3069
/**
  Read the next row of a table scan, skipping rows the engine reports
  as deleted (HA_ERR_RECORD_DELETED) and honouring a statement kill.
*/
int handler::ha_rnd_next(uchar *buf)
{
  int result;
  DBUG_ENTER("handler::ha_rnd_next");
  DBUG_ASSERT(table_share->tmp_table != NO_TMP_TABLE ||
              m_lock_type != F_UNLCK);
  DBUG_ASSERT(inited == RND);

  do
  {
    TABLE_IO_WAIT(tracker, PSI_TABLE_FETCH_ROW, MAX_KEY, result,
      { result= rnd_next(buf); })
    if (result != HA_ERR_RECORD_DELETED)
      break;
    status_var_increment(table->in_use->status_var.ha_read_rnd_deleted_count);
  } while (!table->in_use->check_killed(1));

  /* Loop was left because of a kill: report abort to the caller */
  if (result == HA_ERR_RECORD_DELETED)
    result= HA_ERR_ABORTED_BY_USER;
  else
  {
    if (!result)
    {
      update_rows_read();
      /* Re-evaluate virtual columns when the row landed in record[0] */
      if (table->vfield && buf == table->record[0])
        table->update_virtual_fields(this, VCOL_UPDATE_FOR_READ);
    }
    increment_statistics(&SSV::ha_read_rnd_next_count);
  }

  table->status=result ? STATUS_NOT_FOUND: 0;
  DBUG_RETURN(result);
}
3103
ha_rnd_pos(uchar * buf,uchar * pos)3104 int handler::ha_rnd_pos(uchar *buf, uchar *pos)
3105 {
3106 int result;
3107 DBUG_ENTER("handler::ha_rnd_pos");
3108 DBUG_ASSERT(table_share->tmp_table != NO_TMP_TABLE ||
3109 m_lock_type != F_UNLCK);
3110 DBUG_ASSERT(inited == RND);
3111
3112 TABLE_IO_WAIT(tracker, PSI_TABLE_FETCH_ROW, MAX_KEY, result,
3113 { result= rnd_pos(buf, pos); })
3114 increment_statistics(&SSV::ha_read_rnd_count);
3115 if (result == HA_ERR_RECORD_DELETED)
3116 result= HA_ERR_KEY_NOT_FOUND;
3117 else if (!result)
3118 {
3119 update_rows_read();
3120 if (table->vfield && buf == table->record[0])
3121 table->update_virtual_fields(this, VCOL_UPDATE_FOR_READ);
3122 }
3123 table->status=result ? STATUS_NOT_FOUND: 0;
3124 DBUG_RETURN(result);
3125 }
3126
ha_index_read_map(uchar * buf,const uchar * key,key_part_map keypart_map,enum ha_rkey_function find_flag)3127 int handler::ha_index_read_map(uchar *buf, const uchar *key,
3128 key_part_map keypart_map,
3129 enum ha_rkey_function find_flag)
3130 {
3131 int result;
3132 DBUG_ENTER("handler::ha_index_read_map");
3133 DBUG_ASSERT(table_share->tmp_table != NO_TMP_TABLE ||
3134 m_lock_type != F_UNLCK);
3135 DBUG_ASSERT(inited==INDEX);
3136
3137 TABLE_IO_WAIT(tracker, PSI_TABLE_FETCH_ROW, active_index, result,
3138 { result= index_read_map(buf, key, keypart_map, find_flag); })
3139 increment_statistics(&SSV::ha_read_key_count);
3140 if (!result)
3141 {
3142 update_index_statistics();
3143 if (table->vfield && buf == table->record[0])
3144 table->update_virtual_fields(this, VCOL_UPDATE_FOR_READ);
3145 }
3146 table->status=result ? STATUS_NOT_FOUND: 0;
3147 DBUG_RETURN(result);
3148 }
3149
3150 /*
3151 @note: Other index lookup/navigation functions require prior
3152 handler->index_init() call. This function is different, it requires
3153 that the scan is not initialized, and accepts "uint index" as an argument.
3154 */
3155
ha_index_read_idx_map(uchar * buf,uint index,const uchar * key,key_part_map keypart_map,enum ha_rkey_function find_flag)3156 int handler::ha_index_read_idx_map(uchar *buf, uint index, const uchar *key,
3157 key_part_map keypart_map,
3158 enum ha_rkey_function find_flag)
3159 {
3160 int result;
3161 DBUG_ASSERT(inited==NONE);
3162 DBUG_ASSERT(table_share->tmp_table != NO_TMP_TABLE ||
3163 m_lock_type != F_UNLCK);
3164 DBUG_ASSERT(end_range == NULL);
3165 TABLE_IO_WAIT(tracker, PSI_TABLE_FETCH_ROW, index, result,
3166 { result= index_read_idx_map(buf, index, key, keypart_map, find_flag); })
3167 increment_statistics(&SSV::ha_read_key_count);
3168 if (!result)
3169 {
3170 update_rows_read();
3171 index_rows_read[index]++;
3172 if (table->vfield && buf == table->record[0])
3173 table->update_virtual_fields(this, VCOL_UPDATE_FOR_READ);
3174 }
3175 table->status=result ? STATUS_NOT_FOUND: 0;
3176 return result;
3177 }
3178
ha_index_next(uchar * buf)3179 int handler::ha_index_next(uchar * buf)
3180 {
3181 int result;
3182 DBUG_ENTER("handler::ha_index_next");
3183 DBUG_ASSERT(table_share->tmp_table != NO_TMP_TABLE ||
3184 m_lock_type != F_UNLCK);
3185 DBUG_ASSERT(inited==INDEX);
3186
3187 TABLE_IO_WAIT(tracker, PSI_TABLE_FETCH_ROW, active_index, result,
3188 { result= index_next(buf); })
3189 increment_statistics(&SSV::ha_read_next_count);
3190 if (!result)
3191 {
3192 update_index_statistics();
3193 if (table->vfield && buf == table->record[0])
3194 table->update_virtual_fields(this, VCOL_UPDATE_FOR_READ);
3195 }
3196 table->status=result ? STATUS_NOT_FOUND: 0;
3197
3198 DEBUG_SYNC(ha_thd(), "handler_ha_index_next_end");
3199
3200 DBUG_RETURN(result);
3201 }
3202
ha_index_prev(uchar * buf)3203 int handler::ha_index_prev(uchar * buf)
3204 {
3205 int result;
3206 DBUG_ENTER("handler::ha_index_prev");
3207 DBUG_ASSERT(table_share->tmp_table != NO_TMP_TABLE ||
3208 m_lock_type != F_UNLCK);
3209 DBUG_ASSERT(inited==INDEX);
3210
3211 TABLE_IO_WAIT(tracker, PSI_TABLE_FETCH_ROW, active_index, result,
3212 { result= index_prev(buf); })
3213 increment_statistics(&SSV::ha_read_prev_count);
3214 if (!result)
3215 {
3216 update_index_statistics();
3217 if (table->vfield && buf == table->record[0])
3218 table->update_virtual_fields(this, VCOL_UPDATE_FOR_READ);
3219 }
3220 table->status=result ? STATUS_NOT_FOUND: 0;
3221 DBUG_RETURN(result);
3222 }
3223
ha_index_first(uchar * buf)3224 int handler::ha_index_first(uchar * buf)
3225 {
3226 int result;
3227 DBUG_ASSERT(table_share->tmp_table != NO_TMP_TABLE ||
3228 m_lock_type != F_UNLCK);
3229 DBUG_ASSERT(inited==INDEX);
3230
3231 TABLE_IO_WAIT(tracker, PSI_TABLE_FETCH_ROW, active_index, result,
3232 { result= index_first(buf); })
3233 increment_statistics(&SSV::ha_read_first_count);
3234 if (!result)
3235 {
3236 update_index_statistics();
3237 if (table->vfield && buf == table->record[0])
3238 table->update_virtual_fields(this, VCOL_UPDATE_FOR_READ);
3239 }
3240 table->status=result ? STATUS_NOT_FOUND: 0;
3241 return result;
3242 }
3243
ha_index_last(uchar * buf)3244 int handler::ha_index_last(uchar * buf)
3245 {
3246 int result;
3247 DBUG_ASSERT(table_share->tmp_table != NO_TMP_TABLE ||
3248 m_lock_type != F_UNLCK);
3249 DBUG_ASSERT(inited==INDEX);
3250
3251 TABLE_IO_WAIT(tracker, PSI_TABLE_FETCH_ROW, active_index, result,
3252 { result= index_last(buf); })
3253 increment_statistics(&SSV::ha_read_last_count);
3254 if (!result)
3255 {
3256 update_index_statistics();
3257 if (table->vfield && buf == table->record[0])
3258 table->update_virtual_fields(this, VCOL_UPDATE_FOR_READ);
3259 }
3260 table->status=result ? STATUS_NOT_FOUND: 0;
3261 return result;
3262 }
3263
ha_index_next_same(uchar * buf,const uchar * key,uint keylen)3264 int handler::ha_index_next_same(uchar *buf, const uchar *key, uint keylen)
3265 {
3266 int result;
3267 DBUG_ASSERT(table_share->tmp_table != NO_TMP_TABLE ||
3268 m_lock_type != F_UNLCK);
3269 DBUG_ASSERT(inited==INDEX);
3270
3271 TABLE_IO_WAIT(tracker, PSI_TABLE_FETCH_ROW, active_index, result,
3272 { result= index_next_same(buf, key, keylen); })
3273 increment_statistics(&SSV::ha_read_next_count);
3274 if (!result)
3275 {
3276 update_index_statistics();
3277 if (table->vfield && buf == table->record[0])
3278 table->update_virtual_fields(this, VCOL_UPDATE_FOR_READ);
3279 }
3280 table->status=result ? STATUS_NOT_FOUND: 0;
3281 return result;
3282 }
3283
3284
ha_was_semi_consistent_read()3285 bool handler::ha_was_semi_consistent_read()
3286 {
3287 bool result= was_semi_consistent_read();
3288 if (result)
3289 increment_statistics(&SSV::ha_read_retry_count);
3290 return result;
3291 }
3292
3293 /* Initialize handler for random reading, with error handling */
3294
ha_rnd_init_with_error(bool scan)3295 int handler::ha_rnd_init_with_error(bool scan)
3296 {
3297 int error;
3298 if (likely(!(error= ha_rnd_init(scan))))
3299 return 0;
3300 table->file->print_error(error, MYF(0));
3301 return error;
3302 }
3303
3304
3305 /**
3306 Read first row (only) from a table. Used for reading tables with
3307 only one row, either based on table statistics or if table is a SEQUENCE.
3308
  This is never called for normal InnoDB tables, as these table types
  do not have HA_STATS_RECORDS_IS_EXACT set.
3311 */
read_first_row(uchar * buf,uint primary_key)3312 int handler::read_first_row(uchar * buf, uint primary_key)
3313 {
3314 int error;
3315 DBUG_ENTER("handler::read_first_row");
3316
3317 /*
3318 If there is very few deleted rows in the table, find the first row by
3319 scanning the table.
3320 TODO remove the test for HA_READ_ORDER
3321 */
3322 if (stats.deleted < 10 || primary_key >= MAX_KEY ||
3323 !(index_flags(primary_key, 0, 0) & HA_READ_ORDER))
3324 {
3325 if (likely(!(error= ha_rnd_init(1))))
3326 {
3327 error= ha_rnd_next(buf);
3328 const int end_error= ha_rnd_end();
3329 if (likely(!error))
3330 error= end_error;
3331 }
3332 }
3333 else
3334 {
3335 /* Find the first row through the primary key */
3336 if (likely(!(error= ha_index_init(primary_key, 0))))
3337 {
3338 error= ha_index_first(buf);
3339 const int end_error= ha_index_end();
3340 if (likely(!error))
3341 error= end_error;
3342 }
3343 }
3344 DBUG_RETURN(error);
3345 }
3346
3347 /**
3348 Generate the next auto-increment number based on increment and offset.
  It computes the lowest number that is
3350 - strictly greater than "nr"
3351 - of the form: auto_increment_offset + N * auto_increment_increment
3352 If overflow happened then return MAX_ULONGLONG value as an
3353 indication of overflow.
3354 In most cases increment= offset= 1, in which case we get:
3355 @verbatim 1,2,3,4,5,... @endverbatim
  If increment=10 and offset=5 and previous number is 1, we get:
  @verbatim 5,15,25,35,... @endverbatim
3358 */
3359 inline ulonglong
compute_next_insert_id(ulonglong nr,struct system_variables * variables)3360 compute_next_insert_id(ulonglong nr,struct system_variables *variables)
3361 {
3362 const ulonglong save_nr= nr;
3363
3364 if (variables->auto_increment_increment == 1)
3365 nr= nr + 1; // optimization of the formula below
3366 else
3367 {
3368 /*
3369 Calculating the number of complete auto_increment_increment extents:
3370 */
3371 nr= (nr + variables->auto_increment_increment -
3372 variables->auto_increment_offset) /
3373 (ulonglong) variables->auto_increment_increment;
3374 /*
3375 Adding an offset to the auto_increment_increment extent boundary:
3376 */
3377 nr= nr * (ulonglong) variables->auto_increment_increment +
3378 variables->auto_increment_offset;
3379 }
3380
3381 if (unlikely(nr <= save_nr))
3382 return ULONGLONG_MAX;
3383
3384 return nr;
3385 }
3386
3387
adjust_next_insert_id_after_explicit_value(ulonglong nr)3388 void handler::adjust_next_insert_id_after_explicit_value(ulonglong nr)
3389 {
3390 /*
3391 If we have set THD::next_insert_id previously and plan to insert an
3392 explicitly-specified value larger than this, we need to increase
3393 THD::next_insert_id to be greater than the explicit value.
3394 */
3395 if ((next_insert_id > 0) && (nr >= next_insert_id))
3396 set_next_insert_id(compute_next_insert_id(nr, &table->in_use->variables));
3397 }
3398
3399
3400 /** @brief
3401 Computes the largest number X:
3402 - smaller than or equal to "nr"
3403 - of the form: auto_increment_offset + N * auto_increment_increment
3404 where N>=0.
3405
3406 SYNOPSIS
3407 prev_insert_id
3408 nr Number to "round down"
3409 variables variables struct containing auto_increment_increment and
3410 auto_increment_offset
3411
3412 RETURN
3413 The number X if it exists, "nr" otherwise.
3414 */
3415 inline ulonglong
prev_insert_id(ulonglong nr,struct system_variables * variables)3416 prev_insert_id(ulonglong nr, struct system_variables *variables)
3417 {
3418 if (unlikely(nr < variables->auto_increment_offset))
3419 {
3420 /*
3421 There's nothing good we can do here. That is a pathological case, where
3422 the offset is larger than the column's max possible value, i.e. not even
3423 the first sequence value may be inserted. User will receive warning.
3424 */
3425 DBUG_PRINT("info",("auto_increment: nr: %lu cannot honour "
3426 "auto_increment_offset: %lu",
3427 (ulong) nr, variables->auto_increment_offset));
3428 return nr;
3429 }
3430 if (variables->auto_increment_increment == 1)
3431 return nr; // optimization of the formula below
3432 /*
3433 Calculating the number of complete auto_increment_increment extents:
3434 */
3435 nr= (nr - variables->auto_increment_offset) /
3436 (ulonglong) variables->auto_increment_increment;
3437 /*
3438 Adding an offset to the auto_increment_increment extent boundary:
3439 */
3440 return (nr * (ulonglong) variables->auto_increment_increment +
3441 variables->auto_increment_offset);
3442 }
3443
3444
3445 /**
3446 Update the auto_increment field if necessary.
3447
3448 Updates columns with type NEXT_NUMBER if:
3449
3450 - If column value is set to NULL (in which case
3451 auto_increment_field_not_null is 0)
3452 - If column is set to 0 and (sql_mode & MODE_NO_AUTO_VALUE_ON_ZERO) is not
3453 set. In the future we will only set NEXT_NUMBER fields if one sets them
3454 to NULL (or they are not included in the insert list).
3455
3456 In those cases, we check if the currently reserved interval still has
3457 values we have not used. If yes, we pick the smallest one and use it.
3458 Otherwise:
3459
3460 - If a list of intervals has been provided to the statement via SET
3461 INSERT_ID or via an Intvar_log_event (in a replication slave), we pick the
3462 first unused interval from this list, consider it as reserved.
3463
3464 - Otherwise we set the column for the first row to the value
3465 next_insert_id(get_auto_increment(column))) which is usually
3466 max-used-column-value+1.
3467 We call get_auto_increment() for the first row in a multi-row
3468 statement. get_auto_increment() will tell us the interval of values it
3469 reserved for us.
3470
3471 - In both cases, for the following rows we use those reserved values without
3472 calling the handler again (we just progress in the interval, computing
3473 each new value from the previous one). Until we have exhausted them, then
3474 we either take the next provided interval or call get_auto_increment()
3475 again to reserve a new interval.
3476
3477 - In both cases, the reserved intervals are remembered in
3478 thd->auto_inc_intervals_in_cur_stmt_for_binlog if statement-based
3479 binlogging; the last reserved interval is remembered in
3480 auto_inc_interval_for_cur_row. The number of reserved intervals is
3481 remembered in auto_inc_intervals_count. It differs from the number of
3482 elements in thd->auto_inc_intervals_in_cur_stmt_for_binlog() because the
3483 latter list is cumulative over all statements forming one binlog event
3484 (when stored functions and triggers are used), and collapses two
3485 contiguous intervals in one (see its append() method).
3486
3487 The idea is that generated auto_increment values are predictable and
3488 independent of the column values in the table. This is needed to be
3489 able to replicate into a table that already has rows with a higher
3490 auto-increment value than the one that is inserted.
3491
3492 After we have already generated an auto-increment number and the user
3493 inserts a column with a higher value than the last used one, we will
3494 start counting from the inserted value.
3495
3496 This function's "outputs" are: the table's auto_increment field is filled
3497 with a value, thd->next_insert_id is filled with the value to use for the
3498 next row, if a value was autogenerated for the current row it is stored in
3499 thd->insert_id_for_cur_row, if get_auto_increment() was called
3500 thd->auto_inc_interval_for_cur_row is modified, if that interval is not
3501 present in thd->auto_inc_intervals_in_cur_stmt_for_binlog it is added to
3502 this list.
3503
3504 @todo
3505 Replace all references to "next number" or NEXT_NUMBER to
3506 "auto_increment", everywhere (see below: there is
3507 table->auto_increment_field_not_null, and there also exists
3508 table->next_number_field, it's not consistent).
3509
3510 @retval
3511 0 ok
3512 @retval
3513 HA_ERR_AUTOINC_READ_FAILED get_auto_increment() was called and
3514 returned ~(ulonglong) 0
3515 @retval
3516 HA_ERR_AUTOINC_ERANGE storing value in field caused strict mode
3517 failure.
3518 */
3519
3520 #define AUTO_INC_DEFAULT_NB_ROWS 1 // Some prefer 1024 here
3521 #define AUTO_INC_DEFAULT_NB_MAX_BITS 16
3522 #define AUTO_INC_DEFAULT_NB_MAX ((1 << AUTO_INC_DEFAULT_NB_MAX_BITS) - 1)
3523
int handler::update_auto_increment()
{
  ulonglong nr, nb_reserved_values;
  bool append= FALSE;
  THD *thd= table->in_use;
  struct system_variables *variables= &thd->variables;
  int result=0, tmp;
  DBUG_ENTER("handler::update_auto_increment");

  /*
    next_insert_id is a "cursor" into the reserved interval, it may go greater
    than the interval, but not smaller.
  */
  DBUG_ASSERT(next_insert_id >= auto_inc_interval_for_cur_row.minimum());

  /*
    An explicit non-zero value (or an explicit 0 under
    NO_AUTO_VALUE_ON_ZERO) was supplied: no value is generated.
  */
  if ((nr= table->next_number_field->val_int()) != 0 ||
      (table->auto_increment_field_not_null &&
       thd->variables.sql_mode & MODE_NO_AUTO_VALUE_ON_ZERO))
  {

    /*
      There could be an error reported because value was truncated
      when strict mode is enabled.
    */
    if (thd->is_error())
      DBUG_RETURN(HA_ERR_AUTOINC_ERANGE);
    /*
      Update next_insert_id if we had already generated a value in this
      statement (case of INSERT VALUES(null),(3763),(null):
      the last NULL needs to insert 3764, not the value of the first NULL plus
      1).
      Ignore negative values.
    */
    if ((longlong) nr > 0 || (table->next_number_field->flags & UNSIGNED_FLAG))
      adjust_next_insert_id_after_explicit_value(nr);
    insert_id_for_cur_row= 0; // didn't generate anything
    DBUG_RETURN(0);
  }

  /*
    System-versioned table: only rows whose row_end is "infinity"
    (current row versions) get a generated value.
  */
  if (table->versioned())
  {
    Field *end= table->vers_end_field();
    DBUG_ASSERT(end);
    bitmap_set_bit(table->read_set, end->field_index);
    if (!end->is_max())
    {
      if (thd->lex->sql_command == SQLCOM_ALTER_TABLE)
      {
        if (!table->next_number_field->real_maybe_null())
          DBUG_RETURN(HA_ERR_UNSUPPORTED);
        table->next_number_field->set_null();
      }
      DBUG_RETURN(0);
    }
  }

  // ALTER TABLE ... ADD COLUMN ... AUTO_INCREMENT
  if (thd->lex->sql_command == SQLCOM_ALTER_TABLE)
    table->next_number_field->set_notnull();

  if ((nr= next_insert_id) >= auto_inc_interval_for_cur_row.maximum())
  {
    /* next_insert_id is beyond what is reserved, so we reserve more. */
    const Discrete_interval *forced=
      thd->auto_inc_intervals_forced.get_next();
    if (forced != NULL)
    {
      /* SET INSERT_ID / Intvar_log_event supplied the interval */
      nr= forced->minimum();
      nb_reserved_values= forced->values();
    }
    else
    {
      /*
        handler::estimation_rows_to_insert was set by
        handler::ha_start_bulk_insert(); if 0 it means "unknown".
      */
      ulonglong nb_desired_values;
      /*
        If an estimation was given to the engine:
        - use it.
        - if we already reserved numbers, it means the estimation was
        not accurate, then we'll reserve 2*AUTO_INC_DEFAULT_NB_ROWS the 2nd
        time, twice that the 3rd time etc.
        If no estimation was given, use those increasing defaults from the
        start, starting from AUTO_INC_DEFAULT_NB_ROWS.
        Don't go beyond a max to not reserve "way too much" (because
        reservation means potentially losing unused values).
        Note that in prelocked mode no estimation is given.
      */

      if ((auto_inc_intervals_count == 0) && (estimation_rows_to_insert > 0))
        nb_desired_values= estimation_rows_to_insert;
      else if ((auto_inc_intervals_count == 0) &&
               (thd->lex->many_values.elements > 0))
      {
        /*
          For multi-row inserts, if the bulk inserts cannot be started, the
          handler::estimation_rows_to_insert will not be set. But we still
          want to reserve the autoinc values.
        */
        nb_desired_values= thd->lex->many_values.elements;
      }
      else /* go with the increasing defaults */
      {
        /* avoid overflow in formula, with this if() */
        if (auto_inc_intervals_count <= AUTO_INC_DEFAULT_NB_MAX_BITS)
        {
          nb_desired_values= AUTO_INC_DEFAULT_NB_ROWS *
            (1 << auto_inc_intervals_count);
          set_if_smaller(nb_desired_values, AUTO_INC_DEFAULT_NB_MAX);
        }
        else
          nb_desired_values= AUTO_INC_DEFAULT_NB_MAX;
      }
      get_auto_increment(variables->auto_increment_offset,
                         variables->auto_increment_increment,
                         nb_desired_values, &nr,
                         &nb_reserved_values);
      if (nr == ULONGLONG_MAX)
        DBUG_RETURN(HA_ERR_AUTOINC_READ_FAILED);  // Mark failure

      /*
        That rounding below should not be needed when all engines actually
        respect offset and increment in get_auto_increment(). But they don't
        so we still do it. Wonder if for the not-first-in-index we should do
        it. Hope that this rounding didn't push us out of the interval; even
        if it did we cannot do anything about it (calling the engine again
        will not help as we inserted no row).
      */
      nr= compute_next_insert_id(nr-1, variables);
    }

    if (table->s->next_number_keypart == 0)
    {
      /* We must defer the appending until "nr" has been possibly truncated */
      append= TRUE;
    }
    else
    {
      /*
        For such auto_increment there is no notion of interval, just a
        singleton. The interval is not even stored in
        thd->auto_inc_interval_for_cur_row, so we are sure to call the engine
        for next row.
      */
      DBUG_PRINT("info",("auto_increment: special not-first-in-index"));
    }
  }

  if (unlikely(nr == ULONGLONG_MAX))
    DBUG_RETURN(HA_ERR_AUTOINC_ERANGE);

  DBUG_ASSERT(nr != 0);
  DBUG_PRINT("info",("auto_increment: %llu nb_reserved_values: %llu",
                     nr, append ? nb_reserved_values : 0));

  /* Store field without warning (Warning will be printed by insert) */
  {
    Check_level_instant_set check_level_save(thd, CHECK_FIELD_IGNORE);
    tmp= table->next_number_field->store((longlong)nr, TRUE);
  }

  if (unlikely(tmp))                            // Out of range value in store
  {
    /*
      First, test if the query was aborted due to strict mode constraints
      or new field value greater than maximum integer value:
    */
    if (thd->killed == KILL_BAD_DATA ||
        nr > table->next_number_field->get_max_int_value())
    {
      /*
        It's better to return an error here than getting a confusing
        'duplicate key error' later.
      */
      result= HA_ERR_AUTOINC_ERANGE;
    }
    else
    {
      /*
        Field refused this value (overflow) and truncated it, use the result
        of the truncation (which is going to be inserted); however we try to
        decrease it to honour auto_increment_* variables.
        That will shift the left bound of the reserved interval, we don't
        bother shifting the right bound (anyway any other value from this
        interval will cause a duplicate key).
      */
      nr= prev_insert_id(table->next_number_field->val_int(), variables);
      if (unlikely(table->next_number_field->store((longlong)nr, TRUE)))
        nr= table->next_number_field->val_int();
    }
  }
  if (append)
  {
    auto_inc_interval_for_cur_row.replace(nr, nb_reserved_values,
                                          variables->auto_increment_increment);
    auto_inc_intervals_count++;
    /* Row-based replication does not need to store intervals in binlog */
    if (((WSREP_NNULL(thd) && wsrep_emulate_bin_log) ||
         mysql_bin_log.is_open()) &&
        !thd->is_current_stmt_binlog_format_row())
      thd->auto_inc_intervals_in_cur_stmt_for_binlog.
        append(auto_inc_interval_for_cur_row.minimum(),
               auto_inc_interval_for_cur_row.values(),
               variables->auto_increment_increment);
  }

  /*
    Record this autogenerated value. If the caller then
    succeeds to insert this value, it will call
    record_first_successful_insert_id_in_cur_stmt()
    which will set first_successful_insert_id_in_cur_stmt if it's not
    already set.
  */
  insert_id_for_cur_row= nr;

  if (result)                                   // overflow
    DBUG_RETURN(result);

  /*
    Set next insert id to point to next auto-increment value to be able to
    handle multi-row statements.
  */
  set_next_insert_id(compute_next_insert_id(nr, variables));

  DBUG_RETURN(0);
}
3751
3752
3753 /** @brief
3754 MySQL signal that it changed the column bitmap
3755
3756 USAGE
3757 This is for handlers that needs to setup their own column bitmaps.
3758 Normally the handler should set up their own column bitmaps in
3759 index_init() or rnd_init() and in any column_bitmaps_signal() call after
3760 this.
3761
3762 The handler is allowed to do changes to the bitmap after a index_init or
3763 rnd_init() call is made as after this, MySQL will not use the bitmap
3764 for any program logic checking.
3765 */
/*
  Default implementation of the column-bitmap change notification: it only
  traces the new read/write sets. Engines that cache column bitmaps
  override this to refresh their state.
*/
void handler::column_bitmaps_signal()
{
  DBUG_ENTER("column_bitmaps_signal");
  /* table may be NULL when the handler is not attached to a TABLE yet */
  if (table)
    DBUG_PRINT("info", ("read_set: %p write_set: %p",
                        table->read_set, table->write_set));
  DBUG_VOID_RETURN;
}
3774
3775
3776 /** @brief
3777 Reserves an interval of auto_increment values from the handler.
3778
3779 SYNOPSIS
3780 get_auto_increment()
3781 offset
3782 increment
3783 nb_desired_values how many values we want
3784 first_value (OUT) the first value reserved by the handler
3785 nb_reserved_values (OUT) how many values the handler reserved
3786
3787 offset and increment means that we want values to be of the form
3788 offset + N * increment, where N>=0 is integer.
3789 If the function sets *first_value to ~(ulonglong)0 it means an error.
3790 If the function sets *nb_reserved_values to ULONGLONG_MAX it means it has
3791 reserved to "positive infinite".
3792 */
void handler::get_auto_increment(ulonglong offset, ulonglong increment,
                                 ulonglong nb_desired_values,
                                 ulonglong *first_value,
                                 ulonglong *nb_reserved_values)
{
  ulonglong nr;
  int error;
  MY_BITMAP *old_read_set;
  /* Remember if a table scan was active so we can restart it on exit */
  bool rnd_inited= (inited == RND);

  /* The index read below cannot run concurrently with an rnd scan */
  if (rnd_inited && ha_rnd_end())
    return;

  /* Switch to key-only read on the auto-increment index */
  old_read_set= table->prepare_for_keyread(table->s->next_number_index);

  if (ha_index_init(table->s->next_number_index, 1))
  {
    /* This should never happen, assert in debug, and fail in release build */
    DBUG_ASSERT(0);
    (void) extra(HA_EXTRA_NO_KEYREAD);
    /* ~0 signals an error to the caller (see function documentation) */
    *first_value= ULONGLONG_MAX;
    if (rnd_inited && ha_rnd_init_with_error(0))
    {
      //TODO: it would be nice to return here an error
    }
    return;
  }

  if (table->s->next_number_keypart == 0)
  {						// Autoincrement at key-start
    /* Largest existing value is the last entry of the index */
    error= ha_index_last(table->record[1]);
    /*
      MySQL implicitly assumes such method does locking (as MySQL decides to
      use nr+increment without checking again with the handler, in
      handler::update_auto_increment()), so reserves to infinite.
    */
    *nb_reserved_values= ULONGLONG_MAX;
  }
  else
  {
    /*
      Auto-increment is a suffix key part: find the largest value within
      the current prefix (the leading key parts from record[0]).
    */
    uchar key[MAX_KEY_LENGTH];
    key_copy(key, table->record[0],
             table->key_info + table->s->next_number_index,
             table->s->next_number_key_offset);
    error= ha_index_read_map(table->record[1], key,
                             make_prev_keypart_map(table->s->
                                                   next_number_keypart),
                             HA_READ_PREFIX_LAST);
    /*
      MySQL needs to call us for next row: assume we are inserting ("a",null)
      here, we return 3, and next this statement will want to insert
      ("b",null): there is no reason why ("b",3+1) would be the good row to
      insert: maybe it already exists, maybe 3+1 is too large...
    */
    *nb_reserved_values= 1;
  }

  if (unlikely(error))
  {
    if (error == HA_ERR_END_OF_FILE || error == HA_ERR_KEY_NOT_FOUND)
      /* No entry found, that's fine */;
    else
      print_error(error, MYF(0));
    /* Empty index (or read error): start counting from 1 */
    nr= 1;
  }
  else
    /* record[1] holds the found row; read the field at that offset */
    nr= ((ulonglong) table->next_number_field->
         val_int_offset(table->s->rec_buff_length)+1);
  ha_index_end();
  table->restore_column_maps_after_keyread(old_read_set);
  *first_value= nr;
  /* Restart the table scan that was interrupted on entry, if any */
  if (rnd_inited && ha_rnd_init_with_error(0))
  {
    //TODO: it would be nice to return here an error
  }
  return;
}
3870
3871
/*
  Release auto-increment values reserved but not used by the current
  statement and reset the handler's per-statement auto-increment state.
*/
void handler::ha_release_auto_increment()
{
  DBUG_ENTER("ha_release_auto_increment");
  DBUG_ASSERT(table_share->tmp_table != NO_TMP_TABLE ||
              m_lock_type != F_UNLCK ||
              (!next_insert_id && !insert_id_for_cur_row));
  /* Let the engine reclaim any reserved-but-unused interval */
  release_auto_increment();
  insert_id_for_cur_row= 0;
  auto_inc_interval_for_cur_row.replace(0, 0, 0);
  auto_inc_intervals_count= 0;
  if (next_insert_id > 0)
  {
    next_insert_id= 0;
    /*
      this statement used forced auto_increment values if there were some,
      wipe them away for other statements.
    */
    table->in_use->auto_inc_intervals_forced.empty();
  }
  DBUG_VOID_RETURN;
}
3893
3894
3895 /**
3896 Construct and emit duplicate key error message using information
3897 from table's record buffer.
3898
3899 @param table TABLE object which record buffer should be used as
3900 source for column values.
3901 @param key Key description.
3902 @param msg Error message template to which key value should be
3903 added.
3904 @param errflag Flags for my_error() call.
3905
3906 @notes
    The error message text is from ER_DUP_ENTRY_WITH_KEY_NAME but, to stay
    compatible with old code, the reported error number is ER_DUP_ENTRY
3909 */
3910
void print_keydup_error(TABLE *table, KEY *key, const char *msg, myf errflag)
{
  /* Write the duplicated key in the error message */
  char key_buff[MAX_KEY_LENGTH];
  String str(key_buff,sizeof(key_buff),system_charset_info);

  if (key == NULL)
  {
    /*
      Key is unknown. Should only happen if storage engine reports wrong
      duplicate key number.
    */
    my_printf_error(ER_DUP_ENTRY, msg, errflag, "", "*UNKNOWN*");
  }
  else
  {
    /*
      For HASH-backed long unique keys, temporarily switch the KEY to the
      user-visible definition so the message shows real column values.
    */
    if (key->algorithm == HA_KEY_ALG_LONG_HASH)
      setup_keyinfo_hash(key);
    /* Table is opened and defined at this point */
    key_unpack(&str,table, key);
    /* Truncate the key value so the full message fits in the error buffer */
    uint max_length=MYSQL_ERRMSG_SIZE-(uint) strlen(msg);
    if (str.length() >= max_length)
    {
      str.length(max_length-4);
      str.append(STRING_WITH_LEN("..."));
    }
    my_printf_error(ER_DUP_ENTRY, msg, errflag, str.c_ptr_safe(),
                    key->name.str);
    /* Restore the internal (hash) key definition */
    if (key->algorithm == HA_KEY_ALG_LONG_HASH)
      re_setup_keyinfo_hash(key);
  }
}
3943
3944 /**
3945 Construct and emit duplicate key error message using information
3946 from table's record buffer.
3947
3948 @sa print_keydup_error(table, key, msg, errflag).
3949 */
3950
print_keydup_error(TABLE * table,KEY * key,myf errflag)3951 void print_keydup_error(TABLE *table, KEY *key, myf errflag)
3952 {
3953 print_keydup_error(table, key,
3954 ER_THD(table->in_use, ER_DUP_ENTRY_WITH_KEY_NAME),
3955 errflag);
3956 }
3957
3958 /**
3959 Print error that we got from handler function.
3960
3961 @note
3962 In case of delete table it's only safe to use the following parts of
3963 the 'table' structure:
3964 - table->s->path
3965 - table->alias
3966 */
3967
3968 #define SET_FATAL_ERROR fatal_error=1
3969
/*
  Translate a handler error code into a user-visible error message.
  Some errors are reported directly (and return early); the rest fall
  through to the common my_error() call at the end, possibly with the
  table's file path appended for OS-level errors.
*/
void handler::print_error(int error, myf errflag)
{
  bool fatal_error= 0;
  DBUG_ENTER("handler::print_error");
  DBUG_PRINT("enter",("error: %d",error));

  if (ha_thd()->transaction_rollback_request)
  {
    /* Ensure this becomes a true error */
    errflag&= ~(ME_WARNING | ME_NOTE);
  }

  int textno= -1; // impossible value
  switch (error) {
  case EACCES:
    textno=ER_OPEN_AS_READONLY;
    break;
  case EAGAIN:
    textno=ER_FILE_USED;
    break;
  case ENOENT:
  case ENOTDIR:
  case ELOOP:
    textno=ER_FILE_NOT_FOUND;
    break;
  case ENOSPC:
  case HA_ERR_DISK_FULL:
    textno= ER_DISK_FULL;
    SET_FATAL_ERROR;                    // Ensure error is logged
    break;
  case HA_ERR_KEY_NOT_FOUND:
  case HA_ERR_NO_ACTIVE_RECORD:
  case HA_ERR_RECORD_DELETED:
  case HA_ERR_END_OF_FILE:
    /*
      These errors are not normally fatal (for example for reads). However
      if you get them during an update or delete, then they are fatal.
      As the user is calling print_error() (which is not done on read), we
      assume something went wrong with the update or delete.
    */
    SET_FATAL_ERROR;
    textno=ER_KEY_NOT_FOUND;
    break;
  case HA_ERR_ABORTED_BY_USER:
  {
    /* Statement was killed; report the kill reason instead of an error */
    DBUG_ASSERT(ha_thd()->killed);
    ha_thd()->send_kill_message();
    DBUG_VOID_RETURN;
  }
  case HA_ERR_WRONG_MRG_TABLE_DEF:
    textno=ER_WRONG_MRG_TABLE;
    break;
  case HA_ERR_FOUND_DUPP_KEY:
  {
    if (table)
    {
      /* If the engine can tell which key failed, print its value */
      uint key_nr=get_dup_key(error);
      if ((int) key_nr >= 0 && key_nr < table->s->keys)
      {
        print_keydup_error(table, &table->key_info[key_nr], errflag);
        DBUG_VOID_RETURN;
      }
    }
    textno=ER_DUP_KEY;
    break;
  }
  case HA_ERR_FOREIGN_DUPLICATE_KEY:
  {
    char rec_buf[MAX_KEY_LENGTH];
    String rec(rec_buf, sizeof(rec_buf), system_charset_info);
    /* Table is opened and defined at this point */

    /*
      Just print the subset of fields that are part of the first index,
      printing the whole row from there is not easy.
    */
    key_unpack(&rec, table, &table->key_info[0]);

    char child_table_name[NAME_LEN + 1];
    char child_key_name[NAME_LEN + 1];
    if (get_foreign_dup_key(child_table_name, sizeof(child_table_name),
                            child_key_name, sizeof(child_key_name)))
    {
      my_error(ER_FOREIGN_DUPLICATE_KEY_WITH_CHILD_INFO, errflag,
               table_share->table_name.str, rec.c_ptr_safe(),
               child_table_name, child_key_name);
    }
    else
    {
      my_error(ER_FOREIGN_DUPLICATE_KEY_WITHOUT_CHILD_INFO, errflag,
               table_share->table_name.str, rec.c_ptr_safe());
    }
    DBUG_VOID_RETURN;
  }
  case HA_ERR_NULL_IN_SPATIAL:
    my_error(ER_CANT_CREATE_GEOMETRY_OBJECT, errflag);
    DBUG_VOID_RETURN;
  case HA_ERR_FOUND_DUPP_UNIQUE:
    textno=ER_DUP_UNIQUE;
    break;
  case HA_ERR_RECORD_CHANGED:
    /*
      This is not fatal error when using HANDLER interface
      SET_FATAL_ERROR;
    */
    textno=ER_CHECKREAD;
    break;
  case HA_ERR_CRASHED:
    SET_FATAL_ERROR;
    textno=ER_NOT_KEYFILE;
    break;
  case HA_ERR_WRONG_IN_RECORD:
    SET_FATAL_ERROR;
    textno= ER_CRASHED_ON_USAGE;
    break;
  case HA_ERR_CRASHED_ON_USAGE:
    SET_FATAL_ERROR;
    textno=ER_CRASHED_ON_USAGE;
    break;
  case HA_ERR_NOT_A_TABLE:
    textno= error;
    break;
  case HA_ERR_CRASHED_ON_REPAIR:
    SET_FATAL_ERROR;
    textno=ER_CRASHED_ON_REPAIR;
    break;
  case HA_ERR_OUT_OF_MEM:
    textno=ER_OUT_OF_RESOURCES;
    break;
  case HA_ERR_WRONG_COMMAND:
    my_error(ER_ILLEGAL_HA, MYF(0), table_type(), table_share->db.str,
             table_share->table_name.str);
    DBUG_VOID_RETURN;
    break;
  case HA_ERR_OLD_FILE:
    textno=ER_OLD_KEYFILE;
    break;
  case HA_ERR_UNSUPPORTED:
    textno=ER_UNSUPPORTED_EXTENSION;
    break;
  case HA_ERR_RECORD_FILE_FULL:
  {
    textno=ER_RECORD_FILE_FULL;
    /* Write the error message to error log */
    errflag|= ME_ERROR_LOG;
    break;
  }
  case HA_ERR_INDEX_FILE_FULL:
  {
    textno=ER_INDEX_FILE_FULL;
    /* Write the error message to error log */
    errflag|= ME_ERROR_LOG;
    break;
  }
  case HA_ERR_LOCK_WAIT_TIMEOUT:
    textno=ER_LOCK_WAIT_TIMEOUT;
    break;
  case HA_ERR_LOCK_TABLE_FULL:
    textno=ER_LOCK_TABLE_FULL;
    break;
  case HA_ERR_LOCK_DEADLOCK:
  {
    /* Append the engine's own deadlock details to the standard message */
    String str, full_err_msg(ER_DEFAULT(ER_LOCK_DEADLOCK), system_charset_info);

    get_error_message(error, &str);
    full_err_msg.append(str);
    my_printf_error(ER_LOCK_DEADLOCK, "%s", errflag, full_err_msg.c_ptr_safe());
    DBUG_VOID_RETURN;
  }
  case HA_ERR_READ_ONLY_TRANSACTION:
    textno=ER_READ_ONLY_TRANSACTION;
    break;
  case HA_ERR_CANNOT_ADD_FOREIGN:
    textno=ER_CANNOT_ADD_FOREIGN;
    break;
  case HA_ERR_ROW_IS_REFERENCED:
  {
    /* Use the detailed message variant only if the engine provided details */
    String str;
    get_error_message(error, &str);
    my_printf_error(ER_ROW_IS_REFERENCED_2,
                    ER(str.length() ? ER_ROW_IS_REFERENCED_2 : ER_ROW_IS_REFERENCED),
                    errflag, str.c_ptr_safe());
    DBUG_VOID_RETURN;
  }
  case HA_ERR_NO_REFERENCED_ROW:
  {
    String str;
    get_error_message(error, &str);
    my_printf_error(ER_NO_REFERENCED_ROW_2,
                    ER(str.length() ? ER_NO_REFERENCED_ROW_2 : ER_NO_REFERENCED_ROW),
                    errflag, str.c_ptr_safe());
    DBUG_VOID_RETURN;
  }
  case HA_ERR_TABLE_DEF_CHANGED:
    textno=ER_TABLE_DEF_CHANGED;
    break;
  case HA_ERR_NO_SUCH_TABLE:
    my_error(ER_NO_SUCH_TABLE_IN_ENGINE, errflag, table_share->db.str,
             table_share->table_name.str);
    DBUG_VOID_RETURN;
  case HA_ERR_RBR_LOGGING_FAILED:
    textno= ER_BINLOG_ROW_LOGGING_FAILED;
    break;
  case HA_ERR_DROP_INDEX_FK:
  {
    const char *ptr= "???";
    uint key_nr= get_dup_key(error);
    if ((int) key_nr >= 0)
      ptr= table->key_info[key_nr].name.str;
    my_error(ER_DROP_INDEX_FK, errflag, ptr);
    DBUG_VOID_RETURN;
  }
  case HA_ERR_TABLE_NEEDS_UPGRADE:
    textno= ER_TABLE_NEEDS_UPGRADE;
    my_error(ER_TABLE_NEEDS_UPGRADE, errflag,
             "TABLE", table_share->table_name.str);
    DBUG_VOID_RETURN;
  case HA_ERR_NO_PARTITION_FOUND:
    textno=ER_WRONG_PARTITION_NAME;
    break;
  case HA_ERR_TABLE_READONLY:
    textno= ER_OPEN_AS_READONLY;
    break;
  case HA_ERR_AUTOINC_READ_FAILED:
    textno= ER_AUTOINC_READ_FAILED;
    break;
  case HA_ERR_AUTOINC_ERANGE:
    textno= error;
    my_error(textno, errflag, table->next_number_field->field_name.str,
             table->in_use->get_stmt_da()->current_row_for_warning());
    DBUG_VOID_RETURN;
    break;
  case HA_ERR_TOO_MANY_CONCURRENT_TRXS:
    textno= ER_TOO_MANY_CONCURRENT_TRXS;
    break;
  case HA_ERR_INDEX_COL_TOO_LONG:
    textno= ER_INDEX_COLUMN_TOO_LONG;
    break;
  case HA_ERR_NOT_IN_LOCK_PARTITIONS:
    textno=ER_ROW_DOES_NOT_MATCH_GIVEN_PARTITION_SET;
    break;
  case HA_ERR_INDEX_CORRUPT:
    textno= ER_INDEX_CORRUPT;
    break;
  case HA_ERR_UNDO_REC_TOO_BIG:
    textno= ER_UNDO_RECORD_TOO_BIG;
    break;
  case HA_ERR_TABLE_IN_FK_CHECK:
    textno= ER_TABLE_IN_FK_CHECK;
    break;
  case HA_ERR_COMMIT_ERROR:
    textno= ER_ERROR_DURING_COMMIT;
    break;
  case HA_ERR_PARTITION_LIST:
    my_error(ER_VERS_NOT_ALLOWED, errflag, table->s->db.str, table->s->table_name.str);
    DBUG_VOID_RETURN;
  default:
  {
    /* The error was "unknown" to this function.
       Ask handler if it has got a message for this error */
    bool temporary= FALSE;
    String str;
    temporary= get_error_message(error, &str);
    if (!str.is_empty())
    {
      const char* engine= table_type();
      if (temporary)
        my_error(ER_GET_TEMPORARY_ERRMSG, errflag, error, str.c_ptr(),
                 engine);
      else
      {
        SET_FATAL_ERROR;
        my_error(ER_GET_ERRMSG, errflag, error, str.c_ptr(), engine);
      }
    }
    else
      my_error(ER_GET_ERRNO, errflag, error, table_type());
    DBUG_VOID_RETURN;
  }
  }
  /* Every non-returning case above must have assigned a message number */
  DBUG_ASSERT(textno > 0);
  if (unlikely(fatal_error))
  {
    /* Ensure this becomes a true error */
    errflag&= ~(ME_WARNING | ME_NOTE);
    if ((debug_assert_if_crashed_table ||
         global_system_variables.log_warnings > 1))
    {
      /*
        Log error to log before we crash or if extended warnings are requested
      */
      errflag|= ME_ERROR_LOG;
    }
  }

  /* if we got an OS error from a file-based engine, specify a path of error */
  if (error < HA_ERR_FIRST && bas_ext()[0])
  {
    char buff[FN_REFLEN];
    strxnmov(buff, sizeof(buff),
             table_share->normalized_path.str, bas_ext()[0], NULL);
    my_error(textno, errflag, buff, error);
  }
  else
    my_error(textno, errflag, table_share->table_name.str, error);
  DBUG_VOID_RETURN;
}
4277
4278
4279 /**
4280 Return an error message specific to this handler.
4281
4282 @param error error code previously returned by handler
4283 @param buf pointer to String where to add error message
4284
4285 @return
4286 Returns true if this is a temporary error
4287 */
bool handler::get_error_message(int error, String* buf)
{
  /*
    Base implementation: no engine-specific message, not a temporary error.
    The debug hook lets tests inject a message for external_lock failures.
  */
  DBUG_EXECUTE_IF("external_lock_failure",
                  buf->set_ascii(STRING_WITH_LEN("KABOOM!")););
  return FALSE;
}
4294
4295 /**
4296 Check for incompatible collation changes.
4297
4298 @retval
4299 HA_ADMIN_NEEDS_UPGRADE Table may have data requiring upgrade.
4300 @retval
4301 0 No upgrade required.
4302 */
4303
check_collation_compatibility()4304 int handler::check_collation_compatibility()
4305 {
4306 ulong mysql_version= table->s->mysql_version;
4307
4308 if (mysql_version < 50124)
4309 {
4310 KEY *key= table->key_info;
4311 KEY *key_end= key + table->s->keys;
4312 for (; key < key_end; key++)
4313 {
4314 KEY_PART_INFO *key_part= key->key_part;
4315 KEY_PART_INFO *key_part_end= key_part + key->user_defined_key_parts;
4316 for (; key_part < key_part_end; key_part++)
4317 {
4318 if (!key_part->fieldnr)
4319 continue;
4320 Field *field= table->field[key_part->fieldnr - 1];
4321 uint cs_number= field->charset()->number;
4322 if ((mysql_version < 50048 &&
4323 (cs_number == 11 || /* ascii_general_ci - bug #29499, bug #27562 */
4324 cs_number == 41 || /* latin7_general_ci - bug #29461 */
4325 cs_number == 42 || /* latin7_general_cs - bug #29461 */
4326 cs_number == 20 || /* latin7_estonian_cs - bug #29461 */
4327 cs_number == 21 || /* latin2_hungarian_ci - bug #29461 */
4328 cs_number == 22 || /* koi8u_general_ci - bug #29461 */
4329 cs_number == 23 || /* cp1251_ukrainian_ci - bug #29461 */
4330 cs_number == 26)) || /* cp1250_general_ci - bug #29461 */
4331 (mysql_version < 50124 &&
4332 (cs_number == 33 || /* utf8mb3_general_ci - bug #27877 */
4333 cs_number == 35))) /* ucs2_general_ci - bug #27877 */
4334 return HA_ADMIN_NEEDS_UPGRADE;
4335 }
4336 }
4337 }
4338
4339 return 0;
4340 }
4341
4342
ha_check_for_upgrade(HA_CHECK_OPT * check_opt)4343 int handler::ha_check_for_upgrade(HA_CHECK_OPT *check_opt)
4344 {
4345 int error;
4346 KEY *keyinfo, *keyend;
4347 KEY_PART_INFO *keypart, *keypartend;
4348
4349 if (table->s->incompatible_version)
4350 return HA_ADMIN_NEEDS_ALTER;
4351
4352 if (!table->s->mysql_version)
4353 {
4354 /* check for blob-in-key error */
4355 keyinfo= table->key_info;
4356 keyend= table->key_info + table->s->keys;
4357 for (; keyinfo < keyend; keyinfo++)
4358 {
4359 keypart= keyinfo->key_part;
4360 keypartend= keypart + keyinfo->user_defined_key_parts;
4361 for (; keypart < keypartend; keypart++)
4362 {
4363 if (!keypart->fieldnr)
4364 continue;
4365 Field *field= table->field[keypart->fieldnr-1];
4366 if (field->type() == MYSQL_TYPE_BLOB)
4367 {
4368 if (check_opt->sql_flags & TT_FOR_UPGRADE)
4369 check_opt->flags= T_MEDIUM;
4370 return HA_ADMIN_NEEDS_CHECK;
4371 }
4372 }
4373 }
4374 }
4375 if (table->s->frm_version < FRM_VER_TRUE_VARCHAR)
4376 return HA_ADMIN_NEEDS_ALTER;
4377
4378 if (unlikely((error= check_collation_compatibility())))
4379 return error;
4380
4381 return check_for_upgrade(check_opt);
4382 }
4383
4384
check_old_types()4385 int handler::check_old_types()
4386 {
4387 Field** field;
4388
4389 if (!table->s->mysql_version)
4390 {
4391 /* check for bad DECIMAL field */
4392 for (field= table->field; (*field); field++)
4393 {
4394 if ((*field)->type() == MYSQL_TYPE_NEWDECIMAL)
4395 {
4396 return HA_ADMIN_NEEDS_ALTER;
4397 }
4398 if ((*field)->type() == MYSQL_TYPE_VAR_STRING)
4399 {
4400 return HA_ADMIN_NEEDS_ALTER;
4401 }
4402 }
4403 }
4404 return 0;
4405 }
4406
4407
/*
  Stamp the current server version into the table's .frm file after a
  successful CHECK/REPAIR, so future upgrade checks can be skipped.
  Returns 0 on success (or when no update was needed), non-zero on error.
*/
static bool update_frm_version(TABLE *table)
{
  char path[FN_REFLEN];
  File file;
  int result= 1;
  DBUG_ENTER("update_frm_version");

  /*
    No need to update frm version in case table was created or checked
    by server with the same version. This also ensures that we do not
    update frm version for temporary tables as this code doesn't support
    temporary tables.
  */
  if (table->s->mysql_version == MYSQL_VERSION_ID)
    DBUG_RETURN(0);

  strxmov(path, table->s->normalized_path.str, reg_ext, NullS);

  if ((file= mysql_file_open(key_file_frm,
                             path, O_RDWR|O_BINARY, MYF(MY_WME))) >= 0)
  {
    uchar version[4];

    int4store(version, MYSQL_VERSION_ID);

    /* Offset 51 appears to be the version field in the .frm header —
       NOTE(review): confirm against the frm file format definition */
    if ((result= (int)mysql_file_pwrite(file, (uchar*) version, 4, 51L,
                                        MYF(MY_WME+MY_NABP))))
      goto err;

    /* Keep the in-memory share consistent with what we just wrote */
    table->s->mysql_version= MYSQL_VERSION_ID;
  }
err:
  if (file >= 0)
    (void) mysql_file_close(file, MYF(MY_WME));
  DBUG_RETURN(result);
}
4444
4445
4446
4447 /**
4448 @return
4449 key if error because of duplicated keys
4450 */
get_dup_key(int error)4451 uint handler::get_dup_key(int error)
4452 {
4453 DBUG_ASSERT(table_share->tmp_table != NO_TMP_TABLE || m_lock_type != F_UNLCK);
4454 DBUG_ENTER("handler::get_dup_key");
4455
4456 if (lookup_errkey != (uint)-1)
4457 DBUG_RETURN(errkey= lookup_errkey);
4458
4459 errkey= (uint)-1;
4460 if (error == HA_ERR_FOUND_DUPP_KEY ||
4461 error == HA_ERR_FOREIGN_DUPLICATE_KEY ||
4462 error == HA_ERR_FOUND_DUPP_UNIQUE || error == HA_ERR_NULL_IN_SPATIAL ||
4463 error == HA_ERR_DROP_INDEX_FK)
4464 info(HA_STATUS_ERRKEY | HA_STATUS_NO_LOCK);
4465 DBUG_RETURN(errkey);
4466 }
4467
4468
4469 /**
4470 Delete all files with extension from bas_ext().
4471
4472 @param name Base name of table
4473
4474 @note
    We assume that the handler may return more extensions than
    were actually used for the file. We also assume that the first
    extension is the most important one (see the comment near
    handlerton::tablefile_extensions). If this file exists and we cannot
    delete it, we abort the delete.
    If the first file doesn't exist, we still try to delete all the other
    extensions, as there is a chance that the server crashed between the
    delete of the first file and the next.
4483
4484 @retval
4485 0 If we successfully deleted at least one file from base_ext and
4486 didn't get any other errors than ENOENT
4487
4488 @retval
4489 !0 Error
4490 */
4491
delete_table(const char * name)4492 int handler::delete_table(const char *name)
4493 {
4494 int saved_error= ENOENT;
4495 bool abort_if_first_file_error= 1;
4496 bool some_file_deleted= 0;
4497 DBUG_ENTER("handler::delete_table");
4498
4499 for (const char **ext= bas_ext(); *ext ; ext++)
4500 {
4501 int err= mysql_file_delete_with_symlink(key_file_misc, name, *ext, MYF(0));
4502 if (err)
4503 {
4504 if (my_errno != ENOENT)
4505 {
4506 saved_error= my_errno;
4507 /*
4508 If error other than file not found on the first existing file,
4509 return the error.
4510 Otherwise delete as much as possible.
4511 */
4512 if (abort_if_first_file_error)
4513 DBUG_RETURN(saved_error);
4514 }
4515 }
4516 else
4517 some_file_deleted= 1;
4518 abort_if_first_file_error= 0;
4519 }
4520 DBUG_RETURN(some_file_deleted && saved_error == ENOENT ? 0 : saved_error);
4521 }
4522
4523
rename_table(const char * from,const char * to)4524 int handler::rename_table(const char * from, const char * to)
4525 {
4526 int error= 0;
4527 const char **ext, **start_ext;
4528 start_ext= bas_ext();
4529 for (ext= start_ext; *ext ; ext++)
4530 {
4531 if (unlikely(rename_file_ext(from, to, *ext)))
4532 {
4533 if ((error=my_errno) != ENOENT)
4534 break;
4535 error= 0;
4536 }
4537 }
4538 if (unlikely(error))
4539 {
4540 /* Try to revert the rename. Ignore errors. */
4541 for (; ext >= start_ext; ext--)
4542 rename_file_ext(to, from, *ext);
4543 }
4544 return error;
4545 }
4546
4547
/* Close the handler and remove the table's files; errors are ignored. */
void handler::drop_table(const char *name)
{
  ha_close();
  delete_table(name);
}
4553
4554
4555 /**
4556 Return true if the error from drop table means that the
4557 table didn't exists
4558 */
4559
non_existing_table_error(int error)4560 bool non_existing_table_error(int error)
4561 {
4562 return (error == ENOENT ||
4563 (error == EE_DELETE && my_errno == ENOENT) ||
4564 error == HA_ERR_NO_SUCH_TABLE ||
4565 error == HA_ERR_UNSUPPORTED ||
4566 error == ER_NO_SUCH_TABLE ||
4567 error == ER_NO_SUCH_TABLE_IN_ENGINE ||
4568 error == ER_WRONG_OBJECT);
4569 }
4570
4571
4572 /**
4573 Performs checks upon the table.
4574
4575 @param thd thread doing CHECK TABLE operation
4576 @param check_opt options from the parser
4577
4578 @retval
4579 HA_ADMIN_OK Successful upgrade
4580 @retval
4581 HA_ADMIN_NEEDS_UPGRADE Table has structures requiring upgrade
4582 @retval
4583 HA_ADMIN_NEEDS_ALTER Table has structures requiring ALTER TABLE
4584 @retval
4585 HA_ADMIN_NOT_IMPLEMENTED
4586 */
4587
int handler::ha_check(THD *thd, HA_CHECK_OPT *check_opt)
{
  int error;
  DBUG_ASSERT(table_share->tmp_table != NO_TMP_TABLE ||
              m_lock_type != F_UNLCK);

  /* CHECK ... FOR UPGRADE on an already-current table: nothing to do */
  if ((table->s->mysql_version >= MYSQL_VERSION_ID) &&
      (check_opt->sql_flags & TT_FOR_UPGRADE))
    return 0;

  if (table->s->mysql_version < MYSQL_VERSION_ID)
  {
    /* Run old-format checks first; they may force a data check below */
    if (unlikely((error= check_old_types())))
      return error;
    error= ha_check_for_upgrade(check_opt);
    if (unlikely(error && (error != HA_ADMIN_NEEDS_CHECK)))
      return error;
    if (unlikely(!error && (check_opt->sql_flags & TT_FOR_UPGRADE)))
      return 0;
  }
  if (unlikely((error= check(thd, check_opt))))
    return error;
  /* Skip updating frm version if not main handler. */
  if (table->file != this)
    return error;
  /* Record that this server version has verified the table */
  return update_frm_version(table);
}
4615
4616 /**
4617 A helper function to mark a transaction read-write,
4618 if it is started.
4619 */
4620
void handler::mark_trx_read_write_internal()
{
  Ha_trx_info *ha_info= &ha_thd()->ha_data[ht->slot].ha_info[0];
  /*
    When a storage engine method is called, the transaction must
    have been started, unless it's a DDL call, for which the
    storage engine starts the transaction internally, and commits
    it internally, without registering in the ha_list.
    Unfortunately here we can't know for sure if the engine
    has registered the transaction or not, so we must check.
  */
  if (ha_info->is_started())
  {
    /*
      table_share can be NULL, for example, in ha_delete_table() or
      ha_rename_table().
    */
    /* Changes to temporary tables don't make the transaction read-write */
    if (table_share == NULL || table_share->tmp_table == NO_TMP_TABLE)
      ha_info->set_trx_read_write();
  }
}
4642
4643
4644 /**
4645 Repair table: public interface.
4646
4647 @sa handler::repair()
4648 */
4649
int handler::ha_repair(THD* thd, HA_CHECK_OPT* check_opt)
{
  int result;

  mark_trx_read_write();

  result= repair(thd, check_opt);
  /* Engines must only implement repair() if they advertise HA_CAN_REPAIR */
  DBUG_ASSERT(result == HA_ADMIN_NOT_IMPLEMENTED ||
              ha_table_flags() & HA_CAN_REPAIR);

  /* A successful repair counts as a verification by this server version */
  if (result == HA_ADMIN_OK)
    result= update_frm_version(table);
  return result;
}
4664
4665
4666 /**
4667 End bulk insert
4668 */
4669
int handler::ha_end_bulk_insert()
{
  DBUG_ENTER("handler::ha_end_bulk_insert");
  /* Debug crash point: flush first so the crash is recoverable */
  DBUG_EXECUTE_IF("crash_end_bulk_insert",
                  { extra(HA_EXTRA_FLUSH) ; DBUG_SUICIDE();});
  /* The row-count estimate only applies for the duration of the bulk load */
  estimation_rows_to_insert= 0;
  DBUG_RETURN(end_bulk_insert());
}
4678
4679 /**
4680 Bulk update row: public interface.
4681
4682 @sa handler::bulk_update_row()
4683 */
4684
int
handler::ha_bulk_update_row(const uchar *old_data, const uchar *new_data,
                            ha_rows *dup_key_found)
{
  /* Requires a temporary table or a write lock on the table */
  DBUG_ASSERT(table_share->tmp_table != NO_TMP_TABLE ||
              m_lock_type == F_WRLCK);
  mark_trx_read_write();

  return bulk_update_row(old_data, new_data, dup_key_found);
}
4695
4696
4697 /**
4698 Delete all rows: public interface.
4699
4700 @sa handler::delete_all_rows()
4701 */
4702
int
handler::ha_delete_all_rows()
{
  /* Requires a temporary table or a write lock on the table */
  DBUG_ASSERT(table_share->tmp_table != NO_TMP_TABLE ||
              m_lock_type == F_WRLCK);
  mark_trx_read_write();

  return delete_all_rows();
}
4712
4713
4714 /**
4715 Truncate table: public interface.
4716
4717 @sa handler::truncate()
4718 */
4719
int
handler::ha_truncate()
{
  /* Requires a temporary table or a write lock on the table */
  DBUG_ASSERT(table_share->tmp_table != NO_TMP_TABLE ||
              m_lock_type == F_WRLCK);
  mark_trx_read_write();

  return truncate();
}
4729
4730
4731 /**
4732 Reset auto increment: public interface.
4733
4734 @sa handler::reset_auto_increment()
4735 */
4736
int
handler::ha_reset_auto_increment(ulonglong value)
{
  /* Requires a temporary table or a write lock on the table */
  DBUG_ASSERT(table_share->tmp_table != NO_TMP_TABLE ||
              m_lock_type == F_WRLCK);
  mark_trx_read_write();

  return reset_auto_increment(value);
}
4746
4747
4748 /**
4749 Optimize table: public interface.
4750
4751 @sa handler::optimize()
4752 */
4753
int
handler::ha_optimize(THD* thd, HA_CHECK_OPT* check_opt)
{
  /* Requires a temporary table or a write lock on the table */
  DBUG_ASSERT(table_share->tmp_table != NO_TMP_TABLE ||
              m_lock_type == F_WRLCK);
  mark_trx_read_write();

  return optimize(thd, check_opt);
}
4763
4764
4765 /**
4766 Analyze table: public interface.
4767
4768 @sa handler::analyze()
4769 */
4770
int
handler::ha_analyze(THD* thd, HA_CHECK_OPT* check_opt)
{
  /* Requires a temporary table or some lock on the table (read suffices) */
  DBUG_ASSERT(table_share->tmp_table != NO_TMP_TABLE ||
              m_lock_type != F_UNLCK);
  mark_trx_read_write();

  return analyze(thd, check_opt);
}
4780
4781
4782 /**
4783 Check and repair table: public interface.
4784
4785 @sa handler::check_and_repair()
4786 */
4787
bool
handler::ha_check_and_repair(THD *thd)
{
  /*
    NOTE(review): unlike the other ha_* wrappers this one asserts the
    table is UNLOCKED — check-and-repair runs before the table is opened
    for normal use. Confirm this is intentional if it ever fires.
  */
  DBUG_ASSERT(table_share->tmp_table != NO_TMP_TABLE ||
              m_lock_type == F_UNLCK);
  mark_trx_read_write();

  return check_and_repair(thd);
}
4797
4798
4799 /**
4800 Disable indexes: public interface.
4801
4802 @sa handler::disable_indexes()
4803 */
4804
int
handler::ha_disable_indexes(uint mode)
{
  /* Requires a temporary table or some lock on the table */
  DBUG_ASSERT(table_share->tmp_table != NO_TMP_TABLE ||
              m_lock_type != F_UNLCK);
  mark_trx_read_write();

  return disable_indexes(mode);
}
4814
4815
4816 /**
4817 Enable indexes: public interface.
4818
4819 @sa handler::enable_indexes()
4820 */
4821
int
handler::ha_enable_indexes(uint mode)
{
  /* Requires a temporary table or some lock on the table */
  DBUG_ASSERT(table_share->tmp_table != NO_TMP_TABLE ||
              m_lock_type != F_UNLCK);
  mark_trx_read_write();

  return enable_indexes(mode);
}
4831
4832
4833 /**
4834 Discard or import tablespace: public interface.
4835
4836 @sa handler::discard_or_import_tablespace()
4837 */
4838
int
handler::ha_discard_or_import_tablespace(my_bool discard)
{
  /* Requires a temporary table or a write lock on the table */
  DBUG_ASSERT(table_share->tmp_table != NO_TMP_TABLE ||
              m_lock_type == F_WRLCK);
  mark_trx_read_write();

  return discard_or_import_tablespace(discard);
}
4848
4849
/* Public wrapper for the engine's prepare phase of in-place ALTER TABLE. */
bool handler::ha_prepare_inplace_alter_table(TABLE *altered_table,
                                             Alter_inplace_info *ha_alter_info)
{
  /* Requires a temporary table or some lock on the table */
  DBUG_ASSERT(table_share->tmp_table != NO_TMP_TABLE ||
              m_lock_type != F_UNLCK);
  mark_trx_read_write();

  return prepare_inplace_alter_table(altered_table, ha_alter_info);
}
4859
4860
/* Public wrapper for the engine's commit/rollback phase of in-place ALTER. */
bool handler::ha_commit_inplace_alter_table(TABLE *altered_table,
                                            Alter_inplace_info *ha_alter_info,
                                            bool commit)
{
  /*
    At this point we should have an exclusive metadata lock on the table.
    The exception is if we're about to roll back changes (commit= false).
    In this case, we might be rolling back after a failed lock upgrade,
    so we could be holding the same lock level as for inplace_alter_table().
  */
  DBUG_ASSERT(ha_thd()->mdl_context.is_lock_owner(MDL_key::TABLE,
                                                  table->s->db.str,
                                                  table->s->table_name.str,
                                                  MDL_EXCLUSIVE) ||
              !commit);

  return commit_inplace_alter_table(altered_table, ha_alter_info, commit);
}
4879
4880
4881 /*
4882 Default implementation to support in-place alter table
4883 and old online add/drop index API
4884 */
4885
enum_alter_inplace_result
handler::check_if_supported_inplace_alter(TABLE *altered_table,
                                          Alter_inplace_info *ha_alter_info)
{
  DBUG_ENTER("handler::check_if_supported_inplace_alter");

  HA_CREATE_INFO *create_info= ha_alter_info->create_info;

  /* System-versioned (timestamp) tables are not handled by this default. */
  if (altered_table->versioned(VERS_TIMESTAMP))
    DBUG_RETURN(HA_ALTER_INPLACE_NOT_SUPPORTED);

  /* The only operations this default implementation may accept in-place. */
  alter_table_operations inplace_offline_operations=
    ALTER_COLUMN_TYPE_CHANGE_BY_ENGINE |
    ALTER_COLUMN_NAME |
    ALTER_RENAME_COLUMN |
    ALTER_CHANGE_COLUMN_DEFAULT |
    ALTER_COLUMN_DEFAULT |
    ALTER_COLUMN_OPTION |
    ALTER_CHANGE_CREATE_OPTION |
    ALTER_DROP_CHECK_CONSTRAINT |
    ALTER_PARTITIONED |
    ALTER_VIRTUAL_GCOL_EXPR |
    ALTER_RENAME |
    ALTER_RENAME_INDEX;

  /* Is there at least one operation that requires copy algorithm? */
  if (ha_alter_info->handler_flags & ~inplace_offline_operations)
    DBUG_RETURN(HA_ALTER_INPLACE_NOT_SUPPORTED);

  /*
    The following checks for changes related to ALTER_OPTIONS

    ALTER TABLE tbl_name CONVERT TO CHARACTER SET .. and
    ALTER TABLE table_name DEFAULT CHARSET = .. most likely
    change column charsets and so not supported in-place through
    old API.

    Changing of PACK_KEYS, MAX_ROWS and ROW_FORMAT options were
    not supported as in-place operations in old API either.
  */
  if (create_info->used_fields & (HA_CREATE_USED_CHARSET |
                                  HA_CREATE_USED_DEFAULT_CHARSET |
                                  HA_CREATE_USED_PACK_KEYS |
                                  HA_CREATE_USED_CHECKSUM |
                                  HA_CREATE_USED_MAX_ROWS) ||
      (table->s->row_type != create_info->row_type))
    DBUG_RETURN(HA_ALTER_INPLACE_NOT_SUPPORTED);

  /*
    Engine column type changes only qualify when the old and new
    representations have compatible pack lengths.
  */
  uint table_changes= (ha_alter_info->handler_flags &
                       ALTER_COLUMN_TYPE_CHANGE_BY_ENGINE) ?
    IS_EQUAL_PACK_LENGTH : IS_EQUAL_YES;
  if (table->file->check_if_incompatible_data(create_info, table_changes)
      == COMPATIBLE_DATA_YES)
    DBUG_RETURN(HA_ALTER_INPLACE_NO_LOCK);

  DBUG_RETURN(HA_ALTER_INPLACE_NOT_SUPPORTED);
}
4943
/**
  Gather the context shared between the SQL layer and the storage engine
  for one in-place ALTER TABLE. Index add/drop buffers start out empty;
  handler_flags is filled in later.
*/
Alter_inplace_info::Alter_inplace_info(HA_CREATE_INFO *create_info_arg,
                                       Alter_info *alter_info_arg,
                                       KEY *key_info_arg, uint key_count_arg,
                                       partition_info *modified_part_info_arg,
                                       bool ignore_arg, bool error_non_empty)
  : create_info(create_info_arg),
  alter_info(alter_info_arg),
  key_info_buffer(key_info_arg),
  key_count(key_count_arg),
  index_drop_count(0),
  index_drop_buffer(nullptr),
  index_add_count(0),
  index_add_buffer(nullptr),
  rename_keys(current_thd->mem_root),  // list nodes live on the statement mem_root
  handler_ctx(nullptr),
  group_commit_ctx(nullptr),
  handler_flags(0),
  modified_part_info(modified_part_info_arg),
  ignore(ignore_arg),
  online(false),
  unsupported_reason(nullptr),
  error_if_not_empty(error_non_empty)
{}
4967
report_unsupported_error(const char * not_supported,const char * try_instead) const4968 void Alter_inplace_info::report_unsupported_error(const char *not_supported,
4969 const char *try_instead) const
4970 {
4971 if (unsupported_reason == NULL)
4972 my_error(ER_ALTER_OPERATION_NOT_SUPPORTED, MYF(0),
4973 not_supported, try_instead);
4974 else
4975 my_error(ER_ALTER_OPERATION_NOT_SUPPORTED_REASON, MYF(0),
4976 not_supported, unsupported_reason, try_instead);
4977 }
4978
4979
4980 /**
4981 Rename table: public interface.
4982
4983 @sa handler::rename_table()
4984 */
4985
int
handler::ha_rename_table(const char *from, const char *to)
{
  /* Renaming is only allowed on a closed (unlocked) handler. */
  DBUG_ASSERT(m_lock_type == F_UNLCK);
  mark_trx_read_write();

  return rename_table(from, to);
}
4994
4995
4996 /**
4997 Drop table in the engine: public interface.
4998
4999 @sa handler::drop_table()
5000
5001 The difference between this and delete_table() is that the table is open in
5002 drop_table().
5003 */
5004
void
handler::ha_drop_table(const char *name)
{
  DBUG_ASSERT(m_lock_type == F_UNLCK);
  /* Skip the drop entirely when updates are configured to be ignored. */
  if (check_if_updates_are_ignored("DROP"))
    return;

  mark_trx_read_write();
  drop_table(name);
}
5015
5016
5017 /**
5018 Structure used during force drop table.
5019 */
5020
struct st_force_drop_table_params
{
  const char *path;           // File path passed through to ha_delete_table()
  const LEX_CSTRING *db;      // Database name
  const LEX_CSTRING *alias;   // Table name (alias)
  int error;                  // Result: -1 not found, 0 deleted, else error code
  bool discovering;           // Current pass: engines with discovery (true) or without
};
5029
5030
5031 /**
5032 Try to delete table from a given plugin
  Try to delete a table from a given plugin.
  Table types with discovery are ignored, as their .frm files would have
  been created during discovery and thus don't need to be found
  for drop table force
5036 */
5037
static my_bool delete_table_force(THD *thd, plugin_ref plugin, void *arg)
{
  handlerton *hton = plugin_hton(plugin);
  st_force_drop_table_params *param = (st_force_drop_table_params *)arg;

  /*
    Only consider engines matching the current pass (with/without table
    discovery), and skip engines whose updates are ignored on a slave.
  */
  if (param->discovering == (hton->discover_table != NULL) &&
      !(thd->slave_thread && (hton->flags & HTON_IGNORE_UPDATES)))
  {
    int error;
    error= ha_delete_table(thd, hton, param->path, param->db, param->alias, 0);
    /* Remember the first real error; "table does not exist" is not one. */
    if (error > 0 && !non_existing_table_error(error))
      param->error= error;
    if (error == 0)
    {
      if (hton && hton->flags & HTON_TABLE_MAY_NOT_EXIST_ON_SLAVE)
        thd->replication_flags |= OPTION_IF_EXISTS;
      param->error= 0;
      return TRUE;                                  // Table was deleted
    }
  }
  return FALSE;                                     // Try the next engine
}
5060
5061 /**
5062 @brief
5063 Traverse all plugins to delete table when .frm file is missing.
5064
5065 @return -1 Table was not found in any engine
  @return 0    Table was found in some engine and delete succeeded
5067 @return # Error from first engine that had a table but didn't succeed to
5068 delete the table
5069 @return HA_ERR_ROW_IS_REFERENCED if foreign key reference is encountered,
5070
5071 */
5072
int ha_delete_table_force(THD *thd, const char *path, const LEX_CSTRING *db,
                          const LEX_CSTRING *alias)
{
  st_force_drop_table_params param;
  Table_exists_error_handler no_such_table_handler;
  DBUG_ENTER("ha_delete_table_force");

  param.path= path;
  param.db= db;
  param.alias= alias;
  param.error= -1;                          // Table not found
  param.discovering= true;

  /* Hide "no such table" errors while probing every engine. */
  thd->push_internal_handler(&no_such_table_handler);
  /* Pass 1: engines that support table discovery. */
  if (plugin_foreach(thd, delete_table_force, MYSQL_STORAGE_ENGINE_PLUGIN,
                     &param))
    param.error= 0;                         // Delete succeeded
  else
  {
    /* Pass 2: engines without table discovery. */
    param.discovering= false;
    if (plugin_foreach(thd, delete_table_force, MYSQL_STORAGE_ENGINE_PLUGIN,
                       &param))
      param.error= 0;                       // Delete succeeded
  }
  thd->pop_internal_handler();
  DBUG_RETURN(param.error);
}
5100
5101
5102 /**
5103 Create a table in the engine: public interface.
5104
5105 @sa handler::create()
5106 */
5107
int
handler::ha_create(const char *name, TABLE *form, HA_CREATE_INFO *info_arg)
{
  DBUG_ASSERT(m_lock_type == F_UNLCK);
  mark_trx_read_write();
  int error= create(name, form, info_arg);
  /* Notify audit plugins, but not for temporary or ALTER-internal tables. */
  if (!error &&
      !(info_arg->options & (HA_LEX_CREATE_TMP_TABLE | HA_CREATE_TMP_ALTER)))
    mysql_audit_create_table(form);
  return error;
}
5119
5120
5121 /**
5122 Create handler files for CREATE TABLE: public interface.
5123
5124 @sa handler::create_partitioning_metadata()
5125 */
5126
int
handler::ha_create_partitioning_metadata(const char *name,
                                         const char *old_name,
                                         chf_create_flags action_flag)
{
  /*
    Normally this is done when unlocked, but in fast_alter_partition_table,
    it is done on an already locked handler when preparing to alter/rename
    partitions.
  */
  DBUG_ASSERT(m_lock_type == F_UNLCK ||
              (!old_name && strcmp(name, table_share->path.str)));


  /* DDL-like operation: flag the transaction as read-write. */
  mark_trx_read_write();
  return create_partitioning_metadata(name, old_name, action_flag);
}
5144
5145
5146 /**
5147 Change partitions: public interface.
5148
5149 @sa handler::change_partitions()
5150 */
5151
int
handler::ha_change_partitions(HA_CREATE_INFO *create_info,
                              const char *path,
                              ulonglong * const copied,
                              ulonglong * const deleted,
                              const uchar *pack_frm_data,
                              size_t pack_frm_len)
{
  /*
    Must have at least RDLCK or be a TMP table. Read lock is needed to read
    from current partitions and write lock will be taken on new partitions.
  */
  DBUG_ASSERT(table_share->tmp_table != NO_TMP_TABLE ||
              m_lock_type != F_UNLCK);

  mark_trx_read_write();

  /* copied/deleted are output counters filled in by the engine. */
  return change_partitions(create_info, path, copied, deleted,
                           pack_frm_data, pack_frm_len);
}
5172
5173
5174 /**
5175 Drop partitions: public interface.
5176
5177 @sa handler::drop_partitions()
5178 */
5179
int
handler::ha_drop_partitions(const char *path)
{
  /* Table must not be opened by anyone (no data file handles). */
  DBUG_ASSERT(!table->db_stat);

  mark_trx_read_write();

  return drop_partitions(path);
}
5189
5190
5191 /**
5192 Rename partitions: public interface.
5193
5194 @sa handler::rename_partitions()
5195 */
5196
int
handler::ha_rename_partitions(const char *path)
{
  /* Table must not be opened by anyone (no data file handles). */
  DBUG_ASSERT(!table->db_stat);

  mark_trx_read_write();

  return rename_partitions(path);
}
5206
5207
5208 /**
5209 Tell the storage engine that it is allowed to "disable transaction" in the
5210 handler. It is a hint that ACID is not required - it was used in NDB for
5211 ALTER TABLE, for example, when data are copied to temporary table.
5212 A storage engine may treat this hint any way it likes. NDB for example
5213 started to commit every now and then automatically.
5214 This hint can be safely ignored.
5215 */
/**
  @param thd  current thread
  @param on   true to (re-)enable transactions, false to disable the hint
  @return 0 on success, else the error from committing the open transaction
*/
int ha_enable_transaction(THD *thd, bool on)
{
  int error=0;
  DBUG_ENTER("ha_enable_transaction");
  DBUG_PRINT("enter", ("on: %d", (int) on));

  if ((thd->transaction->on= on))
  {
    /*
      Now all storage engines should have transaction handling enabled.
      But some may have it enabled all the time - "disabling" transactions
      is an optimization hint that storage engine is free to ignore.
      So, let's commit an open transaction (if any) now.
    */
    if (likely(!(error= ha_commit_trans(thd, 0))))
      error= trans_commit_implicit(thd);
  }
  DBUG_RETURN(error);
}
5235
/**
  Read the next row via the active index and verify that it still matches
  'key'; returns HA_ERR_END_OF_FILE when the next row's key differs.

  @param buf     Buffer to read the row into (may differ from record[0])
  @param key     Key value the rows must continue to match
  @param keylen  Length of 'key' in bytes
*/
int handler::index_next_same(uchar *buf, const uchar *key, uint keylen)
{
  int error;
  DBUG_ENTER("handler::index_next_same");
  if (!(error=index_next(buf)))
  {
    my_ptrdiff_t ptrdiff= buf - table->record[0];
    uchar *UNINIT_VAR(save_record_0);
    KEY *UNINIT_VAR(key_info);
    KEY_PART_INFO *UNINIT_VAR(key_part);
    KEY_PART_INFO *UNINIT_VAR(key_part_end);

    /*
      key_cmp_if_same() compares table->record[0] against 'key'.
      In parts it uses table->record[0] directly, in parts it uses
      field objects with their local pointers into table->record[0].
      If 'buf' is distinct from table->record[0], we need to move
      all record references. This is table->record[0] itself and
      the field pointers of the fields used in this key.
    */
    if (ptrdiff)
    {
      save_record_0= table->record[0];
      table->record[0]= buf;
      key_info= table->key_info + active_index;
      key_part= key_info->key_part;
      key_part_end= key_part + key_info->user_defined_key_parts;
      for (; key_part < key_part_end; key_part++)
      {
        DBUG_ASSERT(key_part->field);
        key_part->field->move_field_offset(ptrdiff);
      }
    }

    /* Key changed: report "no more rows for this key". */
    if (key_cmp_if_same(table, key, active_index, keylen))
    {
      table->status=STATUS_NOT_FOUND;
      error=HA_ERR_END_OF_FILE;
    }

    /* Move back if necessary. */
    if (ptrdiff)
    {
      table->record[0]= save_record_0;
      for (key_part= key_info->key_part; key_part < key_part_end; key_part++)
        key_part->field->move_field_offset(-ptrdiff);
    }
  }
  DBUG_PRINT("return",("%i", error));
  DBUG_RETURN(error);
}
5287
5288
/**
  Fill PARTITION_STATS from the handler's statistics.

  Refreshes handler stats without taking locks, then copies them to the
  caller-provided structure.
  NOTE(review): part_id is unused in this default implementation —
  presumably partition-aware handlers override it; confirm in ha_partition.
*/
void handler::get_dynamic_partition_info(PARTITION_STATS *stat_info,
                                         uint part_id)
{
  info(HA_STATUS_CONST | HA_STATUS_TIME | HA_STATUS_VARIABLE |
       HA_STATUS_NO_LOCK);
  stat_info->records=              stats.records;
  stat_info->mean_rec_length=      stats.mean_rec_length;
  stat_info->data_file_length=     stats.data_file_length;
  stat_info->max_data_file_length= stats.max_data_file_length;
  stat_info->index_file_length=    stats.index_file_length;
  stat_info->max_index_file_length=stats.max_index_file_length;
  stat_info->delete_length=        stats.delete_length;
  stat_info->create_time=          stats.create_time;
  stat_info->update_time=          stats.update_time;
  stat_info->check_time=           stats.check_time;
  stat_info->check_sum=            stats.checksum;
  stat_info->check_sum_null=       stats.checksum_null;
}
5307
5308
5309 /*
5310 Updates the global table stats with the TABLE this handler represents
5311 */
5312
/*
  Updates the global table stats with the TABLE this handler represents.
  Per-handler counters (rows_read/rows_changed) are folded into the
  global hash under LOCK_global_table_stats and then reset.
*/
void handler::update_global_table_stats()
{
  TABLE_STATS * table_stats;

  status_var_add(table->in_use->status_var.rows_read, rows_read);
  DBUG_ASSERT(rows_tmp_read == 0);

  /* Userstat disabled: just reset the per-handler counters. */
  if (!table->in_use->userstat_running)
  {
    rows_read= rows_changed= 0;
    return;
  }

  if (rows_read + rows_changed == 0)
    return;                                     // Nothing to update.

  DBUG_ASSERT(table->s);
  DBUG_ASSERT(table->s->table_cache_key.str);

  mysql_mutex_lock(&LOCK_global_table_stats);
  /* Gets the global table stats, creating one if necessary. */
  if (!(table_stats= (TABLE_STATS*)
        my_hash_search(&global_table_stats,
                       (uchar*) table->s->table_cache_key.str,
                       table->s->table_cache_key.length)))
  {
    if (!(table_stats = ((TABLE_STATS*)
                         my_malloc(PSI_INSTRUMENT_ME, sizeof(TABLE_STATS),
                                   MYF(MY_WME | MY_ZEROFILL)))))
    {
      /* Out of memory error already given */
      goto end;
    }
    memcpy(table_stats->table, table->s->table_cache_key.str,
           table->s->table_cache_key.length);
    table_stats->table_name_length= (uint)table->s->table_cache_key.length;
    table_stats->engine_type= ht->db_type;
    /* No need to set variables to 0, as we use MY_ZEROFILL above */

    if (my_hash_insert(&global_table_stats, (uchar*) table_stats))
    {
      /* Out of memory error is already given */
      my_free(table_stats);
      goto end;
    }
  }
  // Updates the global table stats.
  table_stats->rows_read+= rows_read;
  table_stats->rows_changed+= rows_changed;
  table_stats->rows_changed_x_indexes+= (rows_changed *
                                         (table->s->keys ? table->s->keys :
                                          1));
  rows_read= rows_changed= 0;
end:
  mysql_mutex_unlock(&LOCK_global_table_stats);
}
5369
5370
5371 /*
5372 Updates the global index stats with this handler's accumulated index reads.
5373 */
5374
/*
  Updates the global index stats with this handler's accumulated index
  reads. For each key with a nonzero counter, the count is folded into
  the global hash (keyed on key_info->cache_name) under
  LOCK_global_index_stats, then the per-handler counter is reset.
*/
void handler::update_global_index_stats()
{
  DBUG_ASSERT(table->s);

  if (!table->in_use->userstat_running)
  {
    /* Reset all index read values */
    bzero(index_rows_read, sizeof(index_rows_read[0]) * table->s->keys);
    return;
  }

  for (uint index = 0; index < table->s->keys; index++)
  {
    if (index_rows_read[index])
    {
      INDEX_STATS* index_stats;
      size_t key_length;
      KEY *key_info = &table->key_info[index]; // Rows were read using this

      DBUG_ASSERT(key_info->cache_name);
      if (!key_info->cache_name)
        continue;
      key_length= table->s->table_cache_key.length + key_info->name.length + 1;
      mysql_mutex_lock(&LOCK_global_index_stats);
      // Gets the global index stats, creating one if necessary.
      if (!(index_stats= (INDEX_STATS*) my_hash_search(&global_index_stats,
                                                       key_info->cache_name,
                                                       key_length)))
      {
        if (!(index_stats = ((INDEX_STATS*)
                             my_malloc(PSI_INSTRUMENT_ME, sizeof(INDEX_STATS),
                                       MYF(MY_WME | MY_ZEROFILL)))))
          goto end;                             // Error is already given

        memcpy(index_stats->index, key_info->cache_name, key_length);
        index_stats->index_name_length= key_length;
        if (my_hash_insert(&global_index_stats, (uchar*) index_stats))
        {
          my_free(index_stats);
          goto end;
        }
      }
      /* Updates the global index stats. */
      index_stats->rows_read+= index_rows_read[index];
      index_rows_read[index]= 0;
      /* Label inside the loop: the mutex is released once per key. */
end:
      mysql_mutex_unlock(&LOCK_global_index_stats);
    }
  }
}
5425
5426
flush_checksum(ha_checksum * row_crc,uchar ** checksum_start,size_t * checksum_length)5427 static void flush_checksum(ha_checksum *row_crc, uchar **checksum_start,
5428 size_t *checksum_length)
5429 {
5430 if (*checksum_start)
5431 {
5432 *row_crc= my_checksum(*row_crc, *checksum_start, *checksum_length);
5433 *checksum_start= NULL;
5434 *checksum_length= 0;
5435 }
5436 }
5437
5438
5439 /* calculating table's checksum */
calculate_checksum()5440 int handler::calculate_checksum()
5441 {
5442 int error;
5443 THD *thd=ha_thd();
5444 DBUG_ASSERT(table->s->last_null_bit_pos < 8);
5445 uchar null_mask= table->s->last_null_bit_pos
5446 ? 256 - (1 << table->s->last_null_bit_pos) : 0;
5447
5448 table->use_all_stored_columns();
5449 stats.checksum= 0;
5450
5451 if ((error= ha_rnd_init(1)))
5452 return error;
5453
5454 for (;;)
5455 {
5456 if (thd->killed)
5457 return HA_ERR_ABORTED_BY_USER;
5458
5459 ha_checksum row_crc= 0;
5460 error= ha_rnd_next(table->record[0]);
5461 if (error)
5462 break;
5463
5464 if (table->s->null_bytes)
5465 {
5466 /* fix undefined null bits */
5467 table->record[0][table->s->null_bytes-1] |= null_mask;
5468 if (!(table->s->db_create_options & HA_OPTION_PACK_RECORD))
5469 table->record[0][0] |= 1;
5470
5471 row_crc= my_checksum(row_crc, table->record[0], table->s->null_bytes);
5472 }
5473
5474 uchar *checksum_start= NULL;
5475 size_t checksum_length= 0;
5476 for (uint i= 0; i < table->s->fields; i++ )
5477 {
5478 Field *f= table->field[i];
5479
5480 if (! thd->variables.old_mode && f->is_real_null(0))
5481 {
5482 flush_checksum(&row_crc, &checksum_start, &checksum_length);
5483 continue;
5484 }
5485 /*
5486 BLOB and VARCHAR have pointers in their field, we must convert
5487 to string; GEOMETRY is implemented on top of BLOB.
5488 BIT may store its data among NULL bits, convert as well.
5489 */
5490 switch (f->type()) {
5491 case MYSQL_TYPE_BLOB:
5492 case MYSQL_TYPE_VARCHAR:
5493 case MYSQL_TYPE_GEOMETRY:
5494 case MYSQL_TYPE_BIT:
5495 {
5496 flush_checksum(&row_crc, &checksum_start, &checksum_length);
5497 String tmp;
5498 f->val_str(&tmp);
5499 row_crc= my_checksum(row_crc, (uchar*) tmp.ptr(), tmp.length());
5500 break;
5501 }
5502 default:
5503 if (!checksum_start)
5504 checksum_start= f->ptr;
5505 DBUG_ASSERT(checksum_start + checksum_length == f->ptr);
5506 checksum_length+= f->pack_length();
5507 break;
5508 }
5509 }
5510 flush_checksum(&row_crc, &checksum_start, &checksum_length);
5511
5512 stats.checksum+= row_crc;
5513 }
5514 ha_rnd_end();
5515 return error == HA_ERR_END_OF_FILE ? 0 : error;
5516 }
5517
5518
5519 /****************************************************************************
** Some general functions that aren't in the handler class
5521 ****************************************************************************/
5522
5523 /**
5524 Initiates table-file and calls appropriate database-creator.
5525
5526 @retval
5527 0 ok
5528 @retval
5529 1 error
5530 */
int ha_create_table(THD *thd, const char *path,
                    const char *db, const char *table_name,
                    HA_CREATE_INFO *create_info, LEX_CUSTRING *frm)
{
  int error= 1;
  TABLE table;
  char name_buff[FN_REFLEN];
  const char *name;
  TABLE_SHARE share;
  /* Don't let warnings abort the create while this object is in scope. */
  Abort_on_warning_instant_set old_abort_on_warning(thd, 0);
  bool temp_table __attribute__((unused)) =
    create_info->options & (HA_LEX_CREATE_TMP_TABLE | HA_CREATE_TMP_ALTER);
  DBUG_ENTER("ha_create_table");

  init_tmp_table_share(thd, &share, db, 0, table_name, path);

  if (frm)
  {
    /*
      Write the .frm to disk only for engines without discovery and for
      non-temporary tables; discovering engines recreate it on demand.
    */
    bool write_frm_now= !create_info->db_type->discover_table &&
                        !create_info->tmp_table();

    share.frm_image= frm;

    // open an frm image
    if (share.init_from_binary_frm_image(thd, write_frm_now,
                                         frm->str, frm->length))
      goto err;
  }
  else
  {
    // open an frm file
    share.db_plugin= ha_lock_engine(thd, create_info->db_type);

    if (open_table_def(thd, &share))
      goto err;
  }

  share.m_psi= PSI_CALL_get_table_share(temp_table, &share);

  if (open_table_from_share(thd, &share, &empty_clex_str, 0, READ_ALL, 0,
                            &table, true))
    goto err;

  update_create_info_from_table(create_info, &table);

  name= get_canonical_filename(table.file, share.path.str, name_buff);

  error= table.file->ha_create(name, &table, create_info);

  if (unlikely(error))
  {
    if (!thd->is_error())
      my_error(ER_CANT_CREATE_TABLE, MYF(0), db, table_name, error);
    table.file->print_error(error, MYF(ME_WARNING));
    /* Undo the PSI share registration done above. */
    PSI_CALL_drop_table_share(temp_table, share.db.str, (uint)share.db.length,
                              share.table_name.str, (uint)share.table_name.length);
  }

  (void) closefrm(&table);

err:
  free_table_share(&share);
  /* Collapse any engine error code into the documented 0/1 result. */
  DBUG_RETURN(error != 0);
}
5595
void st_ha_check_opt::init()
{
  /* Reset CHECK/REPAIR option flags and record the operation start time. */
  flags= sql_flags= 0;
  start_time= my_time(0);
}
5601
5602
5603 /*****************************************************************************
5604 Key cache handling.
5605
5606 This code is only relevant for ISAM/MyISAM tables
5607
5608 key_cache->cache may be 0 only in the case where a key cache is not
5609 initialized or when we where not able to init the key cache in a previous
5610 call to ha_init_key_cache() (probably out of memory)
5611 *****************************************************************************/
5612
5613 /**
  Init a key cache if it has not been initialized before.
5615 */
int ha_init_key_cache(const char *name, KEY_CACHE *key_cache, void *unused
                      __attribute__((unused)))
{
  DBUG_ENTER("ha_init_key_cache");

  if (!key_cache->key_cache_inited)
  {
    /*
      Snapshot the configuration under LOCK_global_system_variables so a
      concurrent SET GLOBAL cannot hand us a mix of old and new values.
    */
    mysql_mutex_lock(&LOCK_global_system_variables);
    size_t tmp_buff_size= (size_t) key_cache->param_buff_size;
    uint tmp_block_size= (uint) key_cache->param_block_size;
    uint division_limit= (uint)key_cache->param_division_limit;
    uint age_threshold=  (uint)key_cache->param_age_threshold;
    uint partitions= (uint)key_cache->param_partitions;
    uint changed_blocks_hash_size= (uint)key_cache->changed_blocks_hash_size;
    mysql_mutex_unlock(&LOCK_global_system_variables);
    /* Negated: init_key_cache() apparently signals success with nonzero. */
    DBUG_RETURN(!init_key_cache(key_cache,
                                tmp_block_size,
                                tmp_buff_size,
                                division_limit, age_threshold,
                                changed_blocks_hash_size,
                                partitions));
  }
  DBUG_RETURN(0);
}
5640
5641
5642 /**
5643 Resize key cache.
5644 */
int ha_resize_key_cache(KEY_CACHE *key_cache)
{
  DBUG_ENTER("ha_resize_key_cache");

  if (key_cache->key_cache_inited)
  {
    /* Snapshot the parameters consistently; see ha_init_key_cache(). */
    mysql_mutex_lock(&LOCK_global_system_variables);
    size_t tmp_buff_size= (size_t) key_cache->param_buff_size;
    long tmp_block_size= (long) key_cache->param_block_size;
    uint division_limit= (uint)key_cache->param_division_limit;
    uint age_threshold=  (uint)key_cache->param_age_threshold;
    uint changed_blocks_hash_size= (uint)key_cache->changed_blocks_hash_size;
    mysql_mutex_unlock(&LOCK_global_system_variables);
    /* Negated: resize_key_cache() apparently signals success with nonzero. */
    DBUG_RETURN(!resize_key_cache(key_cache, tmp_block_size,
                                  tmp_buff_size,
                                  division_limit, age_threshold,
                                  changed_blocks_hash_size));
  }
  DBUG_RETURN(0);
}
5665
5666
5667 /**
5668 Change parameters for key cache (like division_limit)
5669 */
int ha_change_key_cache_param(KEY_CACHE *key_cache)
{
  DBUG_ENTER("ha_change_key_cache_param");

  if (key_cache->key_cache_inited)
  {
    /* Snapshot the parameters consistently; see ha_init_key_cache(). */
    mysql_mutex_lock(&LOCK_global_system_variables);
    uint division_limit= (uint)key_cache->param_division_limit;
    uint age_threshold=  (uint)key_cache->param_age_threshold;
    mysql_mutex_unlock(&LOCK_global_system_variables);
    change_key_cache_param(key_cache, division_limit, age_threshold);
  }
  DBUG_RETURN(0);
}
5684
5685
5686 /**
5687 Repartition key cache
5688 */
int ha_repartition_key_cache(KEY_CACHE *key_cache)
{
  DBUG_ENTER("ha_repartition_key_cache");

  if (key_cache->key_cache_inited)
  {
    /* Snapshot the parameters consistently; see ha_init_key_cache(). */
    mysql_mutex_lock(&LOCK_global_system_variables);
    size_t tmp_buff_size= (size_t) key_cache->param_buff_size;
    long tmp_block_size= (long) key_cache->param_block_size;
    uint division_limit= (uint)key_cache->param_division_limit;
    uint age_threshold=  (uint)key_cache->param_age_threshold;
    uint partitions= (uint)key_cache->param_partitions;
    uint changed_blocks_hash_size= (uint)key_cache->changed_blocks_hash_size;
    mysql_mutex_unlock(&LOCK_global_system_variables);
    /* Negated: repartition_key_cache() apparently returns nonzero on success. */
    DBUG_RETURN(!repartition_key_cache(key_cache, tmp_block_size,
                                       tmp_buff_size,
                                       division_limit, age_threshold,
                                       changed_blocks_hash_size,
                                       partitions));
  }
  DBUG_RETURN(0);
}
5711
5712
5713 /**
5714 Move all tables from one key cache to another one.
5715 */
int ha_change_key_cache(KEY_CACHE *old_key_cache,
                        KEY_CACHE *new_key_cache)
{
  /* The key cache is only used by MyISAM; delegate to the mi layer. */
  mi_change_key_cache(old_key_cache, new_key_cache);
  return 0;
}
5722
5723
/**
  plugin_foreach() callback: try to discover the table (TABLE_SHARE in
  'arg') in one engine. Returns TRUE to abort the search when the engine
  gave a definite answer (found or hard error), FALSE to continue.
*/
static my_bool discover_handlerton(THD *thd, plugin_ref plugin,
                                   void *arg)
{
  TABLE_SHARE *share= (TABLE_SHARE *)arg;
  handlerton *hton= plugin_hton(plugin);
  if (hton->discover_table)
  {
    /* Tentatively hand the share to this engine for the discovery call. */
    share->db_plugin= plugin;
    int error= hton->discover_table(hton, thd, share);
    if (error != HA_ERR_NO_SUCH_TABLE)
    {
      if (unlikely(error))
      {
        if (!share->error)
        {
          share->error= OPEN_FRM_ERROR_ALREADY_ISSUED;
          plugin_unlock(0, share->db_plugin);
        }

        /*
          report an error, unless it is "generic" and a more
          specific one was already reported
        */
        if (error != HA_ERR_GENERIC || !thd->is_error())
          my_error(ER_GET_ERRNO, MYF(0), error, plugin_name(plugin)->str);
        share->db_plugin= 0;
      }
      else
        share->error= OPEN_FRM_OK;

      status_var_increment(thd->status_var.ha_discover_count);
      return TRUE; // abort the search
    }
    /* This engine doesn't have the table: detach it from the share. */
    share->db_plugin= 0;
  }

  DBUG_ASSERT(share->error == OPEN_FRM_OPEN_ERROR);
  return FALSE; // continue with the next engine
}
5763
/**
  Discover the table described by 'share' in some storage engine.

  If the share already names a plugin, only that engine is asked;
  otherwise all engines are probed via plugin_foreach().

  @return 0 if the share was successfully discovered, nonzero otherwise
*/
int ha_discover_table(THD *thd, TABLE_SHARE *share)
{
  DBUG_ENTER("ha_discover_table");
  int found;

  DBUG_ASSERT(share->error == OPEN_FRM_OPEN_ERROR);   // share is not OK yet

  if (!engines_with_discover)
    found= FALSE;
  else if (share->db_plugin)
    found= discover_handlerton(thd, share->db_plugin, share);
  else
    found= plugin_foreach(thd, discover_handlerton,
                          MYSQL_STORAGE_ENGINE_PLUGIN, share);

  if (!found)
    open_table_error(share, OPEN_FRM_OPEN_ERROR, ENOENT); // not found

  DBUG_RETURN(share->error != OPEN_FRM_OK);
}
5784
file_ext_exists(char * path,size_t path_len,const char * ext)5785 static my_bool file_ext_exists(char *path, size_t path_len, const char *ext)
5786 {
5787 strmake(path + path_len, ext, FN_REFLEN - path_len);
5788 return !access(path, F_OK);
5789 }
5790
struct st_discover_existence_args
{
  char *path;                   // Buffer holding the table file path prefix
  size_t path_len;              // Length of the prefix in 'path'
  const char *db, *table_name;  // Database and table being probed
  handlerton *hton;             // Out: last engine consulted
  bool frm_exists;              // Fallback answer when an engine can't check
};
5799
/**
  plugin_foreach() callback: ask one engine whether the table described
  in 'arg' exists. Returns TRUE (stop iteration) when it does.
*/
static my_bool discover_existence(THD *thd, plugin_ref plugin,
                                  void *arg)
{
  st_discover_existence_args *args= (st_discover_existence_args*)arg;
  handlerton *ht= plugin_hton(plugin);
  /* Engine can't check existence itself: fall back to the .frm answer. */
  if (!ht->discover_table_existence)
    return args->frm_exists;

  args->hton= ht;

  /* Extension-based check: probe for the engine's first data file. */
  if (ht->discover_table_existence == ext_based_existence)
    return file_ext_exists(args->path, args->path_len,
                           ht->tablefile_extensions[0]);

  return ht->discover_table_existence(ht, args->db, args->table_name);
}
5816
5817
5818 /**
5819 Check if a given table exists, without doing a full discover, if possible
5820
5821 If the 'hton' is not NULL, it's set to the handlerton of the storage engine
5822 of this table, or to view_pseudo_hton if the frm belongs to a view.
5823
5824 This function takes discovery correctly into account. If frm is found,
5825 it discovers the table to make sure it really exists in the engine.
5826 If no frm is found it discovers the table, in case it still exists in
5827 the engine.
5828
5829 While it tries to cut corners (don't open .frm if no discovering engine is
5830 enabled, no full discovery if all discovering engines support
5831 discover_table_existence, etc), it still *may* be quite expensive
5832 and must be used sparingly.
5833
5834 @retval true Table exists (even if the error occurred, like bad frm)
5835 @retval false Table does not exist (one can do CREATE TABLE table_name)
5836
5837 @note if frm exists and the table in engine doesn't, *hton will be set,
5838 but the return value will be false.
5839
5840 @note if frm file exists, but the table cannot be opened (engine not
5841 loaded, frm is invalid), the return value will be true, but
5842 *hton will be NULL.
5843 */
5844
bool ha_table_exists(THD *thd, const LEX_CSTRING *db,
                     const LEX_CSTRING *table_name,
                     handlerton **hton, bool *is_sequence)
{
  handlerton *dummy;
  bool dummy2;
  DBUG_ENTER("ha_table_exists");

  if (hton)
    *hton= 0;
  else if (engines_with_discover)
    hton= &dummy;             // discovery below needs somewhere to store the engine
  if (!is_sequence)
    is_sequence= &dummy2;
  *is_sequence= 0;

  /* Fast path: if the share is already in the table definition cache,
     the table certainly exists */
  TDC_element *element= tdc_lock_share(thd, db->str, table_name->str);
  if (element && element != MY_ERRPTR)
  {
    if (hton)
      *hton= element->share->db_type();
    *is_sequence= element->share->table_type == TABLE_TYPE_SEQUENCE;
    tdc_unlock_share(element);
    DBUG_RETURN(TRUE);
  }

  char path[FN_REFLEN + 1];
  size_t path_len = build_table_filename(path, sizeof(path) - 1,
                                         db->str, table_name->str, "", 0);
  st_discover_existence_args args= {path, path_len, db->str, table_name->str, 0, true};

  if (file_ext_exists(path, path_len, reg_ext))
  {
    bool exists= true;
    if (hton)
    {
      char engine_buf[NAME_CHAR_LEN + 1];
      LEX_CSTRING engine= { engine_buf, 0 };
      Table_type type= dd_frm_type(thd, path, &engine);

      switch (type) {
      case TABLE_TYPE_UNKNOWN:
        /* .frm is present but unreadable/invalid: report "exists",
           leaving *hton == NULL as documented above */
        DBUG_PRINT("exit", ("Exist, cannot be opened"));
        DBUG_RETURN(true);                      // Frm exists
      case TABLE_TYPE_VIEW:
        *hton= view_pseudo_hton;
        DBUG_PRINT("exit", ("Exist, view"));
        DBUG_RETURN(true);                      // Frm exists
      case TABLE_TYPE_SEQUENCE:
        *is_sequence= true;
        /* fall through */
      case TABLE_TYPE_NORMAL:
        {
          plugin_ref p=  plugin_lock_by_name(thd, &engine,
                                             MYSQL_STORAGE_ENGINE_PLUGIN);
          *hton= p ? plugin_hton(p) : NULL;
          if (*hton)      // verify that the table really exists
            exists= discover_existence(thd, p, &args);
        }
      }
    }
    DBUG_PRINT("exit", (exists ? "Exists" : "Does not exist"));
    DBUG_RETURN(exists);
  }

  /* No .frm file: ask every discovering engine whether it knows the table */
  args.frm_exists= false;
  if (plugin_foreach(thd, discover_existence, MYSQL_STORAGE_ENGINE_PLUGIN,
                     &args))
  {
    if (hton)
      *hton= args.hton;
    DBUG_PRINT("exit", ("discovery found file"));
    DBUG_RETURN(TRUE);
  }

  if (need_full_discover_for_existence)
  {
    TABLE_LIST table;
    bool exists;
    uint flags = GTS_TABLE | GTS_VIEW;

    if (!hton)
      flags|= GTS_NOLOCK;

    /* Full discovery: try to actually open the share, trapping
       ER_NO_SUCH_TABLE so a missing table is not reported as an error */
    Table_exists_error_handler no_such_table_handler;
    thd->push_internal_handler(&no_such_table_handler);
    table.init_one_table(db, table_name, 0, TL_READ);
    TABLE_SHARE *share= tdc_acquire_share(thd, &table, flags);
    thd->pop_internal_handler();

    if (hton && share)
    {
      *hton= share->db_type();
      tdc_release_share(share);
    }

    // the table doesn't exist if we've caught ER_NO_SUCH_TABLE and nothing else
    exists= !no_such_table_handler.safely_trapped_errors();
    DBUG_PRINT("exit", (exists ? "Exists" : "Does not exist"));
    DBUG_RETURN(exists);
  }

  DBUG_PRINT("exit", ("Does not exist"));
  DBUG_RETURN(FALSE);
}
5950
5951
5952 /*
5953 Check if the CREATE/ALTER table should be ignored
5954 This could happen for slaves where the table is shared between master
5955 and slave
5956
5957 If statement is ignored, write a note
5958 */
5959
check_if_updates_are_ignored(const char * op) const5960 bool handler::check_if_updates_are_ignored(const char *op) const
5961 {
5962 return ha_check_if_updates_are_ignored(table->in_use, ht, op);
5963 }
5964
5965
ha_check_if_updates_are_ignored(THD * thd,handlerton * hton,const char * op)5966 bool ha_check_if_updates_are_ignored(THD *thd, handlerton *hton,
5967 const char *op)
5968 {
5969 DBUG_ENTER("ha_check_if_updates_are_ignored");
5970 if (!thd->slave_thread || !(hton= ha_checktype(thd, hton, 1)))
5971 DBUG_RETURN(0); // Not slave or no engine
5972 if (!(hton->flags & HTON_IGNORE_UPDATES))
5973 DBUG_RETURN(0); // Not shared table
5974 my_error(ER_SLAVE_IGNORED_SHARED_TABLE, MYF(ME_NOTE), op);
5975 DBUG_RETURN(1);
5976 }
5977
5978
5979 /**
5980 Discover all table names in a given database
5981 */
5982 extern "C" {
5983
cmp_file_names(const void * a,const void * b)5984 static int cmp_file_names(const void *a, const void *b)
5985 {
5986 CHARSET_INFO *cs= character_set_filesystem;
5987 char *aa= ((FILEINFO *)a)->name;
5988 char *bb= ((FILEINFO *)b)->name;
5989 return cs->strnncoll(aa, strlen(aa), bb, strlen(bb));
5990 }
5991
cmp_table_names(LEX_CSTRING * const * a,LEX_CSTRING * const * b)5992 static int cmp_table_names(LEX_CSTRING * const *a, LEX_CSTRING * const *b)
5993 {
5994 return my_charset_bin.strnncoll((*a)->str, (*a)->length,
5995 (*b)->str, (*b)->length);
5996 }
5997
5998 #ifndef DBUG_OFF
cmp_table_names_desc(LEX_CSTRING * const * a,LEX_CSTRING * const * b)5999 static int cmp_table_names_desc(LEX_CSTRING * const *a, LEX_CSTRING * const *b)
6000 {
6001 return -cmp_table_names(a, b);
6002 }
6003 #endif
6004
6005 }
6006
Discovered_table_list(THD * thd_arg,Dynamic_array<LEX_CSTRING * > * tables_arg,const LEX_CSTRING * wild_arg)6007 Discovered_table_list::Discovered_table_list(THD *thd_arg,
6008 Dynamic_array<LEX_CSTRING*> *tables_arg,
6009 const LEX_CSTRING *wild_arg) :
6010 thd(thd_arg), with_temps(false), tables(tables_arg)
6011 {
6012 if (wild_arg->str && wild_arg->str[0])
6013 {
6014 wild= wild_arg->str;
6015 wend= wild + wild_arg->length;
6016 }
6017 else
6018 wild= 0;
6019 }
6020
add_table(const char * tname,size_t tlen)6021 bool Discovered_table_list::add_table(const char *tname, size_t tlen)
6022 {
6023 /*
6024 TODO Check with_temps and filter out temp tables.
6025 Implement the check, when we'll have at least one affected engine (with
6026 custom discover_table_names() method, that calls add_table() directly).
6027 Note: avoid comparing the same name twice (here and in add_file).
6028 */
6029 if (wild && table_alias_charset->wildcmp(tname, tname + tlen, wild, wend,
6030 wild_prefix, wild_one, wild_many))
6031 return 0;
6032
6033 LEX_CSTRING *name= thd->make_clex_string(tname, tlen);
6034 if (!name || tables->append(name))
6035 return 1;
6036 return 0;
6037 }
6038
add_file(const char * fname)6039 bool Discovered_table_list::add_file(const char *fname)
6040 {
6041 bool is_temp= strncmp(fname, STRING_WITH_LEN(tmp_file_prefix)) == 0;
6042
6043 if (is_temp && !with_temps)
6044 return 0;
6045
6046 char tname[SAFE_NAME_LEN + 1];
6047 size_t tlen= filename_to_tablename(fname, tname, sizeof(tname), is_temp);
6048 return add_table(tname, tlen);
6049 }
6050
6051
/* Sort collected table names in ascending binary order. */
void Discovered_table_list::sort()
{
  tables->sort(cmp_table_names);
}
6056
6057
#ifndef DBUG_OFF
/* Debug-only: sort collected table names in descending binary order. */
void Discovered_table_list::sort_desc()
{
  tables->sort(cmp_table_names_desc);
}
#endif
6064
6065
/*
  Sort the list and squeeze out adjacent duplicates in place.

  'src' points at the last element kept so far, 'dst' scans forward;
  a name differing from *src is kept by advancing src and copying the
  pointer down. Finally the element count is truncated to the kept part.
*/
void Discovered_table_list::remove_duplicates()
{
  LEX_CSTRING **src= tables->front();
  LEX_CSTRING **dst= src;
  sort();
  while (++dst <= tables->back())
  {
    LEX_CSTRING *s= *src, *d= *dst;
    /* sorted ascending: *src must never compare greater than *dst */
    DBUG_ASSERT(strncmp(s->str, d->str, MY_MIN(s->length, d->length)) <= 0);
    if ((s->length != d->length || strncmp(s->str, d->str, d->length)))
    {
      src++;
      if (src != dst)
        *src= *dst;       // compact: move the kept name down
    }
  }
  tables->elements(src - tables->front() + 1);
}
6084
/* Shared state for the discover_names() plugin_foreach callback. */
struct st_discover_names_args
{
  LEX_CSTRING *db;                // database being listed
  MY_DIR *dirp;                   // directory listing, or NULL on the fast path
  Discovered_table_list *result;  // accumulator for discovered names
  uint possible_duplicates;       // count of names that may repeat frm-based ones
};
6092
discover_names(THD * thd,plugin_ref plugin,void * arg)6093 static my_bool discover_names(THD *thd, plugin_ref plugin,
6094 void *arg)
6095 {
6096 st_discover_names_args *args= (st_discover_names_args *)arg;
6097 handlerton *ht= plugin_hton(plugin);
6098
6099 if (ht->discover_table_names)
6100 {
6101 size_t old_elements= args->result->tables->elements();
6102 if (ht->discover_table_names(ht, args->db, args->dirp, args->result))
6103 return 1;
6104
6105 /*
6106 hton_ext_based_table_discovery never discovers a table that has
6107 a corresponding .frm file; but custom engine discover methods might
6108 */
6109 if (ht->discover_table_names != hton_ext_based_table_discovery)
6110 args->possible_duplicates+= (uint)(args->result->tables->elements() - old_elements);
6111 }
6112
6113 return 0;
6114 }
6115
6116 /**
6117 Return the list of tables
6118
6119 @param thd
6120 @param db database to look into
6121 @param dirp list of files in this database (as returned by my_dir())
6122 @param result the object to return the list of files in
6123 @param reusable if true, on return, 'dirp' will be a valid list of all
6124 non-table files. If false, discovery will work much faster,
6125 but it will leave 'dirp' corrupted and completely unusable,
6126 only good for my_dirend().
6127
6128 Normally, reusable=false for SHOW and INFORMATION_SCHEMA, and reusable=true
6129 for DROP DATABASE (as it needs to know and delete non-table files).
6130 */
6131
ha_discover_table_names(THD * thd,LEX_CSTRING * db,MY_DIR * dirp,Discovered_table_list * result,bool reusable)6132 int ha_discover_table_names(THD *thd, LEX_CSTRING *db, MY_DIR *dirp,
6133 Discovered_table_list *result, bool reusable)
6134 {
6135 int error;
6136 DBUG_ENTER("ha_discover_table_names");
6137
6138 if (engines_with_discover_file_names == 0 && !reusable)
6139 {
6140 st_discover_names_args args= {db, NULL, result, 0};
6141 error= ext_table_discovery_simple(dirp, result) ||
6142 plugin_foreach(thd, discover_names,
6143 MYSQL_STORAGE_ENGINE_PLUGIN, &args);
6144 if (args.possible_duplicates > 0)
6145 result->remove_duplicates();
6146 }
6147 else
6148 {
6149 st_discover_names_args args= {db, dirp, result, 0};
6150
6151 /* extension_based_table_discovery relies on dirp being sorted */
6152 my_qsort(dirp->dir_entry, dirp->number_of_files,
6153 sizeof(FILEINFO), cmp_file_names);
6154
6155 error= extension_based_table_discovery(dirp, reg_ext, result) ||
6156 plugin_foreach(thd, discover_names,
6157 MYSQL_STORAGE_ENGINE_PLUGIN, &args);
6158 if (args.possible_duplicates > 0)
6159 result->remove_duplicates();
6160 }
6161
6162 DBUG_RETURN(error);
6163 }
6164
6165
6166 /*
6167 int handler::pre_read_multi_range_first(KEY_MULTI_RANGE **found_range_p,
6168 KEY_MULTI_RANGE *ranges,
6169 uint range_count,
6170 bool sorted, HANDLER_BUFFER *buffer,
6171 bool use_parallel)
6172 {
6173 int result;
6174 DBUG_ENTER("handler::pre_read_multi_range_first");
6175 result = pre_read_range_first(ranges->start_key.keypart_map ?
6176 &ranges->start_key : 0,
6177 ranges->end_key.keypart_map ?
6178 &ranges->end_key : 0,
6179 test(ranges->range_flag & EQ_RANGE),
6180 sorted,
6181 use_parallel);
6182 DBUG_RETURN(result);
6183 }
6184 */
6185
6186
6187 /**
6188 Read first row between two ranges.
6189 Store ranges for future calls to read_range_next.
6190
6191 @param start_key Start key. Is 0 if no min range
6192 @param end_key End key. Is 0 if no max range
6193 @param eq_range_arg Set to 1 if start_key == end_key
6194 @param sorted Set to 1 if result should be sorted per key
6195
6196 @note
6197 Record is read into table->record[0]
6198
6199 @retval
6200 0 Found row
6201 @retval
6202 HA_ERR_END_OF_FILE No rows in range
6203 @retval
6204 \# Error code
6205 */
read_range_first(const key_range * start_key,const key_range * end_key,bool eq_range_arg,bool sorted)6206 int handler::read_range_first(const key_range *start_key,
6207 const key_range *end_key,
6208 bool eq_range_arg, bool sorted)
6209 {
6210 int result;
6211 DBUG_ENTER("handler::read_range_first");
6212
6213 eq_range= eq_range_arg;
6214 set_end_range(end_key);
6215 range_key_part= table->key_info[active_index].key_part;
6216
6217 if (!start_key) // Read first record
6218 result= ha_index_first(table->record[0]);
6219 else
6220 result= ha_index_read_map(table->record[0],
6221 start_key->key,
6222 start_key->keypart_map,
6223 start_key->flag);
6224 if (result)
6225 DBUG_RETURN((result == HA_ERR_KEY_NOT_FOUND)
6226 ? HA_ERR_END_OF_FILE
6227 : result);
6228
6229 if (compare_key(end_range) <= 0)
6230 {
6231 DBUG_RETURN(0);
6232 }
6233 else
6234 {
6235 /*
6236 The last read row does not fall in the range. So request
6237 storage engine to release row lock if possible.
6238 */
6239 unlock_row();
6240 DBUG_RETURN(HA_ERR_END_OF_FILE);
6241 }
6242 }
6243
6244
6245 /**
6246 Read next row between two ranges.
6247
6248 @note
6249 Record is read into table->record[0]
6250
6251 @retval
6252 0 Found row
6253 @retval
6254 HA_ERR_END_OF_FILE No rows in range
6255 @retval
6256 \# Error code
6257 */
read_range_next()6258 int handler::read_range_next()
6259 {
6260 int result;
6261 DBUG_ENTER("handler::read_range_next");
6262
6263 if (eq_range)
6264 {
6265 /* We trust that index_next_same always gives a row in range */
6266 DBUG_RETURN(ha_index_next_same(table->record[0],
6267 end_range->key,
6268 end_range->length));
6269 }
6270 result= ha_index_next(table->record[0]);
6271 if (result)
6272 DBUG_RETURN(result);
6273
6274 if (compare_key(end_range) <= 0)
6275 {
6276 DBUG_RETURN(0);
6277 }
6278 else
6279 {
6280 /*
6281 The last read row does not fall in the range. So request
6282 storage engine to release row lock if possible.
6283 */
6284 unlock_row();
6285 DBUG_RETURN(HA_ERR_END_OF_FILE);
6286 }
6287 }
6288
6289
set_end_range(const key_range * end_key)6290 void handler::set_end_range(const key_range *end_key)
6291 {
6292 end_range= 0;
6293 if (end_key)
6294 {
6295 end_range= &save_end_range;
6296 save_end_range= *end_key;
6297 key_compare_result_on_equal=
6298 ((end_key->flag == HA_READ_BEFORE_KEY) ? 1 :
6299 (end_key->flag == HA_READ_AFTER_KEY) ? -1 : 0);
6300 }
6301 }
6302
6303
6304 /**
6305 Compare if found key (in row) is over max-value.
6306
6307 @param range range to compare to row. May be 0 for no range
6308
6309 @see also
6310 key.cc::key_cmp()
6311
6312 @return
6313 The return value is SIGN(key_in_row - range_key):
6314
6315 - 0 : Key is equal to range or 'range' == 0 (no range)
6316 - -1 : Key is less than range
6317 - 1 : Key is larger than range
6318 */
compare_key(key_range * range)6319 int handler::compare_key(key_range *range)
6320 {
6321 int cmp;
6322 if (!range || in_range_check_pushed_down)
6323 return 0; // No max range
6324 cmp= key_cmp(range_key_part, range->key, range->length);
6325 if (!cmp)
6326 cmp= key_compare_result_on_equal;
6327 return cmp;
6328 }
6329
6330
6331 /*
6332 Same as compare_key() but doesn't check have in_range_check_pushed_down.
6333 This is used by index condition pushdown implementation.
6334 */
6335
compare_key2(key_range * range) const6336 int handler::compare_key2(key_range *range) const
6337 {
6338 int cmp;
6339 if (!range)
6340 return 0; // no max range
6341 cmp= key_cmp(range_key_part, range->key, range->length);
6342 if (!cmp)
6343 cmp= key_compare_result_on_equal;
6344 return cmp;
6345 }
6346
6347
6348 /**
6349 ICP callback - to be called by an engine to check the pushed condition
6350 */
/* ICP callback: evaluate the pushed index condition for the current row. */
extern "C" check_result_t handler_index_cond_check(void* h_arg)
{
  handler *h= (handler*)h_arg;
  THD *thd= h->table->in_use;
  check_result_t res;

  DEBUG_SYNC(thd, "handler_index_cond_check");
  /* Engines with rollback can be aborted softly; others must stop at once */
  enum thd_kill_levels abort_at= h->has_rollback() ?
    THD_ABORT_SOFTLY : THD_ABORT_ASAP;
  if (thd_kill_level(thd) > abort_at)
    return CHECK_ABORTED_BY_USER;

  /* Out-of-range rows end the scan before the condition is even evaluated */
  if (h->end_range && h->compare_key2(h->end_range) > 0)
    return CHECK_OUT_OF_RANGE;
  h->increment_statistics(&SSV::ha_icp_attempts);
  if ((res= h->pushed_idx_cond->val_int()? CHECK_POS : CHECK_NEG) ==
      CHECK_POS)
    h->increment_statistics(&SSV::ha_icp_match);
  return res;
}
6371
6372
6373 /**
6374 Rowid filter callback - to be called by an engine to check rowid / primary
6375 keys of the rows whose data is to be fetched against the used rowid filter
6376 */
6377
6378 extern "C"
handler_rowid_filter_check(void * h_arg)6379 check_result_t handler_rowid_filter_check(void *h_arg)
6380 {
6381 handler *h= (handler*) h_arg;
6382 TABLE *tab= h->get_table();
6383
6384 /*
6385 Check for out-of-range and killed conditions only if we haven't done it
6386 already in the pushed index condition check
6387 */
6388 if (!h->pushed_idx_cond)
6389 {
6390 THD *thd= h->table->in_use;
6391 DEBUG_SYNC(thd, "handler_rowid_filter_check");
6392 enum thd_kill_levels abort_at= h->has_transactions() ?
6393 THD_ABORT_SOFTLY : THD_ABORT_ASAP;
6394 if (thd_kill_level(thd) > abort_at)
6395 return CHECK_ABORTED_BY_USER;
6396
6397 if (h->end_range && h->compare_key2(h->end_range) > 0)
6398 return CHECK_OUT_OF_RANGE;
6399 }
6400
6401 h->position(tab->record[0]);
6402 return h->pushed_rowid_filter->check((char*)h->ref)? CHECK_POS: CHECK_NEG;
6403 }
6404
6405
6406 /**
6407 Callback function for an engine to check whether the used rowid filter
6408 has been already built
6409 */
6410
handler_rowid_filter_is_active(void * h_arg)6411 extern "C" int handler_rowid_filter_is_active(void *h_arg)
6412 {
6413 if (!h_arg)
6414 return false;
6415 handler *h= (handler*) h_arg;
6416 return h->rowid_filter_is_active;
6417 }
6418
6419
index_read_idx_map(uchar * buf,uint index,const uchar * key,key_part_map keypart_map,enum ha_rkey_function find_flag)6420 int handler::index_read_idx_map(uchar * buf, uint index, const uchar * key,
6421 key_part_map keypart_map,
6422 enum ha_rkey_function find_flag)
6423 {
6424 int error, UNINIT_VAR(error1);
6425
6426 error= ha_index_init(index, 0);
6427 if (likely(!error))
6428 {
6429 error= index_read_map(buf, key, keypart_map, find_flag);
6430 error1= ha_index_end();
6431 }
6432 return error ? error : error1;
6433 }
6434
6435
6436 /**
6437 Returns a list of all known extensions.
6438
6439 No mutexes, worst case race is a minor surplus memory allocation
6440 We have to recreate the extension map if mysqld is restarted (for example
6441 within libmysqld)
6442
6443 @retval
6444 pointer pointer to TYPELIB structure
6445 */
exts_handlerton(THD * unused,plugin_ref plugin,void * arg)6446 static my_bool exts_handlerton(THD *unused, plugin_ref plugin,
6447 void *arg)
6448 {
6449 List<char> *found_exts= (List<char> *) arg;
6450 handlerton *hton= plugin_hton(plugin);
6451 List_iterator_fast<char> it(*found_exts);
6452 const char **ext, *old_ext;
6453
6454 for (ext= hton->tablefile_extensions; *ext; ext++)
6455 {
6456 while ((old_ext= it++))
6457 {
6458 if (!strcmp(old_ext, *ext))
6459 break;
6460 }
6461 if (!old_ext)
6462 found_exts->push_back((char *) *ext);
6463
6464 it.rewind();
6465 }
6466 return FALSE;
6467 }
6468
TYPELIB *ha_known_exts(void)
{
  /* Rebuild the cached TYPELIB on first use or after a mysys restart
     (mysys_usage_id changes e.g. when libmysqld is restarted) */
  if (!known_extensions.type_names || mysys_usage_id != known_extensions_id)
  {
    List<char> found_exts;
    const char **ext, *old_ext;

    known_extensions_id= mysys_usage_id;
    /* Trigger and transaction-coordinator files are always known */
    found_exts.push_back((char*) TRG_EXT);
    found_exts.push_back((char*) TRN_EXT);

    /* Collect each engine's tablefile extensions, deduplicated */
    plugin_foreach(NULL, exts_handlerton,
                   MYSQL_STORAGE_ENGINE_PLUGIN, &found_exts);

    /* my_once_alloc: the array intentionally lives until process exit */
    ext= (const char **) my_once_alloc(sizeof(char *)*
                                       (found_exts.elements+1),
                                       MYF(MY_WME | MY_FAE));

    DBUG_ASSERT(ext != 0);
    known_extensions.count= found_exts.elements;
    known_extensions.type_names= ext;

    /* Copy the pointers; the array is NULL-terminated */
    List_iterator_fast<char> it(found_exts);
    while ((old_ext= it++))
      *ext++= old_ext;
    *ext= 0;
  }
  return &known_extensions;
}
6498
6499
stat_print(THD * thd,const char * type,size_t type_len,const char * file,size_t file_len,const char * status,size_t status_len)6500 static bool stat_print(THD *thd, const char *type, size_t type_len,
6501 const char *file, size_t file_len,
6502 const char *status, size_t status_len)
6503 {
6504 Protocol *protocol= thd->protocol;
6505 protocol->prepare_for_resend();
6506 protocol->store(type, type_len, system_charset_info);
6507 protocol->store(file, file_len, system_charset_info);
6508 protocol->store(status, status_len, system_charset_info);
6509 if (protocol->write())
6510 return TRUE;
6511 return FALSE;
6512 }
6513
6514
showstat_handlerton(THD * thd,plugin_ref plugin,void * arg)6515 static my_bool showstat_handlerton(THD *thd, plugin_ref plugin,
6516 void *arg)
6517 {
6518 enum ha_stat_type stat= *(enum ha_stat_type *) arg;
6519 handlerton *hton= plugin_hton(plugin);
6520 if (hton->show_status &&
6521 hton->show_status(hton, thd, stat_print, stat))
6522 return TRUE;
6523 return FALSE;
6524 }
6525
/*
  Implement SHOW ENGINE ... STATUS/MUTEX: send the result-set metadata,
  then one or all engines' status rows via stat_print().
  db_type == NULL means "all engines".
*/
bool ha_show_status(THD *thd, handlerton *db_type, enum ha_stat_type stat)
{
  List<Item> field_list;
  Protocol *protocol= thd->protocol;
  MEM_ROOT *mem_root= thd->mem_root;
  bool result;

  field_list.push_back(new (mem_root) Item_empty_string(thd, "Type", 10),
                       mem_root);
  field_list.push_back(new (mem_root)
                       Item_empty_string(thd, "Name", FN_REFLEN), mem_root);
  field_list.push_back(new (mem_root)
                       Item_empty_string(thd, "Status", 10),
                       mem_root);

  if (protocol->send_result_set_metadata(&field_list,
                          Protocol::SEND_NUM_ROWS | Protocol::SEND_EOF))
    return TRUE;

  if (db_type == NULL)
  {
    /* All engines: iterate every storage-engine plugin */
    result= plugin_foreach(thd, showstat_handlerton,
                           MYSQL_STORAGE_ENGINE_PLUGIN, &stat);
  }
  else
  {
    result= db_type->show_status &&
            db_type->show_status(db_type, thd, stat_print, stat) ? 1 : 0;
  }

  /*
    We also check thd->is_error() as Innodb may return 0 even if
    there was an error.
  */
  if (likely(!result && !thd->is_error()))
    my_eof(thd);
  else if (!thd->is_error())
    my_error(ER_GET_ERRNO, MYF(0), errno, hton_name(db_type)->str);
  return result;
}
6566
6567 /*
6568 Function to check if the conditions for row-based binlogging is
6569 correct for the table.
6570
6571 A row in the given table should be replicated if:
6572 - It's not called by partition engine
6573 - Row-based replication is enabled in the current thread
6574 - The binlog is enabled
6575 - It is not a temporary table
6576 - The binary log is open
6577 - The database the table resides in shall be binlogged (binlog_*_db rules)
6578 - table is not mysql.event
6579
6580 RETURN VALUE
6581 0 No binary logging in row format
6582 1 Row needs to be logged
6583 */
6584
check_table_binlog_row_based()6585 bool handler::check_table_binlog_row_based()
6586 {
6587 if (unlikely((!check_table_binlog_row_based_done)))
6588 {
6589 check_table_binlog_row_based_done= 1;
6590 check_table_binlog_row_based_result=
6591 check_table_binlog_row_based_internal();
6592 }
6593 return check_table_binlog_row_based_result;
6594 }
6595
/*
  Decide whether rows of this table must be written to the binary log in
  row format for the current statement (see the rule list in the comment
  above check_table_binlog_row_based()).
*/
bool handler::check_table_binlog_row_based_internal()
{
  THD *thd= table->in_use;

#ifdef WITH_WSREP
  if (!thd->variables.sql_log_bin &&
      wsrep_thd_is_applying(table->in_use))
  {
    /*
      wsrep patch sets sql_log_bin to silence binlogging from high
      priority threads
    */
    return 0;
  }
#endif
  /* Note: VERS_TRX_ID-versioned tables are excluded from row logging */
  return (table->s->can_do_row_logging &&
          !table->versioned(VERS_TRX_ID) &&
          !(thd->variables.option_bits & OPTION_BIN_TMP_LOG_OFF) &&
          thd->is_current_stmt_binlog_format_row() &&
          /*
            Wsrep partially enables binary logging if it have not been
            explicitly turned on. As a result we return 'true' if we are in
            wsrep binlog emulation mode and the current thread is not a wsrep
            applier or replayer thread. This decision is not affected by
            @@sql_log_bin as we want the events to make into the binlog
            cache only to filter them later before they make into binary log
            file.

            However, we do return 'false' if binary logging was temporarily
            turned off (see tmp_disable_binlog(A)).

            Otherwise, return 'true' if binary logging is on.
          */
          IF_WSREP(((WSREP_EMULATE_BINLOG_NNULL(thd) &&
                     wsrep_thd_is_local(thd)) ||
                    ((WSREP_NNULL(thd) ||
                      (thd->variables.option_bits & OPTION_BIN_LOG)) &&
                     mysql_bin_log.is_open())),
                   (thd->variables.option_bits & OPTION_BIN_LOG) &&
                   mysql_bin_log.is_open()));
}
6637
6638
binlog_log_row(TABLE * table,const uchar * before_record,const uchar * after_record,Log_func * log_func)6639 int handler::binlog_log_row(TABLE *table,
6640 const uchar *before_record,
6641 const uchar *after_record,
6642 Log_func *log_func)
6643 {
6644 bool error;
6645 THD *thd= table->in_use;
6646 DBUG_ENTER("binlog_log_row");
6647
6648 if (!thd->binlog_table_maps &&
6649 thd->binlog_write_table_maps())
6650 DBUG_RETURN(HA_ERR_RBR_LOGGING_FAILED);
6651
6652 error= (*log_func)(thd, table, row_logging_has_trans,
6653 before_record, after_record);
6654 DBUG_RETURN(error ? HA_ERR_RBR_LOGGING_FAILED : 0);
6655 }
6656
6657
/*
  Lock (F_RDLCK/F_WRLCK) or unlock (F_UNLCK) the table in the engine,
  wrapping external_lock() with sanity asserts, PSI/DTrace probes,
  table-flag caching and audit notification.
*/
int handler::ha_external_lock(THD *thd, int lock_type)
{
  int error;
  DBUG_ENTER("handler::ha_external_lock");
  /*
    Whether this is lock or unlock, this should be true, and is to verify that
    if get_auto_increment() was called (thus may have reserved intervals or
    taken a table lock), ha_release_auto_increment() was too.
  */
  DBUG_ASSERT(next_insert_id == 0);
  /* Consecutive calls for lock without unlocking in between is not allowed */
  DBUG_ASSERT(table_share->tmp_table != NO_TMP_TABLE ||
              ((lock_type != F_UNLCK && m_lock_type == F_UNLCK) ||
               lock_type == F_UNLCK));
  /* SQL HANDLER call locks/unlock while scanning (RND/INDEX). */
  DBUG_ASSERT(inited == NONE || table->open_by_handler);

  /* Fire the matching "lock start" instrumentation probe, if enabled */
  if (MYSQL_HANDLER_RDLOCK_START_ENABLED() ||
      MYSQL_HANDLER_WRLOCK_START_ENABLED() ||
      MYSQL_HANDLER_UNLOCK_START_ENABLED())
  {
    if (lock_type == F_RDLCK)
    {
      MYSQL_HANDLER_RDLOCK_START(table_share->db.str,
                                 table_share->table_name.str);
    }
    else if (lock_type == F_WRLCK)
    {
      MYSQL_HANDLER_WRLOCK_START(table_share->db.str,
                                 table_share->table_name.str);
    }
    else if (lock_type == F_UNLCK)
    {
      MYSQL_HANDLER_UNLOCK_START(table_share->db.str,
                                 table_share->table_name.str);
    }
  }

  /*
    We cache the table flags if the locking succeeded. Otherwise, we
    keep them as they were when they were fetched in ha_open().
  */
  MYSQL_TABLE_LOCK_WAIT(PSI_TABLE_EXTERNAL_LOCK, lock_type,
    { error= external_lock(thd, lock_type); })

  DBUG_EXECUTE_IF("external_lock_failure", error= HA_ERR_GENERIC;);

  /* Unlock is always considered to have taken effect, even on error */
  if (likely(error == 0 || lock_type == F_UNLCK))
  {
    m_lock_type= lock_type;
    cached_table_flags= table_flags();
    if (table_share->tmp_table == NO_TMP_TABLE)
      mysql_audit_external_lock(thd, table_share, lock_type);
  }

  /* Fire the matching "lock done" instrumentation probe, if enabled */
  if (MYSQL_HANDLER_RDLOCK_DONE_ENABLED() ||
      MYSQL_HANDLER_WRLOCK_DONE_ENABLED() ||
      MYSQL_HANDLER_UNLOCK_DONE_ENABLED())
  {
    if (lock_type == F_RDLCK)
    {
      MYSQL_HANDLER_RDLOCK_DONE(error);
    }
    else if (lock_type == F_WRLCK)
    {
      MYSQL_HANDLER_WRLOCK_DONE(error);
    }
    else if (lock_type == F_UNLCK)
    {
      MYSQL_HANDLER_UNLOCK_DONE(error);
    }
  }
  DBUG_RETURN(error);
}
6732
6733
6734 /** @brief
6735 Check handler usage and reset state of file to after 'open'
6736 */
int handler::ha_reset()
{
  DBUG_ENTER("ha_reset");

  /* Check that we have called all proper deallocation functions */
  DBUG_ASSERT((uchar*) table->def_read_set.bitmap +
              table->s->column_bitmap_size ==
              (uchar*) table->def_write_set.bitmap);
  DBUG_ASSERT(bitmap_is_set_all(&table->s->all_set));
  DBUG_ASSERT(!table->file->keyread_enabled());
  /* ensure that ha_index_end / ha_rnd_end has been called */
  DBUG_ASSERT(inited == NONE);
  /* reset the bitmaps to point to defaults */
  table->default_column_bitmaps();
  pushed_cond= NULL;
  tracker= NULL;
  mark_trx_read_write_done= 0;
  /*
    Disable row logging.
  */
  row_logging= row_logging_init= 0;
  clear_cached_table_binlog_row_based_flag();
  /* Reset information about pushed index conditions */
  cancel_pushed_idx_cond();
  /* Reset information about the pushed rowid filter */
  cancel_pushed_rowid_filter();
  /* Drop the auxiliary handler used for long-unique/period lookups */
  if (lookup_handler != this)
  {
    lookup_handler->ha_external_unlock(table->in_use);
    lookup_handler->close();
    delete lookup_handler;
    lookup_handler= this;
  }
  DBUG_RETURN(reset());
}
6772
#ifdef WITH_WSREP
/*
  Per-row wsrep bookkeeping: enforce wsrep_max_ws_rows and run the
  after-row hook. Returns 0 or an ER_* error code.
*/
static int wsrep_after_row(THD *thd)
{
  DBUG_ENTER("wsrep_after_row");
  /* Server-internal transactions are not subject to writeset limits */
  if (thd->internal_transaction())
    DBUG_RETURN(0);

  /* enforce wsrep_max_ws_rows */
  thd->wsrep_affected_rows++;
  if (wsrep_max_ws_rows &&
      thd->wsrep_affected_rows > wsrep_max_ws_rows &&
      wsrep_thd_is_local(thd))
  {
    /* Roll back the statement (or whole transaction) before reporting */
    trans_rollback_stmt(thd) || trans_rollback(thd);
    my_message(ER_ERROR_DURING_COMMIT, "wsrep_max_ws_rows exceeded", MYF(0));
    DBUG_RETURN(ER_ERROR_DURING_COMMIT);
  }
  else if (wsrep_after_row_internal(thd))
  {
    DBUG_RETURN(ER_LOCK_DEADLOCK);
  }
  DBUG_RETURN(0);
}
#endif /* WITH_WSREP */
6797
6798
6799 /**
6800 Check if there is a conflicting unique hash key
6801 */
6802
/*
  Check whether new_rec conflicts with an existing row on the long-unique
  hash key 'key_no'. A hash match is only a candidate: the actual column
  values must be re-compared to rule out hash collisions.
  Returns 0, HA_ERR_FOUND_DUPP_KEY, or an engine error.
*/
int handler::check_duplicate_long_entry_key(const uchar *new_rec, uint key_no)
{
  int result, error= 0;
  KEY *key_info= table->key_info + key_no;
  Field *hash_field= key_info->key_part->field;
  uchar ptr[HA_HASH_KEY_LENGTH_WITH_NULL];
  DBUG_ENTER("handler::check_duplicate_long_entry_key");

  DBUG_ASSERT((key_info->flags & HA_NULL_PART_KEY &&
               key_info->key_length == HA_HASH_KEY_LENGTH_WITH_NULL) ||
              key_info->key_length == HA_HASH_KEY_LENGTH_WITHOUT_NULL);

  /* A NULL hash means some keypart is NULL: never a duplicate */
  if (hash_field->is_real_null())
    DBUG_RETURN(0);

  key_copy(ptr, new_rec, key_info, key_info->key_length, false);

  result= lookup_handler->ha_index_init(key_no, 0);
  if (result)
    DBUG_RETURN(result);
  /* The lookup overwrites record[0]; save it for restore below */
  store_record(table, file->lookup_buffer);
  result= lookup_handler->ha_index_read_map(table->record[0],
                                            ptr, HA_WHOLE_KEY, HA_READ_KEY_EXACT);
  if (!result)
  {
    bool is_same;
    Field * t_field;
    /* The hash vcol's expression lists exactly the indexed columns */
    Item_func_hash * temp= (Item_func_hash *)hash_field->vcol_info->expr;
    Item ** arguments= temp->arguments();
    uint arg_count= temp->argument_count();
    do
    {
      /* offset from the fetched row (in lookup_buffer) to new_rec */
      my_ptrdiff_t diff= table->file->lookup_buffer - new_rec;
      is_same= true;
      for (uint j=0; is_same && j < arg_count; j++)
      {
        DBUG_ASSERT(arguments[j]->type() == Item::FIELD_ITEM ||
                    // this one for left(fld_name,length)
                    arguments[j]->type() == Item::FUNC_ITEM);
        if (arguments[j]->type() == Item::FIELD_ITEM)
        {
          t_field= static_cast<Item_field *>(arguments[j])->field;
          if (t_field->cmp_offset(diff))
            is_same= false;
        }
        else
        {
          /* Prefix key: compare only the first 'length' characters */
          Item_func_left *fnc= static_cast<Item_func_left *>(arguments[j]);
          DBUG_ASSERT(!my_strcasecmp(system_charset_info, "left", fnc->func_name()));
          DBUG_ASSERT(fnc->arguments()[0]->type() == Item::FIELD_ITEM);
          t_field= static_cast<Item_field *>(fnc->arguments()[0])->field;
          uint length= (uint)fnc->arguments()[1]->val_int();
          if (t_field->cmp_prefix(t_field->ptr, t_field->ptr + diff, length))
            is_same= false;
        }
      }
    }
    /* Hash collision: step through all rows with the same hash value */
    while (!is_same &&
           !(result= lookup_handler->ha_index_next_same(table->record[0],
                                                 ptr, key_info->key_length)));
    if (is_same)
      error= HA_ERR_FOUND_DUPP_KEY;
    goto exit;
  }
  if (result != HA_ERR_KEY_NOT_FOUND)
    error= result;
exit:
  if (error == HA_ERR_FOUND_DUPP_KEY)
  {
    table->file->lookup_errkey= key_no;
    /* Engines with HA_DUPLICATE_POS expose the duplicate row's position */
    if (ha_table_flags() & HA_DUPLICATE_POS)
    {
      lookup_handler->position(table->record[0]);
      memcpy(table->file->dup_ref, lookup_handler->ref, ref_length);
    }
  }
  restore_record(table, file->lookup_buffer);
  lookup_handler->ha_index_end();
  DBUG_RETURN(error);
}
6883
alloc_lookup_buffer()6884 void handler::alloc_lookup_buffer()
6885 {
6886 if (!lookup_buffer)
6887 lookup_buffer= (uchar*)alloc_root(&table->mem_root,
6888 table_share->max_unique_length
6889 + table_share->null_fields
6890 + table_share->reclength);
6891 }
6892
6893 /** @brief
6894 check whether inserted records breaks the
6895 unique constraint on long columns.
6896 @returns 0 if no duplicate else returns error
6897 */
check_duplicate_long_entries(const uchar * new_rec)6898 int handler::check_duplicate_long_entries(const uchar *new_rec)
6899 {
6900 lookup_errkey= (uint)-1;
6901 for (uint i= 0; i < table->s->keys; i++)
6902 {
6903 int result;
6904 if (table->key_info[i].algorithm == HA_KEY_ALG_LONG_HASH &&
6905 (result= check_duplicate_long_entry_key(new_rec, i)))
6906 return result;
6907 }
6908 return 0;
6909 }
6910
6911
/** @brief
  Check whether an updated record breaks a unique constraint on long
  (hash based) columns.

  For an update only the keys whose participating fields actually
  changed need to be re-checked.  Consider:
    create table t1(a blob , b blob , x blob , y blob ,unique(a,b)
                    ,unique(x,y))
  and an update statement like
    update t1 set a=23+a;
  Scanning all keys would wrongly report a duplicate on x_y, because
  the x,y data is unchanged, so we first compare the old and new field
  values and only probe the keys that were touched.
  @param new_rec  new record image (table->record[0])
  @returns 0 if no duplicate else returns error
*/
int handler::check_duplicate_long_entries_update(const uchar *new_rec)
{
  Field *field;
  uint key_parts;
  KEY *keyinfo;
  KEY_PART_INFO *keypart;
  /*
    Offset between record[0] (new row) and record[1] (old row); used to
    compare each field's old value against its new value.
  */
  uint reclength= (uint) (table->record[1] - table->record[0]);

  for (uint i= 0; i < table->s->keys; i++)
  {
    keyinfo= table->key_info + i;
    if (keyinfo->algorithm == HA_KEY_ALG_LONG_HASH)
    {
      key_parts= fields_in_hash_keyinfo(keyinfo);
      /* The column key parts backing the hash precede key_part itself */
      keypart= keyinfo->key_part - key_parts;
      for (uint j= 0; j < key_parts; j++, keypart++)
      {
        int error;
        field= keypart->field;
        /* Compare fields if they are different then check for duplicates */
        if (field->cmp_binary_offset(reclength))
        {
          if((error= check_duplicate_long_entry_key(new_rec, i)))
            return error;
          /*
            break because check_duplicate_long_entries_key will
            take care of remaining fields
          */
          break;
        }
      }
    }
  }
  return 0;
}
6965
6966
/*
  Check whether a row breaks a WITHOUT OVERLAPS constraint on any
  application-time period key.

  @param old_data  old row image for an update, NULL for an insert;
                   used to recognize the row being updated among the
                   neighbours found by the index probe
  @param new_data  row image being checked

  @return 0 when no overlap, HA_ERR_FOUND_DUPP_KEY when an overlapping
          row exists (lookup_errkey is set to the key), or an error
          from the underlying index reads
*/
int handler::ha_check_overlaps(const uchar *old_data, const uchar* new_data)
{
  DBUG_ASSERT(new_data);
  /* Only the table's main handler instance performs the check */
  if (this != table->file)
    return 0;
  if (!table_share->period.unique_keys)
    return 0;
  /* Skip history rows: only current rows (row_end == max) are checked */
  if (table->versioned() && !table->vers_end_field()->is_max())
    return 0;

  const bool is_update= old_data != NULL;
  /* Records found by the probes are read into the area of lookup_buffer
     that follows the key image and null flags */
  uchar *record_buffer= lookup_buffer + table_share->max_unique_length
                                      + table_share->null_fields;

  // Needed to compare record refs later
  if (is_update)
    position(old_data);

  DBUG_ASSERT(!keyread_enabled());

  int error= 0;
  lookup_errkey= (uint)-1;

  for (uint key_nr= 0; key_nr < table_share->keys && !error; key_nr++)
  {
    const KEY &key_info= table->key_info[key_nr];
    const uint key_parts= key_info.user_defined_key_parts;
    if (!key_info.without_overlaps)
      continue;

    /* For updates, skip keys none of whose columns were written to */
    if (is_update)
    {
      bool key_used= false;
      for (uint k= 0; k < key_parts && !key_used; k++)
        key_used= bitmap_is_set(table->write_set,
                                key_info.key_part[k].fieldnr - 1);
      if (!key_used)
        continue;
    }

    error= lookup_handler->ha_index_init(key_nr, 0);
    if (error)
      return error;

    error= lookup_handler->ha_start_keyread(key_nr);
    DBUG_ASSERT(!error);

    /* The key ends with the two period fields (start, end), both of the
       same length; key_base_length is the part before them */
    const uint period_field_length= key_info.key_part[key_parts - 1].length;
    const uint key_base_length= key_info.key_length - 2 * period_field_length;

    key_copy(lookup_buffer, new_data, &key_info, 0);

    /* Copy period_start to period_end.
       the value in period_start field is not significant, but anyway let's leave
       it defined to avoid uninitialized memory access
    */
    memcpy(lookup_buffer + key_base_length,
           lookup_buffer + key_base_length + period_field_length,
           period_field_length);

    /* Find row with period_end > (period_start of new_data) */
    error = lookup_handler->ha_index_read_map(record_buffer, lookup_buffer,
                                              key_part_map((1 << (key_parts - 1)) - 1),
                                              HA_READ_AFTER_KEY);

    if (!error && is_update)
    {
      /* In case of update it could happen that the nearest neighbour is
         a record we are updating. It means, that there are no overlaps
         from this side.
      */
      DBUG_ASSERT(lookup_handler != this);
      DBUG_ASSERT(ref_length == lookup_handler->ref_length);

      lookup_handler->position(record_buffer);
      if (memcmp(ref, lookup_handler->ref, ref_length) == 0)
        error= lookup_handler->ha_index_next(record_buffer);
    }

    if (!error && table->check_period_overlaps(key_info, new_data, record_buffer))
      error= HA_ERR_FOUND_DUPP_KEY;

    /* No neighbour at all means no overlap on this key */
    if (error == HA_ERR_KEY_NOT_FOUND || error == HA_ERR_END_OF_FILE)
      error= 0;

    if (error == HA_ERR_FOUND_DUPP_KEY)
      lookup_errkey= key_nr;

    int end_error= lookup_handler->ha_end_keyread();
    DBUG_ASSERT(!end_error);

    end_error= lookup_handler->ha_index_end();
    if (!error && end_error)
      error= end_error;
  }

  return error;
}
7065
7066
7067 /**
7068 Check if galera disables binary logging for this table
7069
7070 @return 0 Binary logging disabled
7071 @return 1 Binary logging can be enabled
7072 */
7073
7074
wsrep_check_if_binlog_row(TABLE * table)7075 static inline bool wsrep_check_if_binlog_row(TABLE *table)
7076 {
7077 #ifdef WITH_WSREP
7078 THD *const thd= table->in_use;
7079
7080 /* only InnoDB tables will be replicated through binlog emulation */
7081 if ((WSREP_EMULATE_BINLOG(thd) &&
7082 !(table->file->partition_ht()->flags & HTON_WSREP_REPLICATION)) ||
7083 thd->wsrep_ignore_table == true)
7084 return 0;
7085 #endif
7086 return 1;
7087 }
7088
7089
7090 /**
7091 Prepare handler for row logging
7092
7093 @return 0 if handler will not participate in row logging
7094 @return 1 handler will participate in row logging
7095
7096 This function is always safe to call on an opened table.
7097 */
7098
prepare_for_row_logging()7099 bool handler::prepare_for_row_logging()
7100 {
7101 DBUG_ENTER("handler::prepare_for_row_logging");
7102
7103 /* Check if we should have row logging */
7104 if (wsrep_check_if_binlog_row(table) &&
7105 check_table_binlog_row_based())
7106 {
7107 /*
7108 Row logging enabled. Intialize all variables and write
7109 annotated and table maps
7110 */
7111 row_logging= row_logging_init= 1;
7112
7113 /*
7114 We need to have a transactional behavior for SQLCOM_CREATE_TABLE
7115 (e.g. CREATE TABLE... SELECT * FROM TABLE) in order to keep a
7116 compatible behavior with the STMT based replication even when
7117 the table is not transactional. In other words, if the operation
7118 fails while executing the insert phase nothing is written to the
7119 binlog.
7120 */
7121 row_logging_has_trans=
7122 ((sql_command_flags[table->in_use->lex->sql_command] &
7123 (CF_SCHEMA_CHANGE | CF_ADMIN_COMMAND)) ||
7124 table->file->has_transactions_and_rollback());
7125 }
7126 else
7127 {
7128 /* Check row_logging has not been properly cleared from previous command */
7129 DBUG_ASSERT(row_logging == 0);
7130 }
7131 DBUG_RETURN(row_logging);
7132 }
7133
7134
7135 /*
7136 Do all initialization needed for insert
7137 */
7138
prepare_for_insert(bool do_create)7139 int handler::prepare_for_insert(bool do_create)
7140 {
7141 /* Preparation for unique of blob's */
7142 if (table->s->long_unique_table || table->s->period.unique_keys)
7143 {
7144 if (do_create && create_lookup_handler())
7145 return 1;
7146 alloc_lookup_buffer();
7147 }
7148 return 0;
7149 }
7150
7151
/*
  Write a row to the table.

  Performs constraint checks (WITHOUT OVERLAPS, long unique) before the
  engine write; after a successful write updates statistics, does
  row-based binary logging and wsrep replication bookkeeping.

  @param buf  row image to insert
  @return 0 on success, HA_ERR_FOUND_DUPP_KEY on a constraint
          violation, or another handler error code
*/
int handler::ha_write_row(const uchar *buf)
{
  int error;
  DBUG_ASSERT(table_share->tmp_table != NO_TMP_TABLE ||
              m_lock_type == F_WRLCK);
  DBUG_ENTER("handler::ha_write_row");
  DEBUG_SYNC_C("ha_write_row_start");

  /* WITHOUT OVERLAPS constraints are verified before the engine write */
  if ((error= ha_check_overlaps(NULL, buf)))
    DBUG_RETURN(error);

  /* Long unique (hash based) constraints, main handler instance only */
  if (table->s->long_unique_table && this == table->file)
  {
    DBUG_ASSERT(inited == NONE || lookup_handler != this);
    if ((error= check_duplicate_long_entries(buf)))
      DBUG_RETURN(error);
  }

  MYSQL_INSERT_ROW_START(table_share->db.str, table_share->table_name.str);
  mark_trx_read_write();
  increment_statistics(&SSV::ha_write_count);

  TABLE_IO_WAIT(tracker, PSI_TABLE_WRITE_ROW, MAX_KEY, error,
                { error= write_row(buf); })

  MYSQL_INSERT_ROW_DONE(error);
  if (likely(!error))
  {
    rows_changed++;
    /* Row based replication: log the successful write */
    if (row_logging)
    {
      Log_func *log_func= Write_rows_log_event::binlog_row_logging_function;
      error= binlog_log_row(table, 0, buf, log_func);
    }
#ifdef WITH_WSREP
    if (WSREP_NNULL(ha_thd()) && table_share->tmp_table == NO_TMP_TABLE &&
        ht->flags & HTON_WSREP_REPLICATION &&
        !error && (error= wsrep_after_row(ha_thd())))
    {
      /* NOTE(review): this path returns before reaching the
         "ha_write_row_end" debug sync point below */
      DBUG_RETURN(error);
    }
#endif /* WITH_WSREP */
  }

  DEBUG_SYNC_C("ha_write_row_end");
  DBUG_RETURN(error);
}
7199
7200
/*
  Update a row in the table.

  Checks WITHOUT OVERLAPS and long unique constraints first, then calls
  the engine's update_row() and afterwards handles statistics,
  row-based binlogging and wsrep bookkeeping.

  @param old_data  previous row image, must be table->record[1]
  @param new_data  new row image, must be table->record[0]
  @return 0 on success or a handler error code
*/
int handler::ha_update_row(const uchar *old_data, const uchar *new_data)
{
  int error;
  DBUG_ASSERT(table_share->tmp_table != NO_TMP_TABLE ||
              m_lock_type == F_WRLCK);
  /*
    Some storage engines require that the new record is in record[0]
    (and the old record is in record[1]).
  */
  DBUG_ASSERT(new_data == table->record[0]);
  DBUG_ASSERT(old_data == table->record[1]);

  /* The constraint lookups below must not disturb table->status */
  uint saved_status= table->status;
  error= ha_check_overlaps(old_data, new_data);

  if (!error && table->s->long_unique_table && this == table->file)
    error= check_duplicate_long_entries_update(new_data);
  table->status= saved_status;

  if (error)
    return error;

  MYSQL_UPDATE_ROW_START(table_share->db.str, table_share->table_name.str);
  mark_trx_read_write();
  increment_statistics(&SSV::ha_update_count);

  TABLE_IO_WAIT(tracker, PSI_TABLE_UPDATE_ROW, active_index, 0,
                { error= update_row(old_data, new_data);})

  MYSQL_UPDATE_ROW_DONE(error);
  if (likely(!error))
  {
    rows_changed++;
    /* Row based replication: log the successful update */
    if (row_logging)
    {
      Log_func *log_func= Update_rows_log_event::binlog_row_logging_function;
      error= binlog_log_row(table, old_data, new_data, log_func);
    }
#ifdef WITH_WSREP
    THD *thd= ha_thd();
    if (WSREP_NNULL(thd))
    {
      /* for streaming replication, the following wsrep_after_row()
         may replicate a fragment, so we have to declare potential PA
         unsafe before that */
      if (table->s->primary_key == MAX_KEY && wsrep_thd_is_local(thd))
      {
        WSREP_DEBUG("marking trx as PA unsafe pk %d", table->s->primary_key);
        if (thd->wsrep_cs().mark_transaction_pa_unsafe())
          WSREP_DEBUG("session does not have active transaction,"
                      " can not mark as PA unsafe");
      }

      if (!error && table_share->tmp_table == NO_TMP_TABLE &&
          ht->flags & HTON_WSREP_REPLICATION)
        error= wsrep_after_row(thd);
    }
#endif /* WITH_WSREP */
  }
  return error;
}
7262
/*
  Update first row. Only used by sequence tables

  Opens a table scan, reads the first row into record[1] and, when the
  stored row differs from new_data, updates it in place.

  @param new_data  new row image
  @return 0 on success or a handler error code
*/

int handler::update_first_row(const uchar *new_data)
{
  int error;
  if (likely(!(error= ha_rnd_init(1))))
  {
    int end_error;
    if (likely(!(error= ha_rnd_next(table->record[1]))))
    {
      /*
        We have to do the memcmp as otherwise we may get error 169 from InnoDB
      */
      if (memcmp(new_data, table->record[1], table->s->reclength))
        error= update_row(table->record[1], new_data);
    }
    /* ha_rnd_end() must run even when the read/update failed */
    end_error= ha_rnd_end();
    if (likely(!error))
      error= end_error;
    /* Logging would be wrong if update_row works but ha_rnd_end fails */
    DBUG_ASSERT(!end_error || error != 0);
  }
  return error;
}
7289
7290
/*
  Delete a row from the table.

  Calls the engine's delete_row() and on success handles statistics,
  row-based binlogging and wsrep bookkeeping.

  @param buf  row image to delete; must be table->record[0] or
              table->record[1]
  @return 0 on success or a handler error code
*/
int handler::ha_delete_row(const uchar *buf)
{
  int error;
  DBUG_ASSERT(table_share->tmp_table != NO_TMP_TABLE ||
              m_lock_type == F_WRLCK);
  /*
    Normally table->record[0] is used, but sometimes table->record[1] is used.
  */
  DBUG_ASSERT(buf == table->record[0] ||
              buf == table->record[1]);

  MYSQL_DELETE_ROW_START(table_share->db.str, table_share->table_name.str);
  mark_trx_read_write();
  increment_statistics(&SSV::ha_delete_count);

  TABLE_IO_WAIT(tracker, PSI_TABLE_DELETE_ROW, active_index, error,
                { error= delete_row(buf);})
  MYSQL_DELETE_ROW_DONE(error);
  if (likely(!error))
  {
    rows_changed++;
    /* Row based replication: log the successful delete */
    if (row_logging)
    {
      Log_func *log_func= Delete_rows_log_event::binlog_row_logging_function;
      error= binlog_log_row(table, buf, 0, log_func);
    }
#ifdef WITH_WSREP
    THD *thd= ha_thd();
    if (WSREP_NNULL(thd))
    {
      /* for streaming replication, the following wsrep_after_row()
         may replicate a fragment, so we have to declare potential PA
         unsafe before that */
      if (table->s->primary_key == MAX_KEY && wsrep_thd_is_local(thd))
      {
        WSREP_DEBUG("marking trx as PA unsafe pk %d", table->s->primary_key);
        if (thd->wsrep_cs().mark_transaction_pa_unsafe())
          WSREP_DEBUG("session does not have active transaction,"
                      " can not mark as PA unsafe");
      }

      if (!error && table_share->tmp_table == NO_TMP_TABLE &&
          ht->flags & HTON_WSREP_REPLICATION)
        error= wsrep_after_row(thd);
    }
#endif /* WITH_WSREP */
  }
  return error;
}
7340
7341
7342 /**
7343 Execute a direct update request. A direct update request updates all
7344 qualified rows in a single operation, rather than one row at a time.
7345 In a Spider cluster the direct update operation is pushed down to the
7346 child levels of the cluster.
7347
7348 Note that this can't be used in case of statment logging
7349
7350 @param update_rows Number of updated rows.
7351
7352 @retval 0 Success.
7353 @retval != 0 Failure.
7354 */
7355
ha_direct_update_rows(ha_rows * update_rows,ha_rows * found_rows)7356 int handler::ha_direct_update_rows(ha_rows *update_rows, ha_rows *found_rows)
7357 {
7358 int error;
7359 MYSQL_UPDATE_ROW_START(table_share->db.str, table_share->table_name.str);
7360 mark_trx_read_write();
7361
7362 error= direct_update_rows(update_rows, found_rows);
7363 MYSQL_UPDATE_ROW_DONE(error);
7364 return error;
7365 }
7366
7367
7368 /**
7369 Execute a direct delete request. A direct delete request deletes all
7370 qualified rows in a single operation, rather than one row at a time.
7371 In a Spider cluster the direct delete operation is pushed down to the
7372 child levels of the cluster.
7373
7374 @param delete_rows Number of deleted rows.
7375
7376 @retval 0 Success.
7377 @retval != 0 Failure.
7378 */
7379
ha_direct_delete_rows(ha_rows * delete_rows)7380 int handler::ha_direct_delete_rows(ha_rows *delete_rows)
7381 {
7382 int error;
7383 /* Ensure we are not using binlog row */
7384 DBUG_ASSERT(!table->in_use->is_current_stmt_binlog_format_row());
7385
7386 MYSQL_DELETE_ROW_START(table_share->db.str, table_share->table_name.str);
7387 mark_trx_read_write();
7388
7389 error = direct_delete_rows(delete_rows);
7390 MYSQL_DELETE_ROW_DONE(error);
7391 return error;
7392 }
7393
7394
/** @brief
  use_hidden_primary_key() is called in case of an update/delete when
  (table_flags() and HA_PRIMARY_KEY_REQUIRED_FOR_DELETE) is defined
  but we don't have a primary key

  Switches the column bitmaps so that all columns are included
  (presumably so the full row image can identify the row — the first
  argument of column_bitmaps_set() is the read set; verify if relied on).
*/
void handler::use_hidden_primary_key()
{
  /* fallback to use all columns in the table to identify row */
  table->column_bitmaps_set(&table->s->all_set, table->write_set);
}
7405
7406
/**
  Get an initialized ha_share.

  @return Initialized ha_share
  @retval NULL ha_share is not yet initialized.
  @retval != NULL previous initialized ha_share.

  @note
  If not a temp table, then LOCK_ha_data must be held.
*/

Handler_share *handler::get_ha_share_ptr()
{
  DBUG_ENTER("handler::get_ha_share_ptr");
  DBUG_ASSERT(ha_share);
  DBUG_ASSERT(table_share);

#ifndef DBUG_OFF
  /* Debug builds verify the locking contract stated above */
  if (table_share->tmp_table == NO_TMP_TABLE)
    mysql_mutex_assert_owner(&table_share->LOCK_ha_data);
#endif

  DBUG_RETURN(*ha_share);
}
7431
7432
/**
  Set ha_share to be used by all instances of the same table/partition.

  @param arg_ha_share Handler_share to be shared.

  @note
  If not a temp table, then LOCK_ha_data must be held.
*/

void handler::set_ha_share_ptr(Handler_share *arg_ha_share)
{
  DBUG_ENTER("handler::set_ha_share_ptr");
  DBUG_ASSERT(ha_share);
#ifndef DBUG_OFF
  /* Debug builds verify the locking contract stated above */
  if (table_share->tmp_table == NO_TMP_TABLE)
    mysql_mutex_assert_owner(&table_share->LOCK_ha_data);
#endif

  *ha_share= arg_ha_share;
  DBUG_VOID_RETURN;
}
7454
7455
/**
  Take a lock for protecting shared handler data.
  Temporary tables are used by a single thread only, so no lock is
  taken for them.
*/

void handler::lock_shared_ha_data()
{
  DBUG_ASSERT(table_share);
  if (table_share->tmp_table == NO_TMP_TABLE)
    mysql_mutex_lock(&table_share->LOCK_ha_data);
}
7466
7467
/**
  Release lock for protecting ha_share.
  Counterpart of lock_shared_ha_data(); no-op for temporary tables.
*/

void handler::unlock_shared_ha_data()
{
  DBUG_ASSERT(table_share);
  if (table_share->tmp_table == NO_TMP_TABLE)
    mysql_mutex_unlock(&table_share->LOCK_ha_data);
}
7478
/** @brief
  Dummy function which accept information about log files which is not need
  by handlers

  NOTE(review): the handlerton parameter is taken by value and is unused.
*/
void signal_log_not_needed(struct handlerton, char *log_file)
{
  DBUG_ENTER("signal_log_not_needed");
  DBUG_PRINT("enter", ("logfile '%s'", log_file));
  DBUG_VOID_RETURN;
}
7489
/* Record the requested thr_lock type in the table's registration info */
void handler::set_lock_type(enum thr_lock_type lock)
{
  table->reginfo.lock_type= lock;
}
7494
compare_key_parts(const Field & old_field,const Column_definition & new_field,const KEY_PART_INFO & old_part,const KEY_PART_INFO & new_part) const7495 Compare_keys handler::compare_key_parts(const Field &old_field,
7496 const Column_definition &new_field,
7497 const KEY_PART_INFO &old_part,
7498 const KEY_PART_INFO &new_part) const
7499 {
7500 if (!old_field.is_equal(new_field))
7501 return Compare_keys::NotEqual;
7502
7503 if (old_part.length != new_part.length)
7504 return Compare_keys::NotEqual;
7505
7506 return Compare_keys::Equal;
7507 }
7508
#ifdef WITH_WSREP
/**
  @details
  This function makes the storage engine to force the victim transaction
  to abort. Currently, only innodb has this functionality, but any SE
  implementing the wsrep API should provide this service to support
  multi-master operation.

  @note Aborting the transaction does NOT end it, it still has to
  be rolled back with hton->rollback().

  @note It is safe to abort from one thread (bf_thd) the transaction,
  running in another thread (victim_thd), because InnoDB's lock_sys and
  trx_mutex guarantee the necessary protection. However, its not safe
  to access victim_thd->transaction, because it's not protected from
  concurrent accesses. And it's an overkill to take LOCK_plugin and
  iterate the whole installed_htons[] array every time.

  @param bf_thd brute force THD asking for the abort
  @param victim_thd victim THD to be aborted
  @param signal passed through to the engine's abort_transaction hook

  @return
  always 0
*/

int ha_abort_transaction(THD *bf_thd, THD *victim_thd, my_bool signal)
{
  DBUG_ENTER("ha_abort_transaction");
  /* Only wsrep sessions (or RSU-mode TOI) are allowed to abort others */
  if (!WSREP(bf_thd) &&
      !(bf_thd->variables.wsrep_OSU_method == WSREP_OSU_RSU &&
        wsrep_thd_is_toi(bf_thd))) {
    DBUG_RETURN(0);
  }

  /* Only InnoDB implements abort_transaction, see @details above */
  handlerton *hton= installed_htons[DB_TYPE_INNODB];
  if (hton && hton->abort_transaction)
  {
    hton->abort_transaction(hton, bf_thd, victim_thd, signal);
  }
  else
  {
    WSREP_WARN("Cannot abort InnoDB transaction");
  }

  DBUG_RETURN(0);
}
#endif /* WITH_WSREP */
7556
7557
7558 #ifdef TRANS_LOG_MGM_EXAMPLE_CODE
7559 /*
7560 Example of transaction log management functions based on assumption that logs
7561 placed into a directory
7562 */
7563 #include <my_dir.h>
7564 #include <my_sys.h>
/*
  Example: iterate over a handlerton's transaction logs and delete
  every log file the engine reports as free.

  @param hton  storage engine whose logs are cleaned
  @return 0 on success, 1 when the iterator is unsupported or a
          deletion fails
*/
int example_of_iterator_using_for_logs_cleanup(handlerton *hton)
{
  void *buffer;   /* NOTE(review): unused local */
  int res= 1;
  struct handler_iterator iterator;
  struct handler_log_file_data data;

  if (!hton->create_iterator)
    return 1; /* iterator creator is not supported */

  if ((*hton->create_iterator)(hton, HA_TRANSACTLOG_ITERATOR, &iterator) !=
      HA_ITERATOR_OK)
  {
    /* error during creation of log iterator or iterator is not supported */
    return 1;
  }
  while((*iterator.next)(&iterator, (void*)&data) == 0)
  {
    printf("%s\n", data.filename.str);
    if (data.status == HA_LOG_STATUS_FREE &&
        mysql_file_delete(INSTRUMENT_ME,
                          data.filename.str, MYF(MY_WME)))
      goto err;
  }
  res= 0;
err:
  /* The iterator must be destroyed on every exit path after creation */
  (*iterator.destroy)(&iterator);
  return res;
}
7594
7595
7596 /*
7597 Here we should get info from handler where it save logs but here is
7598 just example, so we use constant.
7599 IMHO FN_ROOTDIR ("/") is safe enough for example, because nobody has
7600 rights on it except root and it consist of directories only at lest for
7601 *nix (sorry, can't find windows-safe solution here, but it is only example).
7602 */
7603 #define fl_dir FN_ROOTDIR
7604
7605
7606 /** @brief
7607 Dummy function to return log status should be replaced by function which
7608 really detect the log status and check that the file is a log of this
7609 handler.
7610 */
fl_get_log_status(char * log)7611 enum log_status fl_get_log_status(char *log)
7612 {
7613 MY_STAT stat_buff;
7614 if (mysql_file_stat(INSTRUMENT_ME, log, &stat_buff, MYF(0)))
7615 return HA_LOG_STATUS_INUSE;
7616 return HA_LOG_STATUS_NOSUCHLOG;
7617 }
7618
7619
/* State of the example log iterator: file names and statuses collected
   up-front by fl_log_iterator_buffer_init(), plus a cursor. */
struct fl_buff
{
  LEX_STRING *names;          /* log file names */
  enum log_status *statuses;  /* status of each file in names */
  uint32 entries;             /* number of valid entries */
  uint32 current;             /* next entry fl_log_iterator_next() returns */
};
7627
7628
fl_log_iterator_next(struct handler_iterator * iterator,void * iterator_object)7629 int fl_log_iterator_next(struct handler_iterator *iterator,
7630 void *iterator_object)
7631 {
7632 struct fl_buff *buff= (struct fl_buff *)iterator->buffer;
7633 struct handler_log_file_data *data=
7634 (struct handler_log_file_data *) iterator_object;
7635 if (buff->current >= buff->entries)
7636 return 1;
7637 data->filename= buff->names[buff->current];
7638 data->status= buff->statuses[buff->current];
7639 buff->current++;
7640 return 0;
7641 }
7642
7643
/* Free the single allocation backing the example log iterator */
void fl_log_iterator_destroy(struct handler_iterator *iterator)
{
  my_free(iterator->buffer);
}
7648
7649
7650 /** @brief
7651 returns buffer, to be assigned in handler_iterator struct
7652 */
7653 enum handler_create_iterator_result
fl_log_iterator_buffer_init(struct handler_iterator * iterator)7654 fl_log_iterator_buffer_init(struct handler_iterator *iterator)
7655 {
7656 MY_DIR *dirp;
7657 struct fl_buff *buff;
7658 char *name_ptr;
7659 uchar *ptr;
7660 FILEINFO *file;
7661 uint32 i;
7662
7663 /* to be able to make my_free without crash in case of error */
7664 iterator->buffer= 0;
7665
7666 if (!(dirp = my_dir(fl_dir, MYF(MY_THREAD_SPECIFIC))))
7667 {
7668 return HA_ITERATOR_ERROR;
7669 }
7670 if ((ptr= (uchar*)my_malloc(ALIGN_SIZE(sizeof(fl_buff)) +
7671 ((ALIGN_SIZE(sizeof(LEX_STRING)) +
7672 sizeof(enum log_status) +
7673 + FN_REFLEN + 1) *
7674 (uint) dirp->number_off_files),
7675 MYF(MY_THREAD_SPECIFIC))) == 0)
7676 {
7677 return HA_ITERATOR_ERROR;
7678 }
7679 buff= (struct fl_buff *)ptr;
7680 buff->entries= buff->current= 0;
7681 ptr= ptr + (ALIGN_SIZE(sizeof(fl_buff)));
7682 buff->names= (LEX_STRING*) (ptr);
7683 ptr= ptr + ((ALIGN_SIZE(sizeof(LEX_STRING)) *
7684 (uint) dirp->number_off_files));
7685 buff->statuses= (enum log_status *)(ptr);
7686 name_ptr= (char *)(ptr + (sizeof(enum log_status) *
7687 (uint) dirp->number_off_files));
7688 for (i=0 ; i < (uint) dirp->number_off_files ; i++)
7689 {
7690 enum log_status st;
7691 file= dirp->dir_entry + i;
7692 if ((file->name[0] == '.' &&
7693 ((file->name[1] == '.' && file->name[2] == '\0') ||
7694 file->name[1] == '\0')))
7695 continue;
7696 if ((st= fl_get_log_status(file->name)) == HA_LOG_STATUS_NOSUCHLOG)
7697 continue;
7698 name_ptr= strxnmov(buff->names[buff->entries].str= name_ptr,
7699 FN_REFLEN, fl_dir, file->name, NullS);
7700 buff->names[buff->entries].length= (name_ptr -
7701 buff->names[buff->entries].str);
7702 buff->statuses[buff->entries]= st;
7703 buff->entries++;
7704 }
7705
7706 iterator->buffer= buff;
7707 iterator->next= &fl_log_iterator_next;
7708 iterator->destroy= &fl_log_iterator_destroy;
7709 my_dirend(dirp);
7710 return HA_ITERATOR_OK;
7711 }
7712
7713
7714 /* An example of a iterator creator */
7715 enum handler_create_iterator_result
fl_create_iterator(enum handler_iterator_type type,struct handler_iterator * iterator)7716 fl_create_iterator(enum handler_iterator_type type,
7717 struct handler_iterator *iterator)
7718 {
7719 switch(type) {
7720 case HA_TRANSACTLOG_ITERATOR:
7721 return fl_log_iterator_buffer_init(iterator);
7722 default:
7723 return HA_ITERATOR_UNSUPPORTED;
7724 }
7725 }
7726 #endif /*TRANS_LOG_MGM_EXAMPLE_CODE*/
7727
7728
/*
  Check that a newly specified CHARACTER SET does not conflict with a
  DEFAULT CHARACTER SET already given in the same statement.

  @param cs  the new character set, or NULL meaning DEFAULT
  @return true (and ER_CONFLICTING_DECLARATIONS raised) on conflict,
          false otherwise
*/
bool HA_CREATE_INFO::check_conflicting_charset_declarations(CHARSET_INFO *cs)
{
  if ((used_fields & HA_CREATE_USED_DEFAULT_CHARSET) &&
      /* DEFAULT vs explicit, or explicit vs DEFAULT */
      (((default_table_charset == NULL) != (cs == NULL)) ||
       /* Two different explicit character sets */
       (default_table_charset && cs &&
        !my_charset_same(default_table_charset, cs))))
  {
    my_error(ER_CONFLICTING_DECLARATIONS, MYF(0),
             "CHARACTER SET ", default_table_charset ?
                               default_table_charset->csname : "DEFAULT",
             "CHARACTER SET ", cs ? cs->csname : "DEFAULT");
    return true;
  }
  return false;
}
7746
/*
  Remove all indexes for a given table from global index statistics.

  @param thd               current connection (unused here)
  @param cache_key         "db\0table_name\0" prefix identifying the table
  @param cache_key_length  length of cache_key including both \0s
  @return result of the last my_hash_delete(), 0 when nothing failed
*/

static
int del_global_index_stats_for_table(THD *thd, uchar* cache_key, size_t cache_key_length)
{
  int res = 0;
  DBUG_ENTER("del_global_index_stats_for_table");

  mysql_mutex_lock(&LOCK_global_index_stats);

  for (uint i= 0; i < global_index_stats.records;)
  {
    INDEX_STATS *index_stats =
      (INDEX_STATS*) my_hash_element(&global_index_stats, i);

    /* We search correct db\0table_name\0 string */
    if (index_stats &&
        index_stats->index_name_length >= cache_key_length &&
        !memcmp(index_stats->index, cache_key, cache_key_length))
    {
      res= my_hash_delete(&global_index_stats, (uchar*)index_stats);
      /*
        In our HASH implementation on deletion one elements
        is moved into a place where a deleted element was,
        and the last element is moved into the empty space.
        Thus we need to re-examine the current element, but
        we don't have to restart the search from the beginning.
      */
    }
    else
      i++;
  }

  mysql_mutex_unlock(&LOCK_global_index_stats);
  DBUG_RETURN(res);
}
7783
/*
  Remove a table from global table statistics, together with all of
  its index statistics.

  @param thd    current connection
  @param db     database name
  @param table  table name
  @return 0 on success, non-zero on out-of-memory or hash delete error
*/

int del_global_table_stat(THD *thd, const LEX_CSTRING *db, const LEX_CSTRING *table)
{
  TABLE_STATS *table_stats;
  int res = 0;
  uchar *cache_key;
  size_t cache_key_length;
  DBUG_ENTER("del_global_table_stat");

  /* Key format is "db\0table_name\0"; MY_ZEROFILL supplies the \0s */
  cache_key_length= db->length + 1 + table->length + 1;

  if(!(cache_key= (uchar *)my_malloc(PSI_INSTRUMENT_ME, cache_key_length,
                                     MYF(MY_WME | MY_ZEROFILL))))
  {
    /* Out of memory error already given */
    res = 1;
    goto end;
  }

  memcpy(cache_key, db->str, db->length);
  memcpy(cache_key + db->length + 1, table->str, table->length);

  res= del_global_index_stats_for_table(thd, cache_key, cache_key_length);

  mysql_mutex_lock(&LOCK_global_table_stats);

  if((table_stats= (TABLE_STATS*) my_hash_search(&global_table_stats,
                                                 cache_key,
                                                 cache_key_length)))
    res= my_hash_delete(&global_table_stats, (uchar*)table_stats);

  my_free(cache_key);
  mysql_mutex_unlock(&LOCK_global_table_stats);

end:
  DBUG_RETURN(res);
}
7822
/*
  Remove a single index from global index statistics.

  @param thd       current connection (unused here)
  @param table     table the index belongs to
  @param key_info  index to remove; key_info->cache_name is the hash key
  @return 0 on success or when the index was not present
*/

int del_global_index_stat(THD *thd, TABLE* table, KEY* key_info)
{
  INDEX_STATS *index_stats;
  size_t key_length= table->s->table_cache_key.length + key_info->name.length + 1;
  int res = 0;
  DBUG_ENTER("del_global_index_stat");
  mysql_mutex_lock(&LOCK_global_index_stats);

  if((index_stats= (INDEX_STATS*) my_hash_search(&global_index_stats,
                                                 key_info->cache_name,
                                                 key_length)))
    res= my_hash_delete(&global_index_stats, (uchar*)index_stats);

  mysql_mutex_unlock(&LOCK_global_index_stats);
  DBUG_RETURN(res);
}
7841
7842 /*****************************************************************************
7843 VERSIONING functions
7844 ******************************************************************************/
7845
/* True if name matches the declared AS ROW START column name */
bool Vers_parse_info::is_start(const char *name) const
{
  DBUG_ASSERT(name);
  return as_row.start && as_row.start.streq(name);
}
/* True if name matches the declared AS ROW END column name */
bool Vers_parse_info::is_end(const char *name) const
{
  DBUG_ASSERT(name);
  return as_row.end && as_row.end.streq(name);
}
/* True if the field carries the system-versioning ROW START flag */
bool Vers_parse_info::is_start(const Create_field &f) const
{
  return f.flags & VERS_ROW_START;
}
/* True if the field carries the system-versioning ROW END flag */
bool Vers_parse_info::is_end(const Create_field &f) const
{
  return f.flags & VERS_ROW_END;
}
7864
/*
  Build a Create_field describing an implicit system-versioning column
  (row_start / row_end).

  @param thd         connection; the field is allocated on its mem_root
  @param field_name  column name
  @param flags       VERS_ROW_START or VERS_ROW_END
  @param integer     true for a TRX-id based unsigned bigint column
                     (not implemented yet — asserts in debug builds),
                     false for a max-precision timestamp column
  @return the new field, or NULL on allocation or check() failure
*/
static Create_field *vers_init_sys_field(THD *thd, const char *field_name, int flags, bool integer)
{
  Create_field *f= new (thd->mem_root) Create_field();
  if (!f)
    return NULL;

  f->field_name.str= field_name;
  f->field_name.length= strlen(field_name);
  f->charset= system_charset_info;
  f->flags= flags | NOT_NULL_FLAG;
  if (integer)
  {
    DBUG_ASSERT(0); // Not implemented yet
    f->set_handler(&type_handler_vers_trx_id);
    f->length= MY_INT64_NUM_DECIMAL_DIGITS - 1;
    f->flags|= UNSIGNED_FLAG;
  }
  else
  {
    f->set_handler(&type_handler_timestamp2);
    f->length= MAX_DATETIME_PRECISION;
  }
  /* System fields stay hidden unless the sysvers_show debug flag is set */
  f->invisible= DBUG_EVALUATE_IF("sysvers_show", VISIBLE, INVISIBLE_SYSTEM);

  if (f->check(thd))
    return NULL;

  return f;
}
7894
vers_create_sys_field(THD * thd,const char * field_name,Alter_info * alter_info,int flags)7895 static bool vers_create_sys_field(THD *thd, const char *field_name,
7896 Alter_info *alter_info, int flags)
7897 {
7898 Create_field *f= vers_init_sys_field(thd, field_name, flags, false);
7899 if (!f)
7900 return true;
7901
7902 alter_info->flags|= ALTER_PARSER_ADD_COLUMN;
7903 alter_info->create_list.push_back(f);
7904
7905 return false;
7906 }
7907
/* Column names used for implicitly created versioning period fields */
const Lex_ident Vers_parse_info::default_start= "row_start";
const Lex_ident Vers_parse_info::default_end= "row_end";
7910
/*
  Add the implicit row_start/row_end columns and the versioning period
  when WITH SYSTEM VERSIONING was requested without naming them.

  @return true when creating the implicit fields failed
*/
bool Vers_parse_info::fix_implicit(THD *thd, Alter_info *alter_info)
{
  // If user specified some of these he must specify the others too. Do nothing.
  if (*this)
    return false;

  alter_info->flags|= ALTER_PARSER_ADD_COLUMN;

  period= start_end_t(default_start, default_end);
  as_row= period;

  if (vers_create_sys_field(thd, default_start, alter_info, VERS_ROW_START) ||
      vers_create_sys_field(thd, default_end, alter_info, VERS_ROW_END))
  {
    return true;
  }
  return false;
}
7929
7930
/*
  Prepare system-versioning columns for CREATE TABLE / CREATE ... SELECT.

  Marks user columns that must stay unversioned, verifies that a table
  using per-column versioning clauses actually enables versioning, and
  finally adds the implicit row_start/row_end columns when needed.

  @param thd           connection handle
  @param alter_info    column/key lists being built for the new table
  @param create_table  target table (used for error messages)
  @return true on error (my_error() has been reported)
*/
bool Table_scope_and_contents_source_st::vers_fix_system_fields(
  THD *thd, Alter_info *alter_info, const TABLE_LIST &create_table)
{
  DBUG_ASSERT(!(alter_info->flags & ALTER_DROP_SYSTEM_VERSIONING));

  // Debug hook: force system versioning for every non-temporary table
  DBUG_EXECUTE_IF("sysvers_force", if (!tmp_table()) {
    alter_info->flags|= ALTER_ADD_SYSTEM_VERSIONING;
    options|= HA_VERSIONED_TABLE; });

  if (!vers_info.need_check(alter_info))
    return false;

  const bool add_versioning= alter_info->flags & ALTER_ADD_SYSTEM_VERSIONING;

  if (!vers_info.versioned_fields && vers_info.unversioned_fields && !add_versioning)
  {
    // All is correct but this table is not versioned.
    options&= ~HA_VERSIONED_TABLE;
    return false;
  }

  // Versioning clauses were used, but WITH SYSTEM VERSIONING is missing
  if (!add_versioning && vers_info && !vers_info.versioned_fields)
  {
    my_error(ER_MISSING, MYF(0), create_table.table_name.str,
             "WITH SYSTEM VERSIONING");
    return true;
  }

  /*
    Columns without an explicit WITH/WITHOUT SYSTEM VERSIONING clause
    default to unversioned unless the whole table is being versioned.
  */
  List_iterator<Create_field> it(alter_info->create_list);
  while (Create_field *f= it++)
  {
    if (f->vers_sys_field())
      continue;
    if ((f->versioning == Column_definition::VERSIONING_NOT_SET && !add_versioning) ||
        f->versioning == Column_definition::WITHOUT_VERSIONING)
    {
      f->flags|= VERS_UPDATE_UNVERSIONED_FLAG;
    }
  } // while (Create_field *f= it++)

  // Add implicit row_start/row_end and period if not given explicitly
  if (vers_info.fix_implicit(thd, alter_info))
    return true;

  return false;
}
7976
7977
/*
  Validate the system-versioning invariants of the field list.

  Counts the columns that will actually be versioned -- skipping
  duplicates that CREATE ... SELECT may append -- and requires that a
  versioned table keeps at least one versioned column; then checks the
  ROW START / ROW END system fields themselves.

  @param select_count  number of fields appended from the SELECT part
  @return true on error (my_error() has been reported)
*/
bool Table_scope_and_contents_source_st::vers_check_system_fields(
  THD *thd, Alter_info *alter_info, const Lex_table_name &table_name,
  const Lex_table_name &db, int select_count)
{
  if (!(options & HA_VERSIONED_TABLE))
    return false;

  uint versioned_fields= 0;

  if (!(alter_info->flags & ALTER_DROP_SYSTEM_VERSIONING))
  {
    uint fieldnr= 0;
    List_iterator<Create_field> field_it(alter_info->create_list);
    while (Create_field *f= field_it++)
    {
      /*
        The field from the CREATE part can be duplicated in the SELECT part of
        CREATE...SELECT. In that case double counts should be avoided.
        select_create::create_table_from_items just pushes the fields back into
        the create_list, without additional manipulations, so the fields from
        SELECT go last there.
      */
      bool is_dup= false;
      if (fieldnr >= alter_info->create_list.elements - select_count)
      {
        // f comes from the SELECT part: scan preceding fields for same name
        List_iterator<Create_field> dup_it(alter_info->create_list);
        for (Create_field *dup= dup_it++; !is_dup && dup != f; dup= dup_it++)
          is_dup= Lex_ident(dup->field_name).streq(f->field_name);
      }

      if (!(f->flags & VERS_UPDATE_UNVERSIONED_FLAG) && !is_dup)
        versioned_fields++;
      fieldnr++;
    }
    // Per ER_VERS_TABLE_MUST_HAVE_COLUMNS: at least one versioned column
    if (versioned_fields == VERSIONING_FIELDS)
    {
      my_error(ER_VERS_TABLE_MUST_HAVE_COLUMNS, MYF(0), table_name.str);
      return true;
    }
  }

  if (!(alter_info->flags & ALTER_ADD_SYSTEM_VERSIONING) && !versioned_fields)
    return false;

  return vers_info.check_sys_fields(table_name, db, alter_info);
}
8024
8025
/*
  Validate and prepare system-versioning info for ALTER TABLE.

  Handles ADD/DROP SYSTEM VERSIONING, forbids touching the generated
  row_start/row_end columns of an already versioned table, and for a
  versioned table copies the period/system-field names from the existing
  TABLE_SHARE into this Vers_parse_info.

  @return true on error (my_error() has been reported)
*/
bool Vers_parse_info::fix_alter_info(THD *thd, Alter_info *alter_info,
                                     HA_CREATE_INFO *create_info, TABLE *table)
{
  TABLE_SHARE *share= table->s;
  const char *table_name= share->table_name.str;

  // Nothing versioning-related in this ALTER and the table is not versioned
  if (!need_check(alter_info) && !share->versioned)
    return false;

  // Temporary tables cannot be versioned (unless sysvers_force debug mode)
  if (DBUG_EVALUATE_IF("sysvers_force", 0, share->tmp_table))
  {
    my_error(ER_VERS_NOT_SUPPORTED, MYF(0), "CREATE TEMPORARY TABLE");
    return true;
  }

  if (alter_info->flags & ALTER_ADD_SYSTEM_VERSIONING &&
      table->versioned())
  {
    my_error(ER_VERS_ALREADY_VERSIONED, MYF(0), table_name);
    return true;
  }

  if (alter_info->flags & ALTER_DROP_SYSTEM_VERSIONING)
  {
    if (!share->versioned)
    {
      my_error(ER_VERS_NOT_VERSIONED, MYF(0), table_name);
      return true;
    }
#ifdef WITH_PARTITION_STORAGE_ENGINE
    // SYSTEM_TIME partitioning depends on versioning: refuse the DROP
    if (table->part_info &&
        table->part_info->part_type == VERSIONING_PARTITION)
    {
      my_error(ER_DROP_VERSIONING_SYSTEM_TIME_PARTITION, MYF(0), table_name);
      return true;
    }
#endif

    return false;
  }

  if (!(alter_info->flags & ALTER_ADD_SYSTEM_VERSIONING))
  {
    /*
      Without ADD SYSTEM VERSIONING no new system fields may appear in the
      create list: either the table is not versioned at all, or the user
      tries to add a second ROW START/ROW END column.
    */
    List_iterator_fast<Create_field> it(alter_info->create_list);
    while (Create_field *f= it++)
    {
      if (f->flags & VERS_SYSTEM_FIELD)
      {
        if (!table->versioned())
        {
          my_error(ER_VERS_NOT_VERSIONED, MYF(0), table->s->table_name.str);
          return true;
        }
        my_error(ER_VERS_DUPLICATE_ROW_START_END, MYF(0),
                 f->flags & VERS_ROW_START ? "START" : "END", f->field_name.str);
        return true;
      }
    }
  }

  // Versioning-only clauses require an already versioned table
  if ((alter_info->flags & ALTER_DROP_PERIOD ||
       versioned_fields || unversioned_fields) && !share->versioned)
  {
    my_error(ER_VERS_NOT_VERSIONED, MYF(0), table_name);
    return true;
  }

  if (share->versioned)
  {
    if (alter_info->flags & ALTER_ADD_PERIOD)
    {
      my_error(ER_VERS_ALREADY_VERSIONED, MYF(0), table_name);
      return true;
    }

    // copy info from existing table
    create_info->options|= HA_VERSIONED_TABLE;

    DBUG_ASSERT(share->vers_start_field());
    DBUG_ASSERT(share->vers_end_field());
    Lex_ident start(share->vers_start_field()->field_name);
    Lex_ident end(share->vers_end_field()->field_name);
    DBUG_ASSERT(start.str);
    DBUG_ASSERT(end.str);

    as_row= start_end_t(start, end);
    period= as_row;

    if (alter_info->create_list.elements)
    {
      List_iterator_fast<Create_field> it(alter_info->create_list);
      while (Create_field *f= it++)
      {
        if (f->versioning == Column_definition::WITHOUT_VERSIONING)
          f->flags|= VERS_UPDATE_UNVERSIONED_FLAG;

        // The generated system columns may not be changed/renamed
        if (f->change.str && (start.streq(f->change) || end.streq(f->change)))
        {
          my_error(ER_VERS_ALTER_SYSTEM_FIELD, MYF(0), f->change.str);
          return true;
        }
      }
    }

    return false;
  }

  // ADD SYSTEM VERSIONING on a plain table: add implicit fields and verify
  if (fix_implicit(thd, alter_info))
    return true;

  if (alter_info->flags & ALTER_ADD_SYSTEM_VERSIONING)
  {
    if (check_sys_fields(table_name, share->db, alter_info))
      return true;
  }

  return false;
}
8144
8145 bool
fix_create_like(Alter_info & alter_info,HA_CREATE_INFO & create_info,TABLE_LIST & src_table,TABLE_LIST & table)8146 Vers_parse_info::fix_create_like(Alter_info &alter_info, HA_CREATE_INFO &create_info,
8147 TABLE_LIST &src_table, TABLE_LIST &table)
8148 {
8149 List_iterator<Create_field> it(alter_info.create_list);
8150 List_iterator<Key> key_it(alter_info.key_list);
8151 List_iterator<Key_part_spec> kp_it;
8152 Create_field *f, *f_start=NULL, *f_end= NULL;
8153
8154 DBUG_ASSERT(alter_info.create_list.elements > 2);
8155
8156 if (create_info.tmp_table())
8157 {
8158 int remove= 2;
8159 while (remove && (f= it++))
8160 {
8161 if (f->flags & VERS_SYSTEM_FIELD)
8162 {
8163 it.remove();
8164 remove--;
8165 }
8166 key_it.rewind();
8167 while (Key *key= key_it++)
8168 {
8169 kp_it.init(key->columns);
8170 while (Key_part_spec *kp= kp_it++)
8171 {
8172 if (0 == lex_string_cmp(system_charset_info, &kp->field_name,
8173 &f->field_name))
8174 {
8175 kp_it.remove();
8176 }
8177 }
8178 if (0 == key->columns.elements)
8179 {
8180 key_it.remove();
8181 }
8182 }
8183 }
8184 DBUG_ASSERT(remove == 0);
8185 push_warning_printf(current_thd, Sql_condition::WARN_LEVEL_WARN,
8186 ER_UNKNOWN_ERROR,
8187 "System versioning is stripped from temporary `%s.%s`",
8188 table.db.str, table.table_name.str);
8189 return false;
8190 }
8191
8192 while ((f= it++))
8193 {
8194 if (f->flags & VERS_ROW_START)
8195 {
8196 f_start= f;
8197 if (f_end)
8198 break;
8199 }
8200 else if (f->flags & VERS_ROW_END)
8201 {
8202 f_end= f;
8203 if (f_start)
8204 break;
8205 }
8206 }
8207
8208 if (!f_start || !f_end)
8209 {
8210 my_error(ER_MISSING, MYF(0), src_table.table_name.str,
8211 f_start ? "AS ROW END" : "AS ROW START");
8212 return true;
8213 }
8214
8215 as_row= start_end_t(f_start->field_name, f_end->field_name);
8216 period= as_row;
8217
8218 create_info.options|= HA_VERSIONED_TABLE;
8219 return false;
8220 }
8221
need_check(const Alter_info * alter_info) const8222 bool Vers_parse_info::need_check(const Alter_info *alter_info) const
8223 {
8224 return versioned_fields || unversioned_fields ||
8225 alter_info->flags & ALTER_ADD_PERIOD ||
8226 alter_info->flags & ALTER_DROP_PERIOD ||
8227 alter_info->flags & ALTER_ADD_SYSTEM_VERSIONING ||
8228 alter_info->flags & ALTER_DROP_SYSTEM_VERSIONING || *this;
8229 }
8230
check_conditions(const Lex_table_name & table_name,const Lex_table_name & db) const8231 bool Vers_parse_info::check_conditions(const Lex_table_name &table_name,
8232 const Lex_table_name &db) const
8233 {
8234 if (!as_row.start || !as_row.end)
8235 {
8236 my_error(ER_MISSING, MYF(0), table_name.str,
8237 as_row.start ? "AS ROW END" : "AS ROW START");
8238 return true;
8239 }
8240
8241 if (!period.start || !period.end)
8242 {
8243 my_error(ER_MISSING, MYF(0), table_name.str, "PERIOD FOR SYSTEM_TIME");
8244 return true;
8245 }
8246
8247 if (!as_row.start.streq(period.start) ||
8248 !as_row.end.streq(period.end))
8249 {
8250 my_error(ER_VERS_PERIOD_COLUMNS, MYF(0), as_row.start.str, as_row.end.str);
8251 return true;
8252 }
8253
8254 if (db.streq(MYSQL_SCHEMA_NAME))
8255 {
8256 my_error(ER_VERS_DB_NOT_SUPPORTED, MYF(0), MYSQL_SCHEMA_NAME.str);
8257 return true;
8258 }
8259 return false;
8260 }
8261
is_versioning_timestamp(const Column_definition * f)8262 static bool is_versioning_timestamp(const Column_definition *f)
8263 {
8264 return f->type_handler() == &type_handler_timestamp2 &&
8265 f->length == MAX_DATETIME_FULL_WIDTH;
8266 }
8267
is_some_bigint(const Column_definition * f)8268 static bool is_some_bigint(const Column_definition *f)
8269 {
8270 return f->type_handler() == &type_handler_slonglong ||
8271 f->type_handler() == &type_handler_ulonglong ||
8272 f->type_handler() == &type_handler_vers_trx_id;
8273 }
8274
is_versioning_bigint(const Column_definition * f)8275 static bool is_versioning_bigint(const Column_definition *f)
8276 {
8277 return is_some_bigint(f) && f->flags & UNSIGNED_FLAG &&
8278 f->length == MY_INT64_NUM_DECIMAL_DIGITS - 1;
8279 }
8280
/* Report that a versioning column must be of type TIMESTAMP(6). */
static void require_timestamp_error(const char *field, const char *table)
{
  my_error(ER_VERS_FIELD_WRONG_TYPE, MYF(0), field, "TIMESTAMP(6)", table);
}
8285
/* Report that a versioning column must be of type BIGINT(20) UNSIGNED. */
static void require_trx_id_error(const char *field, const char *table)
{
  my_error(ER_VERS_FIELD_WRONG_TYPE, MYF(0), field, "BIGINT(20) UNSIGNED",
           table);
}
8291
8292
check_sys_fields(const LEX_CSTRING & table_name,const Column_definition * row_start,const Column_definition * row_end) const8293 bool Vers_type_timestamp::check_sys_fields(const LEX_CSTRING &table_name,
8294 const Column_definition *row_start,
8295 const Column_definition *row_end) const
8296 {
8297 if (!is_versioning_timestamp(row_start))
8298 {
8299 require_timestamp_error(row_start->field_name.str, table_name.str);
8300 return true;
8301 }
8302
8303 if (row_end->type_handler()->vers() != this ||
8304 !is_versioning_timestamp(row_end))
8305 {
8306 require_timestamp_error(row_end->field_name.str, table_name.str);
8307 return true;
8308 }
8309
8310 return false;
8311 }
8312
8313
check_sys_fields(const LEX_CSTRING & table_name,const Column_definition * row_start,const Column_definition * row_end) const8314 bool Vers_type_trx::check_sys_fields(const LEX_CSTRING &table_name,
8315 const Column_definition *row_start,
8316 const Column_definition *row_end) const
8317 {
8318 if (!is_versioning_bigint(row_start))
8319 {
8320 require_trx_id_error(row_start->field_name.str, table_name.str);
8321 return true;
8322 }
8323
8324 if (row_end->type_handler()->vers() != this ||
8325 !is_versioning_bigint(row_end))
8326 {
8327 require_trx_id_error(row_end->field_name.str, table_name.str);
8328 return true;
8329 }
8330
8331 if (!is_some_bigint(row_start))
8332 {
8333 require_timestamp_error(row_start->field_name.str, table_name.str);
8334 return true;
8335 }
8336
8337 if (!TR_table::use_transaction_registry)
8338 {
8339 my_error(ER_VERS_TRT_IS_DISABLED, MYF(0));
8340 return true;
8341 }
8342
8343 return false;
8344 }
8345
8346
check_sys_fields(const Lex_table_name & table_name,const Lex_table_name & db,Alter_info * alter_info) const8347 bool Vers_parse_info::check_sys_fields(const Lex_table_name &table_name,
8348 const Lex_table_name &db,
8349 Alter_info *alter_info) const
8350 {
8351 if (check_conditions(table_name, db))
8352 return true;
8353
8354 List_iterator<Create_field> it(alter_info->create_list);
8355 const Create_field *row_start= nullptr;
8356 const Create_field *row_end= nullptr;
8357 while (const Create_field *f= it++)
8358 {
8359 if (f->flags & VERS_ROW_START && !row_start)
8360 row_start= f;
8361 if (f->flags & VERS_ROW_END && !row_end)
8362 row_end= f;
8363 }
8364
8365 if (!row_start || !row_end)
8366 {
8367 my_error(ER_VERS_PERIOD_COLUMNS, MYF(0), as_row.start.str, as_row.end.str);
8368 return true;
8369 }
8370
8371 const Vers_type_handler *row_start_vers= row_start->type_handler()->vers();
8372
8373 if (!row_start_vers)
8374 {
8375 require_timestamp_error(row_start->field_name.str, table_name);
8376 return true;
8377 }
8378
8379 return row_start_vers->check_sys_fields(table_name, row_start, row_end);
8380 }
8381
check_field(const Create_field * f,const Lex_ident & f_name) const8382 bool Table_period_info::check_field(const Create_field* f,
8383 const Lex_ident& f_name) const
8384 {
8385 bool res= false;
8386 if (!f)
8387 {
8388 my_error(ER_BAD_FIELD_ERROR, MYF(0), f_name.str, name.str);
8389 res= true;
8390 }
8391 else if (f->type_handler()->mysql_timestamp_type() != MYSQL_TIMESTAMP_DATE &&
8392 f->type_handler()->mysql_timestamp_type() != MYSQL_TIMESTAMP_DATETIME)
8393 {
8394 my_error(ER_WRONG_FIELD_SPEC, MYF(0), f->field_name.str);
8395 res= true;
8396 }
8397 else if (f->vcol_info || f->flags & VERS_SYSTEM_FIELD)
8398 {
8399 my_error(ER_PERIOD_FIELD_WRONG_ATTRIBUTES, MYF(0),
8400 f->field_name.str, "GENERATED ALWAYS AS");
8401 res= true;
8402 }
8403
8404 return res;
8405 }
8406
check_fields(THD * thd,Alter_info * alter_info,const Lex_table_name & table_name,const Lex_table_name & db,int select_count)8407 bool Table_scope_and_contents_source_st::check_fields(
8408 THD *thd, Alter_info *alter_info,
8409 const Lex_table_name &table_name, const Lex_table_name &db, int select_count)
8410 {
8411 return vers_check_system_fields(thd, alter_info,
8412 table_name, db, select_count) ||
8413 check_period_fields(thd, alter_info);
8414 }
8415
/*
  Validate PERIOD FOR <name> (start, end) against the field list.

  Resolves the start/end bound columns, rejects temporary tables, a
  column named like the period itself, non-DATE/DATETIME bound columns,
  and a type/length mismatch between the two bounds.

  @return true on error (my_error() has been reported)
*/
bool Table_scope_and_contents_source_st::check_period_fields(
                THD *thd, Alter_info *alter_info)
{
  if (!period_info.name)
    return false;

  // Application-time periods are not supported for temporary tables
  if (tmp_table())
  {
    my_error(ER_PERIOD_TEMPORARY_NOT_ALLOWED, MYF(0));
    return true;
  }

  Table_period_info::start_end_t &period= period_info.period;
  const Create_field *row_start= NULL;
  const Create_field *row_end= NULL;
  List_iterator<Create_field> it(alter_info->create_list);
  while (const Create_field *f= it++)
  {
    if (period.start.streq(f->field_name)) row_start= f;
    else if (period.end.streq(f->field_name)) row_end= f;

    // A column may not share its name with the period
    if (period_info.name.streq(f->field_name))
    {
      my_error(ER_DUP_FIELDNAME, MYF(0), f->field_name.str);
      return true;
    }
  }

  // Each bound must exist and be a plain DATE/DATETIME column
  bool res= period_info.check_field(row_start, period.start.str)
            || period_info.check_field(row_end, period.end.str);
  if (res)
    return true;

  // Both bounds must have exactly the same type and length
  if (row_start->type_handler() != row_end->type_handler()
      || row_start->length != row_end->length)
  {
    my_error(ER_PERIOD_TYPES_MISMATCH, MYF(0), period_info.name.str);
    res= true;
  }

  return res;
}
8458
8459 bool
fix_create_fields(THD * thd,Alter_info * alter_info,const TABLE_LIST & create_table)8460 Table_scope_and_contents_source_st::fix_create_fields(THD *thd,
8461 Alter_info *alter_info,
8462 const TABLE_LIST &create_table)
8463 {
8464 return vers_fix_system_fields(thd, alter_info, create_table)
8465 || fix_period_fields(thd, alter_info);
8466 }
8467
8468 bool
fix_period_fields(THD * thd,Alter_info * alter_info)8469 Table_scope_and_contents_source_st::fix_period_fields(THD *thd,
8470 Alter_info *alter_info)
8471 {
8472 if (!period_info.name)
8473 return false;
8474
8475 Table_period_info::start_end_t &period= period_info.period;
8476 List_iterator<Create_field> it(alter_info->create_list);
8477 while (Create_field *f= it++)
8478 {
8479 if (period.start.streq(f->field_name) || period.end.streq(f->field_name))
8480 {
8481 f->period= &period_info;
8482 f->flags|= NOT_NULL_FLAG;
8483 }
8484 }
8485 return false;
8486 }
8487