1 /* Copyright (c) 2000, 2016, Oracle and/or its affiliates.
2    Copyright (c) 2009, 2021, MariaDB Corporation.
3 
4    This program is free software; you can redistribute it and/or modify
5    it under the terms of the GNU General Public License as published by
6    the Free Software Foundation; version 2 of the License.
7 
8    This program is distributed in the hope that it will be useful,
9    but WITHOUT ANY WARRANTY; without even the implied warranty of
10    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
11    GNU General Public License for more details.
12 
13    You should have received a copy of the GNU General Public License
14    along with this program; if not, write to the Free Software Foundation,
15    Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335 USA */
16 
17 /** @file handler.cc
18 
19     @brief
20   Handler-calling-functions
21 */
22 
23 #include "mariadb.h"
24 #include <inttypes.h>
25 #include "sql_priv.h"
26 #include "unireg.h"
27 #include "rpl_rli.h"
28 #include "sql_cache.h"                   // query_cache, query_cache_*
29 #include "sql_connect.h"                 // global_table_stats
30 #include "key.h"     // key_copy, key_unpack, key_cmp_if_same, key_cmp
31 #include "sql_table.h"                   // build_table_filename
32 #include "sql_parse.h"                          // check_stack_overrun
33 #include "sql_base.h"           // TDC_element
34 #include "discover.h"           // extension_based_table_discovery, etc
35 #include "log_event.h"          // *_rows_log_event
36 #include "create_options.h"
37 #include <myisampack.h>
38 #include "transaction.h"
39 #include "myisam.h"
40 #include "probes_mysql.h"
41 #include <mysql/psi/mysql_table.h>
42 #include <pfs_transaction_provider.h>
43 #include <mysql/psi/mysql_transaction.h>
44 #include "debug_sync.h"         // DEBUG_SYNC
45 #include "sql_audit.h"
46 #include "ha_sequence.h"
47 #include "rowid_filter.h"
48 #include "mysys_err.h"
49 
50 #ifdef WITH_PARTITION_STORAGE_ENGINE
51 #include "ha_partition.h"
52 #endif
53 
54 #ifdef WITH_ARIA_STORAGE_ENGINE
55 #include "../storage/maria/ha_maria.h"
56 #endif
57 #include "semisync_master.h"
58 
59 #include "wsrep_mysqld.h"
60 #ifdef WITH_WSREP
61 #include "wsrep_binlog.h"
62 #include "wsrep_xid.h"
63 #include "wsrep_thd.h"
64 #include "wsrep_trans_observer.h" /* wsrep transaction hooks */
65 #include "wsrep_var.h"            /* wsrep_hton_check() */
66 #endif /* WITH_WSREP */
67 
/**
  @def MYSQL_TABLE_LOCK_WAIT
  Instrumentation helper for table lock waits.

  The macro expects a member/variable named m_psi to be in scope where it
  is expanded. With HAVE_PSI_TABLE_INTERFACE defined and m_psi non-NULL,
  PAYLOAD is bracketed by the start_table_lock_wait / end_table_lock_wait
  instrumentation calls; with m_psi NULL, PAYLOAD runs uninstrumented.
  Without HAVE_PSI_TABLE_INTERFACE the macro expands to PAYLOAD alone.

  @param OP the table operation to be performed
  @param FLAGS per table operation flags.
  @param PAYLOAD the code to instrument.
  @sa MYSQL_END_TABLE_WAIT.
*/
#ifdef HAVE_PSI_TABLE_INTERFACE
  #define MYSQL_TABLE_LOCK_WAIT(OP, FLAGS, PAYLOAD)    \
    {                                                  \
      if (m_psi != NULL)                               \
      {                                                \
        PSI_table_locker *locker;                      \
        PSI_table_locker_state state;                  \
        locker= PSI_TABLE_CALL(start_table_lock_wait)  \
          (& state, m_psi, OP, FLAGS,                  \
          __FILE__, __LINE__);                         \
        PAYLOAD                                        \
        if (locker != NULL)                            \
          PSI_TABLE_CALL(end_table_lock_wait)(locker); \
      }                                                \
      else                                             \
      {                                                \
        PAYLOAD                                        \
      }                                                \
    }
#else
  #define MYSQL_TABLE_LOCK_WAIT(OP, FLAGS, PAYLOAD) \
    PAYLOAD
#endif
99 
100 
101 /*
102   While we have legacy_db_type, we have this array to
103   check for dups and to find handlerton from legacy_db_type.
104   Remove when legacy_db_type is finally gone
105 */
/* Maps a handlerton slot number (hton->slot) to the plugin that owns it. */
st_plugin_int *hton2plugin[MAX_HA];

/*
  Installed handlertons indexed by their legacy_db_type value; an entry is
  NULL while no engine with that type code is installed.
*/
static handlerton *installed_htons[128];

/* 128 bits expressed in bytes. */
#define BITMAP_STACKBUF_SIZE (128/8)

KEY_CREATE_INFO default_key_create_info=
{ HA_KEY_ALG_UNDEF, 0, 0, {NullS, 0}, {NullS, 0}, true };

/*
  Number of hton2plugin[] slots handed out so far; slots freed by
  ha_finalize_handlerton() are reused before this counter grows.
*/
ulong total_ha= 0;
/* number of installed storage engines that support 2pc (have hton->prepare) */
ulong total_ha_2pc= 0;
#ifdef DBUG_ASSERT_EXISTS
/*
  Number of non-mandatory 2pc handlertons whose initialization failed.
  It allows estimating what total_ha_2pc would have been had those
  failures not occurred.
*/
ulong failed_ha_2pc= 0;
#endif
/* size of savepoint storage area (see ha_init) */
ulong savepoint_alloc_size= 0;

/*
  Historical engine name aliases, stored as consecutive
  (alias, current name) pairs and terminated by a {NullS, 0} entry.
  ha_resolve_by_name() walks the array two entries at a time.
*/
static const LEX_CSTRING sys_table_aliases[]=
{
  { STRING_WITH_LEN("INNOBASE") },  { STRING_WITH_LEN("INNODB") },
  { STRING_WITH_LEN("HEAP") },      { STRING_WITH_LEN("MEMORY") },
  { STRING_WITH_LEN("MERGE") },     { STRING_WITH_LEN("MRG_MYISAM") },
  { STRING_WITH_LEN("Maria") },     { STRING_WITH_LEN("Aria") },
  {NullS, 0}
};

/* Row format names; presumably indexed by the row_type enum -- TODO confirm. */
const char *ha_row_type[] = {
  "", "FIXED", "DYNAMIC", "COMPRESSED", "REDUNDANT", "COMPACT", "PAGE"
};

/* Transaction isolation level names, NullS-terminated. */
const char *tx_isolation_names[] =
{ "READ-UNCOMMITTED", "READ-COMMITTED", "REPEATABLE-READ", "SERIALIZABLE",
  NullS};
/* The element count excludes the NullS terminator, hence the -1. */
TYPELIB tx_isolation_typelib= {array_elements(tx_isolation_names)-1,"",
			       tx_isolation_names, NULL};

/* Starts out empty; it is not populated anywhere in this part of the file. */
static TYPELIB known_extensions= {0,"known_exts", NULL, NULL};
uint known_extensions_id= 0;
151 
152 
153 class Table_exists_error_handler : public Internal_error_handler
154 {
155 public:
Table_exists_error_handler()156   Table_exists_error_handler()
157     : m_handled_errors(0), m_unhandled_errors(0)
158   {}
159 
handle_condition(THD * thd,uint sql_errno,const char * sqlstate,Sql_condition::enum_warning_level * level,const char * msg,Sql_condition ** cond_hdl)160   bool handle_condition(THD *thd,
161                         uint sql_errno,
162                         const char* sqlstate,
163                         Sql_condition::enum_warning_level *level,
164                         const char* msg,
165                         Sql_condition ** cond_hdl)
166   {
167     *cond_hdl= NULL;
168     if (non_existing_table_error(sql_errno))
169     {
170       m_handled_errors++;
171       return TRUE;
172     }
173 
174     if (*level == Sql_condition::WARN_LEVEL_ERROR)
175       m_unhandled_errors++;
176     return FALSE;
177   }
178 
safely_trapped_errors()179   bool safely_trapped_errors()
180   {
181     return ((m_handled_errors > 0) && (m_unhandled_errors == 0));
182   }
183 
184 private:
185   int m_handled_errors;
186   int m_unhandled_errors;
187 };
188 
189 
190 static int commit_one_phase_2(THD *thd, bool all, THD_TRANS *trans,
191                               bool is_real_trans);
192 
193 
ha_default_plugin(THD * thd)194 static plugin_ref ha_default_plugin(THD *thd)
195 {
196   if (thd->variables.table_plugin)
197     return thd->variables.table_plugin;
198   return my_plugin_lock(thd, global_system_variables.table_plugin);
199 }
200 
ha_default_tmp_plugin(THD * thd)201 static plugin_ref ha_default_tmp_plugin(THD *thd)
202 {
203   if (thd->variables.tmp_table_plugin)
204     return thd->variables.tmp_table_plugin;
205   if (global_system_variables.tmp_table_plugin)
206     return my_plugin_lock(thd, global_system_variables.tmp_table_plugin);
207   return ha_default_plugin(thd);
208 }
209 
210 
211 /** @brief
212   Return the default storage engine handlerton for thread
213 
214   SYNOPSIS
215     ha_default_handlerton(thd)
216     thd         current thread
217 
218   RETURN
219     pointer to handlerton
220 */
ha_default_handlerton(THD * thd)221 handlerton *ha_default_handlerton(THD *thd)
222 {
223   plugin_ref plugin= ha_default_plugin(thd);
224   DBUG_ASSERT(plugin);
225   handlerton *hton= plugin_hton(plugin);
226   DBUG_ASSERT(hton);
227   return hton;
228 }
229 
230 
ha_default_tmp_handlerton(THD * thd)231 handlerton *ha_default_tmp_handlerton(THD *thd)
232 {
233   plugin_ref plugin= ha_default_tmp_plugin(thd);
234   DBUG_ASSERT(plugin);
235   handlerton *hton= plugin_hton(plugin);
236   DBUG_ASSERT(hton);
237   return hton;
238 }
239 
240 
241 /** @brief
242   Return the storage engine handlerton for the supplied name
243 
244   SYNOPSIS
245     ha_resolve_by_name(thd, name)
246     thd         current thread
247     name        name of storage engine
248 
249   RETURN
250     pointer to storage engine plugin handle
251 */
ha_resolve_by_name(THD * thd,const LEX_CSTRING * name,bool tmp_table)252 plugin_ref ha_resolve_by_name(THD *thd, const LEX_CSTRING *name,
253                               bool tmp_table)
254 {
255   const LEX_CSTRING *table_alias;
256   plugin_ref plugin;
257 
258 redo:
259   if (thd && !my_charset_latin1.strnncoll(
260                            (const uchar *)name->str, name->length,
261                            (const uchar *)STRING_WITH_LEN("DEFAULT"), 0))
262     return tmp_table ?  ha_default_tmp_plugin(thd) : ha_default_plugin(thd);
263 
264   if ((plugin= my_plugin_lock_by_name(thd, name, MYSQL_STORAGE_ENGINE_PLUGIN)))
265   {
266     handlerton *hton= plugin_hton(plugin);
267     if (hton && !(hton->flags & HTON_NOT_USER_SELECTABLE))
268       return plugin;
269 
270     /*
271       unlocking plugin immediately after locking is relatively low cost.
272     */
273     plugin_unlock(thd, plugin);
274   }
275 
276   /*
277     We check for the historical aliases.
278   */
279   for (table_alias= sys_table_aliases; table_alias->str; table_alias+= 2)
280   {
281     if (!my_charset_latin1.strnncoll(
282                       (const uchar *)name->str, name->length,
283                       (const uchar *)table_alias->str, table_alias->length))
284     {
285       name= table_alias + 1;
286       goto redo;
287     }
288   }
289 
290   return NULL;
291 }
292 
293 
294 bool
resolve_storage_engine_with_error(THD * thd,handlerton ** ha,bool tmp_table)295 Storage_engine_name::resolve_storage_engine_with_error(THD *thd,
296                                                        handlerton **ha,
297                                                        bool tmp_table)
298 {
299   if (plugin_ref plugin= ha_resolve_by_name(thd, &m_storage_engine_name,
300                                             tmp_table))
301   {
302     *ha= plugin_hton(plugin);
303     return false;
304   }
305 
306   *ha= NULL;
307   if (thd->variables.sql_mode & MODE_NO_ENGINE_SUBSTITUTION)
308   {
309     my_error(ER_UNKNOWN_STORAGE_ENGINE, MYF(0), m_storage_engine_name.str);
310     return true;
311   }
312   push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN,
313                       ER_UNKNOWN_STORAGE_ENGINE,
314                       ER_THD(thd, ER_UNKNOWN_STORAGE_ENGINE),
315                       m_storage_engine_name.str);
316   return false;
317 }
318 
319 
ha_lock_engine(THD * thd,const handlerton * hton)320 plugin_ref ha_lock_engine(THD *thd, const handlerton *hton)
321 {
322   if (hton)
323   {
324     st_plugin_int *plugin= hton2plugin[hton->slot];
325     return my_plugin_lock(thd, plugin_int_to_ref(plugin));
326   }
327   return NULL;
328 }
329 
330 
ha_resolve_by_legacy_type(THD * thd,enum legacy_db_type db_type)331 handlerton *ha_resolve_by_legacy_type(THD *thd, enum legacy_db_type db_type)
332 {
333   plugin_ref plugin;
334   switch (db_type) {
335   case DB_TYPE_DEFAULT:
336     return ha_default_handlerton(thd);
337   default:
338     if (db_type > DB_TYPE_UNKNOWN && db_type < DB_TYPE_DEFAULT &&
339         (plugin= ha_lock_engine(thd, installed_htons[db_type])))
340       return plugin_hton(plugin);
341     /* fall through */
342   case DB_TYPE_UNKNOWN:
343     return NULL;
344   }
345 }
346 
347 
348 /**
349   Use other database handler if databasehandler is not compiled in.
350 */
ha_checktype(THD * thd,handlerton * hton,bool no_substitute)351 handlerton *ha_checktype(THD *thd, handlerton *hton, bool no_substitute)
352 {
353   if (ha_storage_engine_is_enabled(hton))
354     return hton;
355 
356   if (no_substitute)
357     return NULL;
358 #ifdef WITH_WSREP
359   (void)wsrep_after_rollback(thd, false);
360 #endif /* WITH_WSREP */
361 
362   return ha_default_handlerton(thd);
363 } /* ha_checktype */
364 
365 
get_new_handler(TABLE_SHARE * share,MEM_ROOT * alloc,handlerton * db_type)366 handler *get_new_handler(TABLE_SHARE *share, MEM_ROOT *alloc,
367                          handlerton *db_type)
368 {
369   handler *file;
370   DBUG_ENTER("get_new_handler");
371   DBUG_PRINT("enter", ("alloc: %p", alloc));
372 
373   if (ha_storage_engine_is_enabled(db_type))
374   {
375     if ((file= db_type->create(db_type, share, alloc)))
376       file->init();
377     DBUG_RETURN(file);
378   }
379   /*
380     Try the default table type
381     Here the call to current_thd() is ok as we call this function a lot of
382     times but we enter this branch very seldom.
383   */
384   file= get_new_handler(share, alloc, ha_default_handlerton(current_thd));
385   DBUG_RETURN(file);
386 }
387 
388 
389 #ifdef WITH_PARTITION_STORAGE_ENGINE
get_ha_partition(partition_info * part_info)390 handler *get_ha_partition(partition_info *part_info)
391 {
392   ha_partition *partition;
393   DBUG_ENTER("get_ha_partition");
394   if ((partition= new ha_partition(partition_hton, part_info)))
395   {
396     if (partition->initialize_partition(current_thd->mem_root))
397     {
398       delete partition;
399       partition= 0;
400     }
401     else
402       partition->init();
403   }
404   else
405   {
406     my_error(ER_OUTOFMEMORY, MYF(ME_FATAL),
407              static_cast<int>(sizeof(ha_partition)));
408   }
409   DBUG_RETURN(((handler*) partition));
410 }
411 #endif
412 
413 static const char **handler_errmsgs;
414 
415 C_MODE_START
get_handler_errmsgs(int nr)416 static const char **get_handler_errmsgs(int nr)
417 {
418   return handler_errmsgs;
419 }
420 C_MODE_END
421 
422 
423 /**
424   Register handler error messages for use with my_error().
425 
426   @retval
427     0           OK
428   @retval
429     !=0         Error
430 */
431 
ha_init_errors(void)432 int ha_init_errors(void)
433 {
434 #define SETMSG(nr, msg) handler_errmsgs[(nr) - HA_ERR_FIRST]= (msg)
435 
436   /* Allocate a pointer array for the error message strings. */
437   /* Zerofill it to avoid uninitialized gaps. */
438   if (! (handler_errmsgs= (const char**) my_malloc(key_memory_handler_errmsgs,
439                                                    HA_ERR_ERRORS * sizeof(char*),
440                                                    MYF(MY_WME | MY_ZEROFILL))))
441     return 1;
442 
443   /* Set the dedicated error messages. */
444   SETMSG(HA_ERR_KEY_NOT_FOUND,          ER_DEFAULT(ER_KEY_NOT_FOUND));
445   SETMSG(HA_ERR_FOUND_DUPP_KEY,         ER_DEFAULT(ER_DUP_KEY));
446   SETMSG(HA_ERR_RECORD_CHANGED,         "Update which is recoverable");
447   SETMSG(HA_ERR_WRONG_INDEX,            "Wrong index given to function");
448   SETMSG(HA_ERR_CRASHED,                ER_DEFAULT(ER_NOT_KEYFILE));
449   SETMSG(HA_ERR_WRONG_IN_RECORD,        ER_DEFAULT(ER_CRASHED_ON_USAGE));
450   SETMSG(HA_ERR_OUT_OF_MEM,             "Table handler out of memory");
451   SETMSG(HA_ERR_NOT_A_TABLE,            "Incorrect file format '%.64s'");
452   SETMSG(HA_ERR_WRONG_COMMAND,          "Command not supported");
453   SETMSG(HA_ERR_OLD_FILE,               ER_DEFAULT(ER_OLD_KEYFILE));
454   SETMSG(HA_ERR_NO_ACTIVE_RECORD,       "No record read in update");
455   SETMSG(HA_ERR_RECORD_DELETED,         "Intern record deleted");
456   SETMSG(HA_ERR_RECORD_FILE_FULL,       ER_DEFAULT(ER_RECORD_FILE_FULL));
457   SETMSG(HA_ERR_INDEX_FILE_FULL,        "No more room in index file '%.64s'");
458   SETMSG(HA_ERR_END_OF_FILE,            "End in next/prev/first/last");
459   SETMSG(HA_ERR_UNSUPPORTED,            ER_DEFAULT(ER_ILLEGAL_HA));
460   SETMSG(HA_ERR_TO_BIG_ROW,             "Too big row");
461   SETMSG(HA_WRONG_CREATE_OPTION,        "Wrong create option");
462   SETMSG(HA_ERR_FOUND_DUPP_UNIQUE,      ER_DEFAULT(ER_DUP_UNIQUE));
463   SETMSG(HA_ERR_UNKNOWN_CHARSET,        "Can't open charset");
464   SETMSG(HA_ERR_WRONG_MRG_TABLE_DEF,    ER_DEFAULT(ER_WRONG_MRG_TABLE));
465   SETMSG(HA_ERR_CRASHED_ON_REPAIR,      ER_DEFAULT(ER_CRASHED_ON_REPAIR));
466   SETMSG(HA_ERR_CRASHED_ON_USAGE,       ER_DEFAULT(ER_CRASHED_ON_USAGE));
467   SETMSG(HA_ERR_LOCK_WAIT_TIMEOUT,      ER_DEFAULT(ER_LOCK_WAIT_TIMEOUT));
468   SETMSG(HA_ERR_LOCK_TABLE_FULL,        ER_DEFAULT(ER_LOCK_TABLE_FULL));
469   SETMSG(HA_ERR_READ_ONLY_TRANSACTION,  ER_DEFAULT(ER_READ_ONLY_TRANSACTION));
470   SETMSG(HA_ERR_LOCK_DEADLOCK,          ER_DEFAULT(ER_LOCK_DEADLOCK));
471   SETMSG(HA_ERR_CANNOT_ADD_FOREIGN,     ER_DEFAULT(ER_CANNOT_ADD_FOREIGN));
472   SETMSG(HA_ERR_NO_REFERENCED_ROW,      ER_DEFAULT(ER_NO_REFERENCED_ROW_2));
473   SETMSG(HA_ERR_ROW_IS_REFERENCED,      ER_DEFAULT(ER_ROW_IS_REFERENCED_2));
474   SETMSG(HA_ERR_NO_SAVEPOINT,           "No savepoint with that name");
475   SETMSG(HA_ERR_NON_UNIQUE_BLOCK_SIZE,  "Non unique key block size");
476   SETMSG(HA_ERR_NO_SUCH_TABLE,          "No such table: '%.64s'");
477   SETMSG(HA_ERR_TABLE_EXIST,            ER_DEFAULT(ER_TABLE_EXISTS_ERROR));
478   SETMSG(HA_ERR_NO_CONNECTION,          "Could not connect to storage engine");
479   SETMSG(HA_ERR_TABLE_DEF_CHANGED,      ER_DEFAULT(ER_TABLE_DEF_CHANGED));
480   SETMSG(HA_ERR_FOREIGN_DUPLICATE_KEY,  "FK constraint would lead to duplicate key");
481   SETMSG(HA_ERR_TABLE_NEEDS_UPGRADE,    ER_DEFAULT(ER_TABLE_NEEDS_UPGRADE));
482   SETMSG(HA_ERR_TABLE_READONLY,         ER_DEFAULT(ER_OPEN_AS_READONLY));
483   SETMSG(HA_ERR_AUTOINC_READ_FAILED,    ER_DEFAULT(ER_AUTOINC_READ_FAILED));
484   SETMSG(HA_ERR_AUTOINC_ERANGE,         ER_DEFAULT(ER_WARN_DATA_OUT_OF_RANGE));
485   SETMSG(HA_ERR_TOO_MANY_CONCURRENT_TRXS, ER_DEFAULT(ER_TOO_MANY_CONCURRENT_TRXS));
486   SETMSG(HA_ERR_INDEX_COL_TOO_LONG,	ER_DEFAULT(ER_INDEX_COLUMN_TOO_LONG));
487   SETMSG(HA_ERR_INDEX_CORRUPT,		ER_DEFAULT(ER_INDEX_CORRUPT));
488   SETMSG(HA_FTS_INVALID_DOCID,		"Invalid InnoDB FTS Doc ID");
489   SETMSG(HA_ERR_TABLE_IN_FK_CHECK,	ER_DEFAULT(ER_TABLE_IN_FK_CHECK));
490   SETMSG(HA_ERR_DISK_FULL,              ER_DEFAULT(ER_DISK_FULL));
491   SETMSG(HA_ERR_FTS_TOO_MANY_WORDS_IN_PHRASE,  "Too many words in a FTS phrase or proximity search");
492   SETMSG(HA_ERR_FK_DEPTH_EXCEEDED,      "Foreign key cascade delete/update exceeds");
493   SETMSG(HA_ERR_TABLESPACE_MISSING,     ER_DEFAULT(ER_TABLESPACE_MISSING));
494 
495   /* Register the error messages for use with my_error(). */
496   return my_error_register(get_handler_errmsgs, HA_ERR_FIRST, HA_ERR_LAST);
497 }
498 
499 
500 /**
501   Unregister handler error messages.
502 
503   @retval
504     0           OK
505   @retval
506     !=0         Error
507 */
ha_finish_errors(void)508 static int ha_finish_errors(void)
509 {
510   /* Allocate a pointer array for the error message strings. */
511   my_error_unregister(HA_ERR_FIRST, HA_ERR_LAST);
512   my_free(handler_errmsgs);
513   handler_errmsgs= 0;
514   return 0;
515 }
516 
/*
  Counters maintained by update_discovery_counters() as engines are
  installed (+1) and uninstalled (-1):
  - engines whose discover_table_existence is full_discover_for_existence
  - engines with discover_table_names and a non-empty tablefile_extensions
  - engines that provide discover_table
*/
static Atomic_counter<int32> need_full_discover_for_existence(0);
static Atomic_counter<int32> engines_with_discover_file_names(0);
static Atomic_counter<int32> engines_with_discover(0);
520 
full_discover_for_existence(handlerton *,const char *,const char *)521 static int full_discover_for_existence(handlerton *, const char *, const char *)
522 { return 0; }
523 
ext_based_existence(handlerton *,const char *,const char *)524 static int ext_based_existence(handlerton *, const char *, const char *)
525 { return 0; }
526 
hton_ext_based_table_discovery(handlerton * hton,LEX_CSTRING * db,MY_DIR * dir,handlerton::discovered_list * result)527 static int hton_ext_based_table_discovery(handlerton *hton, LEX_CSTRING *db,
528                              MY_DIR *dir, handlerton::discovered_list *result)
529 {
530   /*
531     tablefile_extensions[0] is the metadata file, see
532     the comment above tablefile_extensions declaration
533   */
534   return extension_based_table_discovery(dir, hton->tablefile_extensions[0],
535                                          result);
536 }
537 
update_discovery_counters(handlerton * hton,int val)538 static void update_discovery_counters(handlerton *hton, int val)
539 {
540   if (hton->discover_table_existence == full_discover_for_existence)
541     need_full_discover_for_existence+= val;
542 
543   if (hton->discover_table_names && hton->tablefile_extensions[0])
544     engines_with_discover_file_names+= val;
545 
546   if (hton->discover_table)
547     engines_with_discover+= val;
548 }
549 
ha_drop_table(THD * thd,handlerton * hton,const char * path)550 int ha_drop_table(THD *thd, handlerton *hton, const char *path)
551 {
552   if (ha_check_if_updates_are_ignored(thd, hton, "DROP"))
553     return 0;                                   // Simulate dropped
554   return hton->drop_table(hton, path);
555 }
556 
hton_drop_table(handlerton * hton,const char * path)557 static int hton_drop_table(handlerton *hton, const char *path)
558 {
559   char tmp_path[FN_REFLEN];
560   handler *file= get_new_handler(nullptr, current_thd->mem_root, hton);
561   if (!file)
562     return ENOMEM;
563   path= get_canonical_filename(file, path, tmp_path);
564   int error= file->delete_table(path);
565   delete file;
566   return error;
567 }
568 
569 
ha_finalize_handlerton(st_plugin_int * plugin)570 int ha_finalize_handlerton(st_plugin_int *plugin)
571 {
572   handlerton *hton= (handlerton *)plugin->data;
573   DBUG_ENTER("ha_finalize_handlerton");
574 
575   /* hton can be NULL here, if ha_initialize_handlerton() failed. */
576   if (!hton)
577     goto end;
578 
579   if (installed_htons[hton->db_type] == hton)
580     installed_htons[hton->db_type]= NULL;
581 
582   if (hton->panic)
583     hton->panic(hton, HA_PANIC_CLOSE);
584 
585   if (plugin->plugin->deinit)
586   {
587     /*
588       Today we have no defined/special behavior for uninstalling
589       engine plugins.
590     */
591     DBUG_PRINT("info", ("Deinitializing plugin: '%s'", plugin->name.str));
592     if (plugin->plugin->deinit(NULL))
593     {
594       DBUG_PRINT("warning", ("Plugin '%s' deinit function returned error.",
595                              plugin->name.str));
596     }
597   }
598 
599   free_sysvar_table_options(hton);
600   update_discovery_counters(hton, -1);
601 
602   /*
603     In case a plugin is uninstalled and re-installed later, it should
604     reuse an array slot. Otherwise the number of uninstall/install
605     cycles would be limited.
606   */
607   if (hton->slot != HA_SLOT_UNDEF)
608   {
609     /* Make sure we are not unpluging another plugin */
610     DBUG_ASSERT(hton2plugin[hton->slot] == plugin);
611     DBUG_ASSERT(hton->slot < MAX_HA);
612     hton2plugin[hton->slot]= NULL;
613   }
614 
615   my_free(hton);
616 
617  end:
618   DBUG_RETURN(0);
619 }
620 
621 
/**
  Allocate, initialize and register the handlerton of a storage engine
  plugin.

  Steps, in order:
  - allocate a zero-filled handlerton and install default callbacks;
  - call the plugin's init method with the handlerton;
  - settle the default discover_table_names / discover_table_existence;
  - give the engine a unique legacy_db_type, picking a free dynamic code
    when the requested one is invalid or already taken;
  - give the engine a slot in hton2plugin[], reusing a free one if any;
  - register it in installed_htons[] / hton2plugin[], account for its
    savepoint storage and two-phase-commit support;
  - remember well-known engines in their global hton pointers.

  @param plugin  plugin being installed; plugin->data is set to the new
                 handlerton on success and to NULL on failure

  @retval 0  success
  @retval 1  failure (out of memory, plugin init error, no free type code
             or no free slot)
*/
int ha_initialize_handlerton(st_plugin_int *plugin)
{
  handlerton *hton;
  /* Empty, NULL-terminated extension list used until the engine sets one. */
  static const char *no_exts[]= { 0 };
  DBUG_ENTER("ha_initialize_handlerton");
  DBUG_PRINT("plugin", ("initialize plugin: '%s'", plugin->name.str));

  hton= (handlerton *)my_malloc(key_memory_handlerton, sizeof(handlerton),
                                MYF(MY_WME | MY_ZEROFILL));
  if (hton == NULL)
  {
    sql_print_error("Unable to allocate memory for plugin '%s' handlerton.",
                    plugin->name.str);
    goto err_no_hton_memory;
  }

  /* Defaults that the plugin's init method may override. */
  hton->tablefile_extensions= no_exts;
  hton->discover_table_names= hton_ext_based_table_discovery;
  hton->drop_table= hton_drop_table;

  hton->slot= HA_SLOT_UNDEF;
  /* Historical Requirement */
  plugin->data= hton; // shortcut for the future
  if (plugin->plugin->init && plugin->plugin->init(hton))
  {
    sql_print_error("Plugin '%s' init function returned error.",
                    plugin->name.str);
    goto err;
  }

  // hton_ext_based_table_discovery() works only when discovery
  // is supported and the engine if file-based.
  if (hton->discover_table_names == hton_ext_based_table_discovery &&
      (!hton->discover_table || !hton->tablefile_extensions[0]))
    hton->discover_table_names= NULL;

  // default discover_table_existence implementation
  if (!hton->discover_table_existence && hton->discover_table)
  {
    if (hton->tablefile_extensions[0])
      hton->discover_table_existence= ext_based_existence;
    else
      hton->discover_table_existence= full_discover_for_existence;
  }

  /*
    Declared without initializers: the gotos above jump past this point to
    the error labels below.
  */
  uint tmp;
  ulong fslot;

  /* Debug-only hook: force a fresh dynamic type code on every call. */
  DBUG_EXECUTE_IF("unstable_db_type", {
                    static int i= (int) DB_TYPE_FIRST_DYNAMIC;
                    hton->db_type= (enum legacy_db_type)++i;
                  });

  /* now check the db_type for conflict */
  if (hton->db_type <= DB_TYPE_UNKNOWN ||
      hton->db_type >= DB_TYPE_DEFAULT ||
      installed_htons[hton->db_type])
  {
    /* Search the dynamic range for the first unused type code. */
    int idx= (int) DB_TYPE_FIRST_DYNAMIC;

    while (idx < (int) DB_TYPE_DEFAULT && installed_htons[idx])
      idx++;

    if (idx == (int) DB_TYPE_DEFAULT)
    {
      sql_print_warning("Too many storage engines!");
      goto err_deinit;
    }
    /* Warn only if the engine asked for a specific code and lost it. */
    if (hton->db_type != DB_TYPE_UNKNOWN)
      sql_print_warning("Storage engine '%s' has conflicting typecode. "
                        "Assigning value %d.", plugin->plugin->name, idx);
    hton->db_type= (enum legacy_db_type) idx;
  }

  /*
    In case a plugin is uninstalled and re-installed later, it should
    reuse an array slot. Otherwise the number of uninstall/install
    cycles would be limited. So look for a free slot.
  */
  DBUG_PRINT("plugin", ("total_ha: %lu", total_ha));
  for (fslot= 0; fslot < total_ha; fslot++)
  {
    if (!hton2plugin[fslot])
      break;
  }
  if (fslot < total_ha)
    hton->slot= fslot;
  else
  {
    /* No hole to reuse: extend the used range if there is room left. */
    if (total_ha >= MAX_HA)
    {
      sql_print_error("Too many plugins loaded. Limit is %lu. "
                      "Failed on '%s'", (ulong) MAX_HA, plugin->name.str);
      goto err_deinit;
    }
    hton->slot= total_ha++;
  }
  installed_htons[hton->db_type]= hton;
  /*
    The value found in savepoint_offset is added to the global savepoint
    area size, and the field is replaced by the size accumulated before
    this engine, i.e. the engine's offset within that area.
  */
  tmp= hton->savepoint_offset;
  hton->savepoint_offset= savepoint_alloc_size;
  savepoint_alloc_size+= tmp;
  hton2plugin[hton->slot]=plugin;
  if (hton->prepare)
  {
    total_ha_2pc++;
    /*
      A transaction coordinator log is already in use and differs from the
      one that would be chosen now: undo the count and disable the
      engine's prepare method instead of switching logs.
    */
    if (tc_log && tc_log != get_tc_log_implementation())
    {
      total_ha_2pc--;
      hton->prepare= 0;
      push_warning_printf(current_thd, Sql_condition::WARN_LEVEL_WARN,
                          ER_UNKNOWN_ERROR,
                          "Cannot enable tc-log at run-time. "
                          "XA features of %s are disabled",
                          plugin->name.str);
    }
  }

  /*
    This is entirely for legacy. We will create a new "disk based" hton and a
    "memory" hton which will be configurable longterm. We should be able to
    remove partition.
  */
  switch (hton->db_type) {
  case DB_TYPE_HEAP:
    heap_hton= hton;
    break;
  case DB_TYPE_MYISAM:
    myisam_hton= hton;
    break;
  case DB_TYPE_PARTITION_DB:
    partition_hton= hton;
    break;
  case DB_TYPE_SEQUENCE:
    sql_sequence_hton= hton;
    break;
  default:
    break;
  };

  resolve_sysvar_table_options(hton);
  update_discovery_counters(hton, 1);

  DBUG_RETURN(0);

err_deinit:
  /*
    Let plugin do its inner deinitialization as plugin->init()
    was successfully called before.
  */
  if (plugin->plugin->deinit)
    (void) plugin->plugin->deinit(NULL);

err:
#ifdef DBUG_ASSERT_EXISTS
  /* Remember that a 2pc-capable engine failed to initialize. */
  if (hton->prepare)
    failed_ha_2pc++;
#endif
  my_free(hton);
err_no_hton_memory:
  plugin->data= NULL;
  DBUG_RETURN(1);
}
784 
ha_init()785 int ha_init()
786 {
787   int error= 0;
788   DBUG_ENTER("ha_init");
789 
790   DBUG_ASSERT(total_ha < MAX_HA);
791   /*
792     Check if there is a transaction-capable storage engine besides the
793     binary log (which is considered a transaction-capable storage engine in
794     counting total_ha)
795   */
796   opt_using_transactions= total_ha > (ulong) opt_bin_log;
797   savepoint_alloc_size+= sizeof(SAVEPOINT);
798   DBUG_RETURN(error);
799 }
800 
ha_end()801 int ha_end()
802 {
803   int error= 0;
804   DBUG_ENTER("ha_end");
805 
806   /*
807     This should be eventually based on the graceful shutdown flag.
808     So if flag is equal to HA_PANIC_CLOSE, the deallocate
809     the errors.
810   */
811   if (unlikely(ha_finish_errors()))
812     error= 1;
813 
814   DBUG_RETURN(error);
815 }
816 
dropdb_handlerton(THD * unused1,plugin_ref plugin,void * path)817 static my_bool dropdb_handlerton(THD *unused1, plugin_ref plugin,
818                                  void *path)
819 {
820   handlerton *hton= plugin_hton(plugin);
821   if (hton->drop_database)
822     hton->drop_database(hton, (char *)path);
823   return FALSE;
824 }
825 
826 
ha_drop_database(char * path)827 void ha_drop_database(char* path)
828 {
829   plugin_foreach(NULL, dropdb_handlerton, MYSQL_STORAGE_ENGINE_PLUGIN, path);
830 }
831 
832 
checkpoint_state_handlerton(THD * unused1,plugin_ref plugin,void * disable)833 static my_bool checkpoint_state_handlerton(THD *unused1, plugin_ref plugin,
834                                            void *disable)
835 {
836   handlerton *hton= plugin_hton(plugin);
837   if (hton->checkpoint_state)
838     hton->checkpoint_state(hton, (int) *(bool*) disable);
839   return FALSE;
840 }
841 
842 
ha_checkpoint_state(bool disable)843 void ha_checkpoint_state(bool disable)
844 {
845   plugin_foreach(NULL, checkpoint_state_handlerton, MYSQL_STORAGE_ENGINE_PLUGIN, &disable);
846 }
847 
848 
/*
  Argument bundle passed through plugin_foreach() to
  commit_checkpoint_request_handlerton().
*/
struct st_commit_checkpoint_request {
  /* Opaque value handed to pre_hook and to the engine's request method. */
  void *cookie;
  /* Optional hook called with cookie before each engine; may be NULL. */
  void (*pre_hook)(void *);
};
853 
commit_checkpoint_request_handlerton(THD * unused1,plugin_ref plugin,void * data)854 static my_bool commit_checkpoint_request_handlerton(THD *unused1, plugin_ref plugin,
855                                            void *data)
856 {
857   st_commit_checkpoint_request *st= (st_commit_checkpoint_request *)data;
858   handlerton *hton= plugin_hton(plugin);
859   if (hton->commit_checkpoint_request)
860   {
861     void *cookie= st->cookie;
862     if (st->pre_hook)
863       (*st->pre_hook)(cookie);
864     (*hton->commit_checkpoint_request)(cookie);
865   }
866   return FALSE;
867 }
868 
869 
870 /*
871   Invoke commit_checkpoint_request() in all storage engines that implement it.
872 
873   If pre_hook is non-NULL, the hook will be called prior to each invocation.
874 */
875 void
ha_commit_checkpoint_request(void * cookie,void (* pre_hook)(void *))876 ha_commit_checkpoint_request(void *cookie, void (*pre_hook)(void *))
877 {
878   st_commit_checkpoint_request st;
879   st.cookie= cookie;
880   st.pre_hook= pre_hook;
881   plugin_foreach(NULL, commit_checkpoint_request_handlerton,
882                  MYSQL_STORAGE_ENGINE_PLUGIN, &st);
883 }
884 
885 
886 /**
887   @note
888     don't bother to rollback here, it's done already
889 
890   there's no need to rollback here as all transactions must
891   be rolled back already
892 */
ha_close_connection(THD * thd)893 void ha_close_connection(THD* thd)
894 {
895   for (auto i= 0; i < MAX_HA; i++)
896   {
897     if (thd->ha_data[i].lock)
898     {
899       handlerton *hton= plugin_hton(thd->ha_data[i].lock);
900       if (hton->close_connection)
901         hton->close_connection(hton, thd);
902       /* make sure SE didn't reset ha_data in close_connection() */
903       DBUG_ASSERT(thd->ha_data[i].lock);
904       /* make sure ha_data is reset and ha_data_lock is released */
905       thd_set_ha_data(thd, hton, 0);
906     }
907     DBUG_ASSERT(!thd->ha_data[i].ha_ptr);
908   }
909 }
910 
kill_handlerton(THD * thd,plugin_ref plugin,void * level)911 static my_bool kill_handlerton(THD *thd, plugin_ref plugin,
912                                void *level)
913 {
914   handlerton *hton= plugin_hton(plugin);
915 
916   mysql_mutex_assert_owner(&thd->LOCK_thd_kill);
917   if (hton->kill_query && thd_get_ha_data(thd, hton))
918     hton->kill_query(hton, thd, *(enum thd_kill_levels *) level);
919   return FALSE;
920 }
921 
ha_kill_query(THD * thd,enum thd_kill_levels level)922 void ha_kill_query(THD* thd, enum thd_kill_levels level)
923 {
924   DBUG_ENTER("ha_kill_query");
925   plugin_foreach(thd, kill_handlerton, MYSQL_STORAGE_ENGINE_PLUGIN, &level);
926   DBUG_VOID_RETURN;
927 }
928 
929 
930 /*****************************************************************************
931   Backup functions
932 ******************************************************************************/
933 
plugin_prepare_for_backup(THD * unused1,plugin_ref plugin,void * not_used)934 static my_bool plugin_prepare_for_backup(THD *unused1, plugin_ref plugin,
935                                          void *not_used)
936 {
937   handlerton *hton= plugin_hton(plugin);
938   if (hton->prepare_for_backup)
939     hton->prepare_for_backup();
940   return FALSE;
941 }
942 
ha_prepare_for_backup()943 void ha_prepare_for_backup()
944 {
945   plugin_foreach_with_mask(0, plugin_prepare_for_backup,
946                            MYSQL_STORAGE_ENGINE_PLUGIN,
947                            PLUGIN_IS_DELETED|PLUGIN_IS_READY, 0);
948 }
949 
plugin_end_backup(THD * unused1,plugin_ref plugin,void * not_used)950 static my_bool plugin_end_backup(THD *unused1, plugin_ref plugin,
951                                  void *not_used)
952 {
953   handlerton *hton= plugin_hton(plugin);
954   if (hton->end_backup)
955     hton->end_backup();
956   return FALSE;
957 }
958 
ha_end_backup()959 void ha_end_backup()
960 {
961   plugin_foreach_with_mask(0, plugin_end_backup,
962                            MYSQL_STORAGE_ENGINE_PLUGIN,
963                            PLUGIN_IS_DELETED|PLUGIN_IS_READY, 0);
964 }
965 
966 
967 /*
968   Inform plugin of the server shutdown.
969   Called after all connections are down.
970 
971   Under some circumstances, storage engine might need to
972   so some work, before deinit() can be safely called.
973   (an example is Innodb purge that might call into server
974    to calculate virtual columns, which might potentially also
975   invoke other plugins, such as audit
976 */
plugin_pre_shutdown(THD *,plugin_ref plugin,void *)977 static my_bool plugin_pre_shutdown(THD *, plugin_ref plugin, void *)
978 {
979   handlerton *hton= plugin_hton(plugin);
980   if (hton->pre_shutdown)
981     hton->pre_shutdown();
982   return FALSE;
983 }
984 
985 
ha_pre_shutdown()986 void ha_pre_shutdown()
987 {
988   plugin_foreach_with_mask(0, plugin_pre_shutdown,
989     MYSQL_STORAGE_ENGINE_PLUGIN,
990     PLUGIN_IS_DELETED | PLUGIN_IS_READY, 0);
991 }
992 
993 
994 /* ========================================================================
995  ======================= TRANSACTIONS ===================================*/
996 
997 /**
998   Transaction handling in the server
999   ==================================
1000 
1001   In each client connection, MySQL maintains two transactional
1002   states:
1003   - a statement transaction,
1004   - a standard, also called normal transaction.
1005 
1006   Historical note
1007   ---------------
1008   "Statement transaction" is a non-standard term that comes
1009   from the times when MySQL supported BerkeleyDB storage engine.
1010 
1011   First of all, it should be said that in BerkeleyDB auto-commit
1012   mode auto-commits operations that are atomic to the storage
1013   engine itself, such as a write of a record, and are too
  fine-grained to be atomic from the application perspective
1015   (MySQL). One SQL statement could involve many BerkeleyDB
1016   auto-committed operations and thus BerkeleyDB auto-commit was of
1017   little use to MySQL.
1018 
1019   Secondly, instead of SQL standard savepoints, BerkeleyDB
1020   provided the concept of "nested transactions". In a nutshell,
1021   transactions could be arbitrarily nested, but when the parent
1022   transaction was committed or aborted, all its child (nested)
  transactions were committed or aborted as well.
1024   Commit of a nested transaction, in turn, made its changes
1025   visible, but not durable: it destroyed the nested transaction,
1026   all its changes would become available to the parent and
1027   currently active nested transactions of this parent.
1028 
1029   So the mechanism of nested transactions was employed to
1030   provide "all or nothing" guarantee of SQL statements
1031   required by the standard.
1032   A nested transaction would be created at start of each SQL
1033   statement, and destroyed (committed or aborted) at statement
1034   end. Such nested transaction was internally referred to as
1035   a "statement transaction" and gave birth to the term.
1036 
1037   (Historical note ends)
1038 
1039   Since then a statement transaction is started for each statement
1040   that accesses transactional tables or uses the binary log.  If
1041   the statement succeeds, the statement transaction is committed.
1042   If the statement fails, the transaction is rolled back. Commits
1043   of statement transactions are not durable -- each such
1044   transaction is nested in the normal transaction, and if the
1045   normal transaction is rolled back, the effects of all enclosed
1046   statement transactions are undone as well.  Technically,
1047   a statement transaction can be viewed as a savepoint which is
1048   maintained automatically in order to make effects of one
1049   statement atomic.
1050 
1051   The normal transaction is started by the user and is ended
1052   usually upon a user request as well. The normal transaction
1053   encloses transactions of all statements issued between
1054   its beginning and its end.
1055   In autocommit mode, the normal transaction is equivalent
1056   to the statement transaction.
1057 
1058   Since MySQL supports PSEA (pluggable storage engine
1059   architecture), more than one transactional engine can be
1060   active at a time. Hence transactions, from the server
1061   point of view, are always distributed. In particular,
1062   transactional state is maintained independently for each
1063   engine. In order to commit a transaction the two phase
1064   commit protocol is employed.
1065 
1066   Not all statements are executed in context of a transaction.
1067   Administrative and status information statements do not modify
1068   engine data, and thus do not start a statement transaction and
1069   also have no effect on the normal transaction. Examples of such
1070   statements are SHOW STATUS and RESET SLAVE.
1071 
1072   Similarly DDL statements are not transactional,
1073   and therefore a transaction is [almost] never started for a DDL
1074   statement. The difference between a DDL statement and a purely
1075   administrative statement though is that a DDL statement always
1076   commits the current transaction before proceeding, if there is
1077   any.
1078 
1079   At last, SQL statements that work with non-transactional
1080   engines also have no effect on the transaction state of the
1081   connection. Even though they are written to the binary log,
1082   and the binary log is, overall, transactional, the writes
1083   are done in "write-through" mode, directly to the binlog
1084   file, followed with a OS cache sync, in other words,
1085   bypassing the binlog undo log (translog).
1086   They do not commit the current normal transaction.
1087   A failure of a statement that uses non-transactional tables
1088   would cause a rollback of the statement transaction, but
  in case no transactional tables are used,
1090   no statement transaction is started.
1091 
1092   Data layout
1093   -----------
1094 
1095   The server stores its transaction-related data in
1096   thd->transaction. This structure has two members of type
1097   THD_TRANS. These members correspond to the statement and
1098   normal transactions respectively:
1099 
1100   - thd->transaction.stmt contains a list of engines
1101   that are participating in the given statement
1102   - thd->transaction.all contains a list of engines that
1103   have participated in any of the statement transactions started
1104   within the context of the normal transaction.
1105   Each element of the list contains a pointer to the storage
1106   engine, engine-specific transactional data, and engine-specific
1107   transaction flags.
1108 
1109   In autocommit mode thd->transaction.all is empty.
1110   Instead, data of thd->transaction.stmt is
1111   used to commit/rollback the normal transaction.
1112 
1113   The list of registered engines has a few important properties:
1114   - no engine is registered in the list twice
  - engines are present in the list in reverse temporal order --
1116   new participants are always added to the beginning of the list.
1117 
1118   Transaction life cycle
1119   ----------------------
1120 
1121   When a new connection is established, thd->transaction
1122   members are initialized to an empty state.
1123   If a statement uses any tables, all affected engines
1124   are registered in the statement engine list. In
1125   non-autocommit mode, the same engines are registered in
1126   the normal transaction list.
1127   At the end of the statement, the server issues a commit
1128   or a roll back for all engines in the statement list.
1129   At this point transaction flags of an engine, if any, are
1130   propagated from the statement list to the list of the normal
1131   transaction.
1132   When commit/rollback is finished, the statement list is
1133   cleared. It will be filled in again by the next statement,
1134   and emptied again at the next statement's end.
1135 
1136   The normal transaction is committed in a similar way
1137   (by going over all engines in thd->transaction.all list)
1138   but at different times:
  - when a COMMIT SQL statement is issued by the user
1140   - implicitly, by the server, at the beginning of a DDL statement
1141   or SET AUTOCOMMIT={0|1} statement.
1142 
1143   The normal transaction can be rolled back as well:
1144   - if the user has requested so, by issuing ROLLBACK SQL
1145   statement
1146   - if one of the storage engines requested a rollback
1147   by setting thd->transaction_rollback_request. This may
  happen, e.g., when the transaction in the engine was
  chosen as a victim of the internal deadlock resolution algorithm
1150   and rolled back internally. When such a situation happens, there
1151   is little the server can do and the only option is to rollback
1152   transactions in all other participating engines.  In this case
1153   the rollback is accompanied by an error sent to the user.
1154 
1155   As follows from the use cases above, the normal transaction
1156   is never committed when there is an outstanding statement
1157   transaction. In most cases there is no conflict, since
1158   commits of the normal transaction are issued by a stand-alone
1159   administrative or DDL statement, thus no outstanding statement
1160   transaction of the previous statement exists. Besides,
  all statements that manipulate the normal transaction
1162   are prohibited in stored functions and triggers, therefore
1163   no conflicting situation can occur in a sub-statement either.
1164   The remaining rare cases when the server explicitly has
1165   to commit the statement transaction prior to committing the normal
1166   one cover error-handling scenarios (see for example
1167   SQLCOM_LOCK_TABLES).
1168 
1169   When committing a statement or a normal transaction, the server
1170   either uses the two-phase commit protocol, or issues a commit
1171   in each engine independently. The two-phase commit protocol
1172   is used only if:
1173   - all participating engines support two-phase commit (provide
1174     handlerton::prepare PSEA API call) and
1175   - transactions in at least two engines modify data (i.e. are
1176   not read-only).
1177 
1178   Note that the two phase commit is used for
1179   statement transactions, even though they are not durable anyway.
1180   This is done to ensure logical consistency of data in a multiple-
1181   engine transaction.
1182   For example, imagine that some day MySQL supports unique
1183   constraint checks deferred till the end of statement. In such
1184   case a commit in one of the engines may yield ER_DUP_KEY,
1185   and MySQL should be able to gracefully abort statement
1186   transactions of other participants.
1187 
1188   After the normal transaction has been committed,
1189   thd->transaction.all list is cleared.
1190 
1191   When a connection is closed, the current normal transaction, if
1192   any, is rolled back.
1193 
1194   Roles and responsibilities
1195   --------------------------
1196 
1197   The server has no way to know that an engine participates in
1198   the statement and a transaction has been started
1199   in it unless the engine says so. Thus, in order to be
1200   a part of a transaction, the engine must "register" itself.
1201   This is done by invoking trans_register_ha() server call.
1202   Normally the engine registers itself whenever handler::external_lock()
1203   is called. trans_register_ha() can be invoked many times: if
1204   an engine is already registered, the call does nothing.
1205   In case autocommit is not set, the engine must register itself
1206   twice -- both in the statement list and in the normal transaction
1207   list.
1208   In which list to register is a parameter of trans_register_ha().
1209 
1210   Note, that although the registration interface in itself is
1211   fairly clear, the current usage practice often leads to undesired
1212   effects. E.g. since a call to trans_register_ha() in most engines
1213   is embedded into implementation of handler::external_lock(), some
1214   DDL statements start a transaction (at least from the server
1215   point of view) even though they are not expected to. E.g.
1216   CREATE TABLE does not start a transaction, since
1217   handler::external_lock() is never called during CREATE TABLE. But
1218   CREATE TABLE ... SELECT does, since handler::external_lock() is
1219   called for the table that is being selected from. This has no
1220   practical effects currently, but must be kept in mind
1221   nevertheless.
1222 
1223   Once an engine is registered, the server will do the rest
1224   of the work.
1225 
1226   During statement execution, whenever any of data-modifying
1227   PSEA API methods is used, e.g. handler::write_row() or
1228   handler::update_row(), the read-write flag is raised in the
1229   statement transaction for the involved engine.
  Currently all PSEA calls are "traced", and the data can not be
1231   changed in a way other than issuing a PSEA call. Important:
1232   unless this invariant is preserved the server will not know that
1233   a transaction in a given engine is read-write and will not
1234   involve the two-phase commit protocol!
1235 
1236   At the end of a statement, server call trans_commit_stmt is
1237   invoked. This call in turn invokes handlerton::prepare()
1238   for every involved engine. Prepare is followed by a call
  to handlerton::commit_one_phase(). If a one-phase commit
1240   will suffice, handlerton::prepare() is not invoked and
1241   the server only calls handlerton::commit_one_phase().
1242   At statement commit, the statement-related read-write
1243   engine flag is propagated to the corresponding flag in the
1244   normal transaction.  When the commit is complete, the list
1245   of registered engines is cleared.
1246 
1247   Rollback is handled in a similar fashion.
1248 
1249   Additional notes on DDL and the normal transaction.
1250   ---------------------------------------------------
1251 
1252   DDLs and operations with non-transactional engines
1253   do not "register" in thd->transaction lists, and thus do not
1254   modify the transaction state. Besides, each DDL in
1255   MySQL is prefixed with an implicit normal transaction commit
1256   (a call to trans_commit_implicit()), and thus leaves nothing
1257   to modify.
1258   However, as it has been pointed out with CREATE TABLE .. SELECT,
1259   some DDL statements can start a *new* transaction.
1260 
1261   Behaviour of the server in this case is currently badly
1262   defined.
1263   DDL statements use a form of "semantic" logging
1264   to maintain atomicity: if CREATE TABLE .. SELECT failed,
1265   the newly created table is deleted.
1266   In addition, some DDL statements issue interim transaction
1267   commits: e.g. ALTER TABLE issues a commit after data is copied
1268   from the original table to the internal temporary table. Other
  statements, e.g. CREATE TABLE ... SELECT, do not always commit
  after themselves.
1271   And finally there is a group of DDL statements such as
  RENAME/DROP TABLE that don't start a new transaction
  and don't commit.
1274 
1275   This diversity makes it hard to say what will happen if
1276   by chance a stored function is invoked during a DDL --
1277   whether any modifications it makes will be committed or not
1278   is not clear. Fortunately, SQL grammar of few DDLs allows
1279   invocation of a stored function.
1280 
1281   A consistent behaviour is perhaps to always commit the normal
1282   transaction after all DDLs, just like the statement transaction
1283   is always committed at the end of all statements.
1284 */
1285 
1286 /**
1287   Register a storage engine for a transaction.
1288 
1289   Every storage engine MUST call this function when it starts
1290   a transaction or a statement (that is it must be called both for the
1291   "beginning of transaction" and "beginning of statement").
1292   Only storage engines registered for the transaction/statement
1293   will know when to commit/rollback it.
1294 
1295   @note
1296     trans_register_ha is idempotent - storage engine may register many
1297     times per transaction.
1298 
1299 */
trans_register_ha(THD * thd,bool all,handlerton * ht_arg,ulonglong trxid)1300 void trans_register_ha(THD *thd, bool all, handlerton *ht_arg, ulonglong trxid)
1301 {
1302   THD_TRANS *trans;
1303   Ha_trx_info *ha_info;
1304   DBUG_ENTER("trans_register_ha");
1305   DBUG_PRINT("enter",("%s", all ? "all" : "stmt"));
1306 
1307   if (all)
1308   {
1309     trans= &thd->transaction->all;
1310     thd->server_status|= SERVER_STATUS_IN_TRANS;
1311     if (thd->tx_read_only)
1312       thd->server_status|= SERVER_STATUS_IN_TRANS_READONLY;
1313     DBUG_PRINT("info", ("setting SERVER_STATUS_IN_TRANS"));
1314   }
1315   else
1316     trans= &thd->transaction->stmt;
1317 
1318   ha_info= thd->ha_data[ht_arg->slot].ha_info + (all ? 1 : 0);
1319 
1320   if (ha_info->is_started())
1321     DBUG_VOID_RETURN; /* already registered, return */
1322 
1323   ha_info->register_ha(trans, ht_arg);
1324 
1325   trans->no_2pc|=(ht_arg->prepare==0);
1326 
1327   /* Set implicit xid even if there's explicit XA, it will be ignored anyway. */
1328   if (thd->transaction->implicit_xid.is_null())
1329     thd->transaction->implicit_xid.set(thd->query_id);
1330 
1331 /*
1332   Register transaction start in performance schema if not done already.
1333   By doing this, we handle cases when the transaction is started implicitly in
1334   autocommit=0 mode, and cases when we are in normal autocommit=1 mode and the
1335   executed statement is a single-statement transaction.
1336 
1337   Explicitly started transactions are handled in trans_begin().
1338 
1339   Do not register transactions in which binary log is the only participating
1340   transactional storage engine.
1341 */
1342   if (thd->m_transaction_psi == NULL && ht_arg->db_type != DB_TYPE_BINLOG)
1343   {
1344     thd->m_transaction_psi= MYSQL_START_TRANSACTION(&thd->m_transaction_state,
1345           thd->get_xid(), trxid, thd->tx_isolation, thd->tx_read_only,
1346           !thd->in_multi_stmt_transaction_mode());
1347     DEBUG_SYNC(thd, "after_set_transaction_psi_before_set_transaction_gtid");
1348     //gtid_set_performance_schema_values(thd);
1349   }
1350   DBUG_VOID_RETURN;
1351 }
1352 
1353 
prepare_or_error(handlerton * ht,THD * thd,bool all)1354 static int prepare_or_error(handlerton *ht, THD *thd, bool all)
1355 {
1356 #ifdef WITH_WSREP
1357   const bool run_wsrep_hooks= wsrep_run_commit_hook(thd, all);
1358   if (run_wsrep_hooks && ht->flags & HTON_WSREP_REPLICATION &&
1359       wsrep_before_prepare(thd, all))
1360   {
1361     return(1);
1362   }
1363 #endif /* WITH_WSREP */
1364 
1365   int err= ht->prepare(ht, thd, all);
1366   status_var_increment(thd->status_var.ha_prepare_count);
1367   if (err)
1368   {
1369       my_error(ER_ERROR_DURING_COMMIT, MYF(0), err);
1370   }
1371 #ifdef WITH_WSREP
1372   if (run_wsrep_hooks && !err && ht->flags & HTON_WSREP_REPLICATION &&
1373       wsrep_after_prepare(thd, all))
1374   {
1375     err= 1;
1376   }
1377 #endif /* WITH_WSREP */
1378 
1379   return err;
1380 }
1381 
1382 
1383 /**
1384   @retval
1385     0   ok
1386   @retval
1387     1   error, transaction was rolled back
1388 */
ha_prepare(THD * thd)1389 int ha_prepare(THD *thd)
1390 {
1391   int error=0, all=1;
1392   THD_TRANS *trans=all ? &thd->transaction->all : &thd->transaction->stmt;
1393   Ha_trx_info *ha_info= trans->ha_list;
1394   DBUG_ENTER("ha_prepare");
1395 
1396   if (ha_info)
1397   {
1398     for (; ha_info; ha_info= ha_info->next())
1399     {
1400       handlerton *ht= ha_info->ht();
1401       if (ht->prepare)
1402       {
1403         if (unlikely(prepare_or_error(ht, thd, all)))
1404         {
1405           ha_rollback_trans(thd, all);
1406           error=1;
1407           break;
1408         }
1409       }
1410       else
1411       {
1412         push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN,
1413                             ER_GET_ERRNO, ER_THD(thd, ER_GET_ERRNO),
1414                             HA_ERR_WRONG_COMMAND,
1415                             ha_resolve_storage_engine_name(ht));
1416 
1417       }
1418     }
1419 
1420     DEBUG_SYNC(thd, "at_unlog_xa_prepare");
1421 
1422     if (tc_log->unlog_xa_prepare(thd, all))
1423     {
1424       ha_rollback_trans(thd, all);
1425       error=1;
1426     }
1427   }
1428 
1429   DBUG_RETURN(error);
1430 }
1431 
1432 /*
1433   Like ha_check_and_coalesce_trx_read_only to return counted number of
1434   read-write transaction participants limited to two, but works in the 'all'
1435   context.
1436   Also returns the last found rw ha_info through the 2nd argument.
1437 */
ha_count_rw_all(THD * thd,Ha_trx_info ** ptr_ha_info)1438 uint ha_count_rw_all(THD *thd, Ha_trx_info **ptr_ha_info)
1439 {
1440   unsigned rw_ha_count= 0;
1441 
1442   for (auto ha_info= thd->transaction->all.ha_list; ha_info;
1443        ha_info= ha_info->next())
1444   {
1445     if (ha_info->is_trx_read_write())
1446     {
1447       *ptr_ha_info= ha_info;
1448       if (++rw_ha_count > 1)
1449         break;
1450     }
1451   }
1452   return rw_ha_count;
1453 }
1454 
1455 /**
1456   Check if we can skip the two-phase commit.
1457 
1458   A helper function to evaluate if two-phase commit is mandatory.
1459   As a side effect, propagates the read-only/read-write flags
1460   of the statement transaction to its enclosing normal transaction.
1461 
1462   If we have at least two engines with read-write changes we must
1463   run a two-phase commit. Otherwise we can run several independent
1464   commits as the only transactional engine has read-write changes
1465   and others are read-only.
1466 
1467   @retval   0   All engines are read-only.
1468   @retval   1   We have the only engine with read-write changes.
1469   @retval   >1  More than one engine have read-write changes.
1470                 Note: return value might NOT be the exact number of
1471                 engines with read-write changes.
1472 */
1473 
1474 static
1475 uint
ha_check_and_coalesce_trx_read_only(THD * thd,Ha_trx_info * ha_list,bool all)1476 ha_check_and_coalesce_trx_read_only(THD *thd, Ha_trx_info *ha_list,
1477                                     bool all)
1478 {
1479   /* The number of storage engines that have actual changes. */
1480   unsigned rw_ha_count= 0;
1481   Ha_trx_info *ha_info;
1482 
1483   for (ha_info= ha_list; ha_info; ha_info= ha_info->next())
1484   {
1485     if (ha_info->is_trx_read_write())
1486       ++rw_ha_count;
1487 
1488     if (! all)
1489     {
1490       Ha_trx_info *ha_info_all= &thd->ha_data[ha_info->ht()->slot].ha_info[1];
1491       DBUG_ASSERT(ha_info != ha_info_all);
1492       /*
1493         Merge read-only/read-write information about statement
1494         transaction to its enclosing normal transaction. Do this
1495         only if in a real transaction -- that is, if we know
1496         that ha_info_all is registered in thd->transaction.all.
1497         Since otherwise we only clutter the normal transaction flags.
1498       */
1499       if (ha_info_all->is_started()) /* FALSE if autocommit. */
1500         ha_info_all->coalesce_trx_with(ha_info);
1501     }
1502     else if (rw_ha_count > 1)
1503     {
1504       /*
1505         It is a normal transaction, so we don't need to merge read/write
1506         information up, and the need for two-phase commit has been
1507         already established. Break the loop prematurely.
1508       */
1509       break;
1510     }
1511   }
1512   return rw_ha_count;
1513 }
1514 
1515 
1516 /**
1517   @retval
1518     0   ok
1519   @retval
1520     1   transaction was rolled back
1521   @retval
1522     2   error during commit, data may be inconsistent
1523 
1524   @todo
1525     Since we don't support nested statement transactions in 5.0,
1526     we can't commit or rollback stmt transactions while we are inside
1527     stored functions or triggers. So we simply do nothing now.
1528     TODO: This should be fixed in later ( >= 5.1) releases.
1529 */
ha_commit_trans(THD * thd,bool all)1530 int ha_commit_trans(THD *thd, bool all)
1531 {
1532   int error= 0, cookie;
1533   /*
1534     'all' means that this is either an explicit commit issued by
1535     user, or an implicit commit issued by a DDL.
1536   */
1537   THD_TRANS *trans= all ? &thd->transaction->all : &thd->transaction->stmt;
1538   /*
1539     "real" is a nick name for a transaction for which a commit will
1540     make persistent changes. E.g. a 'stmt' transaction inside an 'all'
1541     transaction is not 'real': even though it's possible to commit it,
1542     the changes are not durable as they might be rolled back if the
1543     enclosing 'all' transaction is rolled back.
1544   */
1545   bool is_real_trans= ((all || thd->transaction->all.ha_list == 0) &&
1546                        !(thd->variables.option_bits & OPTION_GTID_BEGIN));
1547   Ha_trx_info *ha_info= trans->ha_list;
1548   bool need_prepare_ordered, need_commit_ordered;
1549   my_xid xid;
1550 #ifdef WITH_WSREP
1551   const bool run_wsrep_hooks= wsrep_run_commit_hook(thd, all);
1552 #endif /* WITH_WSREP */
1553   DBUG_ENTER("ha_commit_trans");
1554   DBUG_PRINT("info",("thd: %p  option_bits: %lu  all: %d",
1555                      thd, (ulong) thd->variables.option_bits, all));
1556 
1557   /* Just a random warning to test warnings pushed during autocommit. */
1558   DBUG_EXECUTE_IF("warn_during_ha_commit_trans",
1559     push_warning(thd, Sql_condition::WARN_LEVEL_WARN,
1560                  ER_WARNING_NOT_COMPLETE_ROLLBACK,
1561                  ER_THD(thd, ER_WARNING_NOT_COMPLETE_ROLLBACK)););
1562 
1563   DBUG_PRINT("info",
1564              ("all: %d  thd->in_sub_stmt: %d  ha_info: %p  is_real_trans: %d",
1565               all, thd->in_sub_stmt, ha_info, is_real_trans));
1566   /*
1567     We must not commit the normal transaction if a statement
1568     transaction is pending. Otherwise statement transaction
1569     flags will not get propagated to its normal transaction's
1570     counterpart.
1571   */
1572   DBUG_ASSERT(thd->transaction->stmt.ha_list == NULL ||
1573               trans == &thd->transaction->stmt);
1574 
1575   if (thd->in_sub_stmt)
1576   {
1577     DBUG_ASSERT(0);
1578     /*
1579       Since we don't support nested statement transactions in 5.0,
1580       we can't commit or rollback stmt transactions while we are inside
1581       stored functions or triggers. So we simply do nothing now.
1582       TODO: This should be fixed in later ( >= 5.1) releases.
1583     */
1584     if (!all)
1585       DBUG_RETURN(0);
1586     /*
1587       We assume that all statements which commit or rollback main transaction
1588       are prohibited inside of stored functions or triggers. So they should
1589       bail out with error even before ha_commit_trans() call. To be 100% safe
1590       let us throw error in non-debug builds.
1591     */
1592     my_error(ER_COMMIT_NOT_ALLOWED_IN_SF_OR_TRG, MYF(0));
1593     DBUG_RETURN(2);
1594   }
1595 
1596   if (!ha_info)
1597   {
1598     /*
1599       Free resources and perform other cleanup even for 'empty' transactions.
1600     */
1601     if (is_real_trans)
1602     {
1603       thd->transaction->cleanup();
1604       MYSQL_COMMIT_TRANSACTION(thd->m_transaction_psi);
1605       thd->m_transaction_psi= NULL;
1606     }
1607 #ifdef WITH_WSREP
1608     if (wsrep_is_active(thd) && is_real_trans && !error)
1609       wsrep_commit_empty(thd, all);
1610 #endif /* WITH_WSREP */
1611 
1612     DBUG_RETURN(0);
1613   }
1614 
1615   DBUG_EXECUTE_IF("crash_commit_before", DBUG_SUICIDE(););
1616 
1617   /* Close all cursors that can not survive COMMIT */
1618   if (is_real_trans)                          /* not a statement commit */
1619     thd->stmt_map.close_transient_cursors();
1620 
1621   uint rw_ha_count= ha_check_and_coalesce_trx_read_only(thd, ha_info, all);
1622   /* rw_trans is TRUE when we in a transaction changing data */
1623   bool rw_trans= is_real_trans &&
1624                  (rw_ha_count > (thd->is_current_stmt_binlog_disabled()?0U:1U));
1625   MDL_request mdl_backup;
1626   DBUG_PRINT("info", ("is_real_trans: %d  rw_trans:  %d  rw_ha_count: %d",
1627                       is_real_trans, rw_trans, rw_ha_count));
1628 
1629   if (rw_trans)
1630   {
1631     /*
1632       Acquire a metadata lock which will ensure that COMMIT is blocked
1633       by an active FLUSH TABLES WITH READ LOCK (and vice versa:
1634       COMMIT in progress blocks FTWRL).
1635 
1636       We allow the owner of FTWRL to COMMIT; we assume that it knows
1637       what it does.
1638     */
1639     MDL_REQUEST_INIT(&mdl_backup, MDL_key::BACKUP, "", "", MDL_BACKUP_COMMIT,
1640                      MDL_EXPLICIT);
1641 
1642     if (!WSREP(thd))
1643     {
1644       if (thd->mdl_context.acquire_lock(&mdl_backup,
1645                                         thd->variables.lock_wait_timeout))
1646       {
1647         my_error(ER_ERROR_DURING_COMMIT, MYF(0), 1);
1648         ha_rollback_trans(thd, all);
1649         DBUG_RETURN(1);
1650       }
1651       thd->backup_commit_lock= &mdl_backup;
1652     }
1653     DEBUG_SYNC(thd, "ha_commit_trans_after_acquire_commit_lock");
1654   }
1655 
1656   if (rw_trans &&
1657       opt_readonly &&
1658       !(thd->security_ctx->master_access & PRIV_IGNORE_READ_ONLY) &&
1659       !thd->slave_thread)
1660   {
1661     my_error(ER_OPTION_PREVENTS_STATEMENT, MYF(0), "--read-only");
1662     goto err;
1663   }
1664 
1665 #if 1 // FIXME: This should be done in ha_prepare().
1666   if (rw_trans || (thd->lex->sql_command == SQLCOM_ALTER_TABLE &&
1667                    thd->lex->alter_info.flags & ALTER_ADD_SYSTEM_VERSIONING &&
1668                    is_real_trans))
1669   {
1670     ulonglong trx_start_id= 0, trx_end_id= 0;
1671     for (Ha_trx_info *ha_info= trans->ha_list; ha_info; ha_info= ha_info->next())
1672     {
1673       if (ha_info->ht()->prepare_commit_versioned)
1674       {
1675         trx_end_id= ha_info->ht()->prepare_commit_versioned(thd, &trx_start_id);
1676         if (trx_end_id)
1677           break; // FIXME: use a common ID for cross-engine transactions
1678       }
1679     }
1680 
1681     if (trx_end_id)
1682     {
1683       if (!TR_table::use_transaction_registry)
1684       {
1685         my_error(ER_VERS_TRT_IS_DISABLED, MYF(0));
1686         goto err;
1687       }
1688       DBUG_ASSERT(trx_start_id);
1689 #ifdef WITH_WSREP
1690       bool saved_wsrep_on= thd->variables.wsrep_on;
1691       thd->variables.wsrep_on= false;
1692 #endif
1693       TR_table trt(thd, true);
1694       if (trt.update(trx_start_id, trx_end_id))
1695       {
1696 #ifdef WITH_WSREP
1697         thd->variables.wsrep_on= saved_wsrep_on;
1698 #endif
1699         (void) trans_rollback_stmt(thd);
1700         goto err;
1701       }
1702       // Here, the call will not commit inside InnoDB. It is only working
1703       // around closing thd->transaction.stmt open by TR_table::open().
1704       if (all)
1705         commit_one_phase_2(thd, false, &thd->transaction->stmt, false);
1706 #ifdef WITH_WSREP
1707       thd->variables.wsrep_on= saved_wsrep_on;
1708 #endif
1709     }
1710   }
1711 #endif
1712 
1713   if (trans->no_2pc || (rw_ha_count <= 1))
1714   {
1715 #ifdef WITH_WSREP
1716     /*
1717       This commit will not go through log_and_order() where wsrep commit
1718       ordering is normally done. Commit ordering must be done here.
1719     */
1720     if (run_wsrep_hooks)
1721       error= wsrep_before_commit(thd, all);
1722     if (error)
1723     {
1724       ha_rollback_trans(thd, FALSE);
1725       goto wsrep_err;
1726     }
1727 #endif /* WITH_WSREP */
1728     error= ha_commit_one_phase(thd, all);
1729 #ifdef WITH_WSREP
1730     // Here in case of error we must return 2 for inconsistency
1731     if (run_wsrep_hooks && !error)
1732       error= wsrep_after_commit(thd, all) ? 2 : 0;
1733 #endif /* WITH_WSREP */
1734     goto done;
1735   }
1736 
1737   need_prepare_ordered= FALSE;
1738   need_commit_ordered= FALSE;
1739 
1740   for (Ha_trx_info *hi= ha_info; hi; hi= hi->next())
1741   {
1742     handlerton *ht= hi->ht();
1743     /*
1744       Do not call two-phase commit if this particular
1745       transaction is read-only. This allows for simpler
1746       implementation in engines that are always read-only.
1747     */
1748     if (! hi->is_trx_read_write())
1749       continue;
1750     /*
1751       Sic: we know that prepare() is not NULL since otherwise
1752       trans->no_2pc would have been set.
1753     */
1754     if (unlikely(prepare_or_error(ht, thd, all)))
1755       goto err;
1756 
1757     need_prepare_ordered|= (ht->prepare_ordered != NULL);
1758     need_commit_ordered|= (ht->commit_ordered != NULL);
1759   }
1760   DEBUG_SYNC(thd, "ha_commit_trans_after_prepare");
1761   DBUG_EXECUTE_IF("crash_commit_after_prepare", DBUG_SUICIDE(););
1762 
1763   if (!is_real_trans)
1764   {
1765     error= commit_one_phase_2(thd, all, trans, is_real_trans);
1766     goto done;
1767   }
1768 
1769   DBUG_ASSERT(thd->transaction->implicit_xid.get_my_xid() ==
1770               thd->transaction->implicit_xid.quick_get_my_xid());
1771   DBUG_ASSERT(!thd->transaction->xid_state.is_explicit_XA() ||
1772               thd->lex->xa_opt == XA_ONE_PHASE);
1773   xid= thd->transaction->implicit_xid.quick_get_my_xid();
1774 
1775 #ifdef WITH_WSREP
1776   if (run_wsrep_hooks && !error)
1777   {
1778     wsrep::seqno const s= wsrep_xid_seqno(thd->wsrep_xid);
1779     if (!s.is_undefined())
1780     {
1781       // xid was rewritten by wsrep
1782       xid= s.get();
1783     }
1784   }
1785   if (run_wsrep_hooks && (error = wsrep_before_commit(thd, all)))
1786     goto wsrep_err;
1787 #endif /* WITH_WSREP */
1788   DEBUG_SYNC(thd, "ha_commit_trans_before_log_and_order");
1789   cookie= tc_log->log_and_order(thd, xid, all, need_prepare_ordered,
1790                                 need_commit_ordered);
1791   if (!cookie)
1792   {
1793     WSREP_DEBUG("log_and_order has failed %llu %d", thd->thread_id, cookie);
1794     goto err;
1795   }
1796   DEBUG_SYNC(thd, "ha_commit_trans_after_log_and_order");
1797   DBUG_EXECUTE_IF("crash_commit_after_log", DBUG_SUICIDE(););
1798 
1799   error= commit_one_phase_2(thd, all, trans, is_real_trans) ? 2 : 0;
1800 #ifdef WITH_WSREP
1801   if (run_wsrep_hooks &&
1802       (error || (error = wsrep_after_commit(thd, all))))
1803   {
1804     error = 2;
1805     mysql_mutex_lock(&thd->LOCK_thd_data);
1806     if (wsrep_must_abort(thd))
1807     {
1808       mysql_mutex_unlock(&thd->LOCK_thd_data);
1809       (void)tc_log->unlog(cookie, xid);
1810       goto wsrep_err;
1811     }
1812     mysql_mutex_unlock(&thd->LOCK_thd_data);
1813   }
1814 #endif /* WITH_WSREP */
1815   DBUG_EXECUTE_IF("crash_commit_before_unlog", DBUG_SUICIDE(););
1816   if (tc_log->unlog(cookie, xid))
1817     error= 2;                                /* Error during commit */
1818 
1819 done:
1820   if (is_real_trans)
1821   {
1822     MYSQL_COMMIT_TRANSACTION(thd->m_transaction_psi);
1823     thd->m_transaction_psi= NULL;
1824   }
1825 
1826   DBUG_EXECUTE_IF("crash_commit_after", DBUG_SUICIDE(););
1827 
1828   mysql_mutex_assert_not_owner(&LOCK_prepare_ordered);
1829   mysql_mutex_assert_not_owner(mysql_bin_log.get_log_lock());
1830   mysql_mutex_assert_not_owner(&LOCK_after_binlog_sync);
1831   mysql_mutex_assert_not_owner(&LOCK_commit_ordered);
1832 #ifdef HAVE_REPLICATION
1833   repl_semisync_master.wait_after_commit(thd, all);
1834   DEBUG_SYNC(thd, "after_group_after_commit");
1835 #endif
1836   goto end;
1837 
1838   /* Come here if error and we need to rollback. */
1839 #ifdef WITH_WSREP
1840 wsrep_err:
1841   mysql_mutex_lock(&thd->LOCK_thd_data);
1842   if (run_wsrep_hooks && wsrep_must_abort(thd))
1843   {
1844     WSREP_DEBUG("BF abort has happened after prepare & certify");
1845     mysql_mutex_unlock(&thd->LOCK_thd_data);
1846     ha_rollback_trans(thd, TRUE);
1847   }
1848   else
1849     mysql_mutex_unlock(&thd->LOCK_thd_data);
1850 
1851 #endif /* WITH_WSREP */
1852 err:
1853   error= 1;                                  /* Transaction was rolled back */
1854   /*
1855     In parallel replication, rollback is delayed, as there is extra replication
1856     book-keeping to be done before rolling back and allowing a conflicting
1857     transaction to continue (MDEV-7458).
1858   */
1859   if (!(thd->rgi_slave && thd->rgi_slave->is_parallel_exec))
1860     ha_rollback_trans(thd, all);
1861   else
1862   {
1863     /*
1864       We are not really doing a rollback here, but the code in trans_commit()
1865       requres that m_transaction_psi is 0 when we return from this function.
1866     */
1867     MYSQL_ROLLBACK_TRANSACTION(thd->m_transaction_psi);
1868     thd->m_transaction_psi= NULL;
1869     WSREP_DEBUG("rollback skipped %p %d",thd->rgi_slave,
1870                 thd->rgi_slave->is_parallel_exec);
1871   }
1872 end:
1873   if (mdl_backup.ticket)
1874   {
1875     /*
1876       We do not always immediately release transactional locks
1877       after ha_commit_trans() (see uses of ha_enable_transaction()),
1878       thus we release the commit blocker lock as soon as it's
1879       not needed.
1880     */
1881     thd->mdl_context.release_lock(mdl_backup.ticket);
1882   }
1883   thd->backup_commit_lock= 0;
1884 #ifdef WITH_WSREP
1885   if (wsrep_is_active(thd) && is_real_trans && !error &&
1886       (rw_ha_count == 0 || all) &&
1887       wsrep_not_committed(thd))
1888   {
1889     wsrep_commit_empty(thd, all);
1890   }
1891 #endif /* WITH_WSREP */
1892 
1893   DBUG_RETURN(error);
1894 }
1895 
1896 /**
1897   @note
1898   This function does not care about global read lock or backup locks,
1899   the caller should.
1900 
1901   @param[in]  all  Is set in case of explicit commit
1902                    (COMMIT statement), or implicit commit
1903                    issued by DDL. Is not set when called
1904                    at the end of statement, even if
1905                    autocommit=1.
1906 */
1907 
ha_commit_one_phase(THD * thd,bool all)1908 int ha_commit_one_phase(THD *thd, bool all)
1909 {
1910   THD_TRANS *trans=all ? &thd->transaction->all : &thd->transaction->stmt;
1911   /*
1912     "real" is a nick name for a transaction for which a commit will
1913     make persistent changes. E.g. a 'stmt' transaction inside a 'all'
1914     transaction is not 'real': even though it's possible to commit it,
1915     the changes are not durable as they might be rolled back if the
1916     enclosing 'all' transaction is rolled back.
1917     We establish the value of 'is_real_trans' by checking
1918     if it's an explicit COMMIT/BEGIN statement, or implicit
1919     commit issued by DDL (all == TRUE), or if we're running
1920     in autocommit mode (it's only in the autocommit mode
1921     ha_commit_one_phase() can be called with an empty
1922     transaction.all.ha_list, see why in trans_register_ha()).
1923   */
1924   bool is_real_trans= ((all || thd->transaction->all.ha_list == 0) &&
1925                        !(thd->variables.option_bits & OPTION_GTID_BEGIN));
1926   int res;
1927   DBUG_ENTER("ha_commit_one_phase");
1928   if (is_real_trans)
1929   {
1930     DEBUG_SYNC(thd, "ha_commit_one_phase");
1931     if ((res= thd->wait_for_prior_commit()))
1932       DBUG_RETURN(res);
1933   }
1934   res= commit_one_phase_2(thd, all, trans, is_real_trans);
1935   DBUG_RETURN(res);
1936 }
1937 
1938 
1939 static int
commit_one_phase_2(THD * thd,bool all,THD_TRANS * trans,bool is_real_trans)1940 commit_one_phase_2(THD *thd, bool all, THD_TRANS *trans, bool is_real_trans)
1941 {
1942   int error= 0;
1943   uint count= 0;
1944   Ha_trx_info *ha_info= trans->ha_list, *ha_info_next;
1945   DBUG_ENTER("commit_one_phase_2");
1946   if (is_real_trans)
1947     DEBUG_SYNC(thd, "commit_one_phase_2");
1948 
1949   if (ha_info)
1950   {
1951     for (; ha_info; ha_info= ha_info_next)
1952     {
1953       int err;
1954       handlerton *ht= ha_info->ht();
1955       if ((err= ht->commit(ht, thd, all)))
1956       {
1957         my_error(ER_ERROR_DURING_COMMIT, MYF(0), err);
1958         error=1;
1959       }
1960       /* Should this be done only if is_real_trans is set ? */
1961       status_var_increment(thd->status_var.ha_commit_count);
1962       if (is_real_trans && ht != binlog_hton && ha_info->is_trx_read_write())
1963         ++count;
1964       ha_info_next= ha_info->next();
1965       ha_info->reset(); /* keep it conveniently zero-filled */
1966     }
1967     trans->ha_list= 0;
1968     trans->no_2pc=0;
1969     if (all)
1970     {
1971 #ifdef HAVE_QUERY_CACHE
1972       if (thd->transaction->changed_tables)
1973         query_cache.invalidate(thd, thd->transaction->changed_tables);
1974 #endif
1975     }
1976   }
1977 
1978   /* Free resources and perform other cleanup even for 'empty' transactions. */
1979   if (is_real_trans)
1980   {
1981     thd->has_waiter= false;
1982     thd->transaction->cleanup();
1983     if (count >= 2)
1984       statistic_increment(transactions_multi_engine, LOCK_status);
1985   }
1986 
1987   DBUG_RETURN(error);
1988 }
1989 
1990 
ha_rollback_trans(THD * thd,bool all)1991 int ha_rollback_trans(THD *thd, bool all)
1992 {
1993   int error=0;
1994   THD_TRANS *trans=all ? &thd->transaction->all : &thd->transaction->stmt;
1995   Ha_trx_info *ha_info= trans->ha_list, *ha_info_next;
1996   /*
1997     "real" is a nick name for a transaction for which a commit will
1998     make persistent changes. E.g. a 'stmt' transaction inside a 'all'
1999     transaction is not 'real': even though it's possible to commit it,
2000     the changes are not durable as they might be rolled back if the
2001     enclosing 'all' transaction is rolled back.
2002     We establish the value of 'is_real_trans' by checking
2003     if it's an explicit COMMIT or BEGIN statement, or implicit
2004     commit issued by DDL (in these cases all == TRUE),
2005     or if we're running in autocommit mode (it's only in the autocommit mode
2006     ha_commit_one_phase() is called with an empty
2007     transaction.all.ha_list, see why in trans_register_ha()).
2008   */
2009   bool is_real_trans=all || thd->transaction->all.ha_list == 0;
2010   DBUG_ENTER("ha_rollback_trans");
2011 
2012   /*
2013     We must not rollback the normal transaction if a statement
2014     transaction is pending.
2015   */
2016   DBUG_ASSERT(thd->transaction->stmt.ha_list == NULL ||
2017               trans == &thd->transaction->stmt);
2018 
2019 #ifdef HAVE_REPLICATION
2020   if (is_real_trans)
2021   {
2022     /*
2023       In parallel replication, if we need to rollback during commit, we must
2024       first inform following transactions that we are going to abort our commit
2025       attempt. Otherwise those following transactions can run too early, and
2026       possibly cause replication to fail. See comments in retry_event_group().
2027 
2028       There were several bugs with this in the past that were very hard to
2029       track down (MDEV-7458, MDEV-8302). So we add here an assertion for
2030       rollback without signalling following transactions. And in release
2031       builds, we explicitly do the signalling before rolling back.
2032     */
2033     DBUG_ASSERT(!(thd->rgi_slave && thd->rgi_slave->did_mark_start_commit) ||
2034                 thd->transaction->xid_state.is_explicit_XA());
2035     if (thd->rgi_slave && thd->rgi_slave->did_mark_start_commit)
2036       thd->rgi_slave->unmark_start_commit();
2037   }
2038 #endif
2039 
2040   if (thd->in_sub_stmt)
2041   {
2042     DBUG_ASSERT(0);
2043     /*
2044       If we are inside stored function or trigger we should not commit or
2045       rollback current statement transaction. See comment in ha_commit_trans()
2046       call for more information.
2047     */
2048     if (!all)
2049       DBUG_RETURN(0);
2050     my_error(ER_COMMIT_NOT_ALLOWED_IN_SF_OR_TRG, MYF(0));
2051     DBUG_RETURN(1);
2052   }
2053 
2054 #ifdef WITH_WSREP
2055   (void) wsrep_before_rollback(thd, all);
2056 #endif /* WITH_WSREP */
2057   if (ha_info)
2058   {
2059     /* Close all cursors that can not survive ROLLBACK */
2060     if (is_real_trans)                          /* not a statement commit */
2061       thd->stmt_map.close_transient_cursors();
2062 
2063     for (; ha_info; ha_info= ha_info_next)
2064     {
2065       int err;
2066       handlerton *ht= ha_info->ht();
2067       if ((err= ht->rollback(ht, thd, all)))
2068       {
2069         // cannot happen
2070         my_error(ER_ERROR_DURING_ROLLBACK, MYF(0), err);
2071         error=1;
2072 #ifdef WITH_WSREP
2073         WSREP_WARN("handlerton rollback failed, thd %lld %lld conf %d SQL %s",
2074                    thd->thread_id, thd->query_id, thd->wsrep_trx().state(),
2075                    thd->query());
2076 #endif /* WITH_WSREP */
2077       }
2078       status_var_increment(thd->status_var.ha_rollback_count);
2079       ha_info_next= ha_info->next();
2080       ha_info->reset(); /* keep it conveniently zero-filled */
2081     }
2082     trans->ha_list= 0;
2083     trans->no_2pc=0;
2084   }
2085 
2086 #ifdef WITH_WSREP
2087   if (thd->is_error())
2088   {
2089     WSREP_DEBUG("ha_rollback_trans(%lld, %s) rolled back: %s: %s; is_real %d",
2090                 thd->thread_id, all?"TRUE":"FALSE", wsrep_thd_query(thd),
2091                 thd->get_stmt_da()->message(), is_real_trans);
2092   }
2093   (void) wsrep_after_rollback(thd, all);
2094 #endif /* WITH_WSREP */
2095 
2096   if (all || !thd->in_active_multi_stmt_transaction())
2097   {
2098     MYSQL_ROLLBACK_TRANSACTION(thd->m_transaction_psi);
2099     thd->m_transaction_psi= NULL;
2100   }
2101 
2102   /* Always cleanup. Even if nht==0. There may be savepoints. */
2103   if (is_real_trans)
2104   {
2105     /*
2106       Thanks to possibility of MDL deadlock rollback request can come even if
2107       transaction hasn't been started in any transactional storage engine.
2108     */
2109     if (thd->transaction_rollback_request &&
2110         thd->transaction->xid_state.is_explicit_XA())
2111       thd->transaction->xid_state.set_error(thd->get_stmt_da()->sql_errno());
2112 
2113     thd->has_waiter= false;
2114     thd->transaction->cleanup();
2115   }
2116   if (all)
2117     thd->transaction_rollback_request= FALSE;
2118 
2119   /*
2120     If a non-transactional table was updated, warn; don't warn if this is a
2121     slave thread (because when a slave thread executes a ROLLBACK, it has
2122     been read from the binary log, so it's 100% sure and normal to produce
2123     error ER_WARNING_NOT_COMPLETE_ROLLBACK. If we sent the warning to the
2124     slave SQL thread, it would not stop the thread but just be printed in
2125     the error log; but we don't want users to wonder why they have this
2126     message in the error log, so we don't send it.
2127 
2128     We don't have to test for thd->killed == KILL_SYSTEM_THREAD as
2129     it doesn't matter if a warning is pushed to a system thread or not:
2130     No one will see it...
2131   */
2132   if (is_real_trans && thd->transaction->all.modified_non_trans_table &&
2133       !thd->slave_thread && thd->killed < KILL_CONNECTION)
2134     push_warning(thd, Sql_condition::WARN_LEVEL_WARN,
2135                  ER_WARNING_NOT_COMPLETE_ROLLBACK,
2136                  ER_THD(thd, ER_WARNING_NOT_COMPLETE_ROLLBACK));
2137 #ifdef HAVE_REPLICATION
2138   repl_semisync_master.wait_after_rollback(thd, all);
2139 #endif
2140   DBUG_RETURN(error);
2141 }
2142 
2143 
2144 struct xahton_st {
2145   XID *xid;
2146   int result;
2147 };
2148 
xacommit_handlerton(THD * unused1,plugin_ref plugin,void * arg)2149 static my_bool xacommit_handlerton(THD *unused1, plugin_ref plugin,
2150                                    void *arg)
2151 {
2152   handlerton *hton= plugin_hton(plugin);
2153   if (hton->recover)
2154   {
2155     hton->commit_by_xid(hton, ((struct xahton_st *)arg)->xid);
2156     ((struct xahton_st *)arg)->result= 0;
2157   }
2158   return FALSE;
2159 }
2160 
xarollback_handlerton(THD * unused1,plugin_ref plugin,void * arg)2161 static my_bool xarollback_handlerton(THD *unused1, plugin_ref plugin,
2162                                      void *arg)
2163 {
2164   handlerton *hton= plugin_hton(plugin);
2165   if (hton->recover)
2166   {
2167     hton->rollback_by_xid(hton, ((struct xahton_st *)arg)->xid);
2168     ((struct xahton_st *)arg)->result= 0;
2169   }
2170   return FALSE;
2171 }
2172 
2173 
ha_commit_or_rollback_by_xid(XID * xid,bool commit)2174 int ha_commit_or_rollback_by_xid(XID *xid, bool commit)
2175 {
2176   struct xahton_st xaop;
2177   xaop.xid= xid;
2178   xaop.result= 1;
2179 
2180   plugin_foreach(NULL, commit ? xacommit_handlerton : xarollback_handlerton,
2181                  MYSQL_STORAGE_ENGINE_PLUGIN, &xaop);
2182 
2183   return xaop.result;
2184 }
2185 
2186 
2187 #ifndef DBUG_OFF
2188 /** Converts XID to string.
2189 
2190 @param[out] buf output buffer
2191 @param[in] xid XID to convert
2192 
2193 @return pointer to converted string
2194 
2195 @note This does not need to be multi-byte safe or anything */
xid_to_str(char * buf,const XID & xid)2196 static char *xid_to_str(char *buf, const XID &xid)
2197 {
2198   int i;
2199   char *s=buf;
2200   *s++='\'';
2201   for (i= 0; i < xid.gtrid_length + xid.bqual_length; i++)
2202   {
2203     uchar c= (uchar) xid.data[i];
2204     /* is_next_dig is set if next character is a number */
2205     bool is_next_dig= FALSE;
2206     if (i < XIDDATASIZE)
2207     {
2208       char ch= xid.data[i + 1];
2209       is_next_dig= (ch >= '0' && ch <='9');
2210     }
2211     if (i == xid.gtrid_length)
2212     {
2213       *s++='\'';
2214       if (xid.bqual_length)
2215       {
2216         *s++='.';
2217         *s++='\'';
2218       }
2219     }
2220     if (c < 32 || c > 126)
2221     {
2222       *s++='\\';
2223       /*
2224         If next character is a number, write current character with
2225         3 octal numbers to ensure that the next number is not seen
2226         as part of the octal number
2227       */
2228       if (c > 077 || is_next_dig)
2229         *s++=_dig_vec_lower[c >> 6];
2230       if (c > 007 || is_next_dig)
2231         *s++=_dig_vec_lower[(c >> 3) & 7];
2232       *s++=_dig_vec_lower[c & 7];
2233     }
2234     else
2235     {
2236       if (c == '\'' || c == '\\')
2237         *s++='\\';
2238       *s++=c;
2239     }
2240   }
2241   *s++='\'';
2242   *s=0;
2243   return buf;
2244 }
2245 #endif
2246 
wsrep_order_and_check_continuity(XID * list,int len)2247 static my_xid wsrep_order_and_check_continuity(XID *list, int len)
2248 {
2249 #ifdef WITH_WSREP
2250   wsrep_sort_xid_array(list, len);
2251   wsrep::gtid cur_position= wsrep_get_SE_checkpoint<wsrep::gtid>();
2252   long long cur_seqno= cur_position.seqno().get();
2253   for (int i= 0; i < len; ++i)
2254   {
2255     if (!wsrep_is_wsrep_xid(list + i) ||
2256         wsrep_xid_seqno(list + i) != cur_seqno + 1)
2257     {
2258       WSREP_WARN("Discovered discontinuity in recovered wsrep "
2259                  "transaction XIDs. Truncating the recovery list to "
2260                  "%d entries", i);
2261       break;
2262     }
2263     ++cur_seqno;
2264   }
2265   WSREP_INFO("Last wsrep seqno to be recovered %lld", cur_seqno);
2266   return (cur_seqno < 0 ? 0 : cur_seqno);
2267 #else
2268   return 0;
2269 #endif /* WITH_WSREP */
2270 }
2271 /**
2272   recover() step of xa.
2273 
2274   @note
2275     there are three modes of operation:
2276     - automatic recover after a crash
2277     in this case commit_list != 0, tc_heuristic_recover==0
2278     all xids from commit_list are committed, others are rolled back
2279     - manual (heuristic) recover
2280     in this case commit_list==0, tc_heuristic_recover != 0
2281     DBA has explicitly specified that all prepared transactions should
2282     be committed (or rolled back).
2283     - no recovery (MySQL did not detect a crash)
2284     in this case commit_list==0, tc_heuristic_recover == 0
2285     there should be no prepared transactions in this case.
2286 */
2287 struct xarecover_st
2288 {
2289   int len, found_foreign_xids, found_my_xids;
2290   XID *list;
2291   HASH *commit_list;
2292   bool dry_run;
2293 };
2294 
xarecover_handlerton(THD * unused,plugin_ref plugin,void * arg)2295 static my_bool xarecover_handlerton(THD *unused, plugin_ref plugin,
2296                                     void *arg)
2297 {
2298   handlerton *hton= plugin_hton(plugin);
2299   struct xarecover_st *info= (struct xarecover_st *) arg;
2300   int got;
2301 
2302   if (hton->recover)
2303   {
2304     while ((got= hton->recover(hton, info->list, info->len)) > 0 )
2305     {
2306       sql_print_information("Found %d prepared transaction(s) in %s",
2307                             got, hton_name(hton)->str);
2308       /* If wsrep_on=ON, XIDs are first ordered and then the range of
2309          recovered XIDs is checked for continuity. All the XIDs which
2310          are in continuous range can be safely committed if binlog
2311          is off since they have already ordered and certified in the
2312          cluster.
2313 
2314          The discontinuity of wsrep XIDs may happen because the GTID
2315          is assigned for transaction in wsrep_before_prepare(), but the
2316          commit order is entered in wsrep_before_commit(). This means that
2317          transactions may run prepare step out of order and may
2318          result in gap in wsrep XIDs. This can be the case for example
2319          if we have T1 with seqno 1 and T2 with seqno 2 and the server
2320          crashes after T2 finishes prepare step but before T1 starts
2321          the prepare.
2322       */
2323       my_xid wsrep_limit __attribute__((unused))= 0;
2324 
2325       /* Note that we could call this for binlog also that
2326          will not have WSREP(thd) but global wsrep on might
2327          be true.
2328       */
2329       if (WSREP_ON)
2330         wsrep_limit= wsrep_order_and_check_continuity(info->list, got);
2331 
2332       for (int i=0; i < got; i ++)
2333       {
2334         my_xid x= IF_WSREP(wsrep_is_wsrep_xid(&info->list[i]) ?
2335                            wsrep_xid_seqno(&info->list[i]) :
2336                            info->list[i].get_my_xid(),
2337                            info->list[i].get_my_xid());
2338         if (!x) // not "mine" - that is generated by external TM
2339         {
2340           DBUG_EXECUTE("info",{
2341             char buf[XIDDATASIZE*4+6];
2342             _db_doprnt_("ignore xid %s", xid_to_str(buf, info->list[i]));
2343             });
2344           xid_cache_insert(info->list + i);
2345           info->found_foreign_xids++;
2346           continue;
2347         }
2348         if (IF_WSREP(!(wsrep_emulate_bin_log &&
2349                        wsrep_is_wsrep_xid(info->list + i) &&
2350                        x <= wsrep_limit) && info->dry_run,
2351                      info->dry_run))
2352         {
2353           info->found_my_xids++;
2354           continue;
2355         }
2356         // recovery mode
2357         if (IF_WSREP((wsrep_emulate_bin_log &&
2358                       wsrep_is_wsrep_xid(info->list + i) &&
2359                       x <= wsrep_limit), false) ||
2360             (info->commit_list ?
2361              my_hash_search(info->commit_list, (uchar *)&x, sizeof(x)) != 0 :
2362              tc_heuristic_recover == TC_HEURISTIC_RECOVER_COMMIT))
2363         {
2364           int rc= hton->commit_by_xid(hton, info->list+i);
2365           if (rc == 0)
2366           {
2367             DBUG_EXECUTE("info",{
2368               char buf[XIDDATASIZE*4+6];
2369               _db_doprnt_("commit xid %s", xid_to_str(buf, info->list[i]));
2370               });
2371           }
2372         }
2373         else
2374         {
2375           int rc= hton->rollback_by_xid(hton, info->list+i);
2376           if (rc == 0)
2377           {
2378             DBUG_EXECUTE("info",{
2379               char buf[XIDDATASIZE*4+6];
2380               _db_doprnt_("rollback xid %s", xid_to_str(buf, info->list[i]));
2381               });
2382           }
2383         }
2384       }
2385       if (got < info->len)
2386         break;
2387     }
2388   }
2389   return FALSE;
2390 }
2391 
ha_recover(HASH * commit_list)2392 int ha_recover(HASH *commit_list)
2393 {
2394   struct xarecover_st info;
2395   DBUG_ENTER("ha_recover");
2396   info.found_foreign_xids= info.found_my_xids= 0;
2397   info.commit_list= commit_list;
2398   info.dry_run= (info.commit_list==0 && tc_heuristic_recover==0);
2399   info.list= NULL;
2400 
2401   /* commit_list and tc_heuristic_recover cannot be set both */
2402   DBUG_ASSERT(info.commit_list==0 || tc_heuristic_recover==0);
2403   /* if either is set, total_ha_2pc must be set too */
2404   DBUG_ASSERT(info.dry_run ||
2405               (failed_ha_2pc + total_ha_2pc) > (ulong)opt_bin_log);
2406 
2407   if (total_ha_2pc <= (ulong)opt_bin_log)
2408     DBUG_RETURN(0);
2409 
2410   if (info.commit_list)
2411     sql_print_information("Starting crash recovery...");
2412 
2413   for (info.len= MAX_XID_LIST_SIZE ;
2414        info.list==0 && info.len > MIN_XID_LIST_SIZE; info.len/=2)
2415   {
2416     DBUG_EXECUTE_IF("min_xa_len", info.len = 16;);
2417     info.list=(XID *)my_malloc(key_memory_XID, info.len*sizeof(XID), MYF(0));
2418   }
2419   if (!info.list)
2420   {
2421     sql_print_error(ER(ER_OUTOFMEMORY),
2422                     static_cast<int>(info.len*sizeof(XID)));
2423     DBUG_RETURN(1);
2424   }
2425 
2426   plugin_foreach(NULL, xarecover_handlerton,
2427                  MYSQL_STORAGE_ENGINE_PLUGIN, &info);
2428 
2429   my_free(info.list);
2430   if (info.found_foreign_xids)
2431     sql_print_warning("Found %d prepared XA transactions",
2432                       info.found_foreign_xids);
2433   if (info.dry_run && info.found_my_xids)
2434   {
2435     sql_print_error("Found %d prepared transactions! It means that mysqld was "
2436                     "not shut down properly last time and critical recovery "
2437                     "information (last binlog or %s file) was manually deleted "
2438                     "after a crash. You have to start mysqld with "
2439                     "--tc-heuristic-recover switch to commit or rollback "
2440                     "pending transactions.",
2441                     info.found_my_xids, opt_tc_log_file);
2442     DBUG_RETURN(1);
2443   }
2444   if (info.commit_list)
2445     sql_print_information("Crash recovery finished.");
2446   DBUG_RETURN(0);
2447 }
2448 
2449 
2450 /*
2451   Called by engine to notify TC that a new commit checkpoint has been reached.
2452   See comments on handlerton method commit_checkpoint_request() for details.
2453 */
commit_checkpoint_notify_ha(void * cookie)2454 void commit_checkpoint_notify_ha(void *cookie)
2455 {
2456   tc_log->commit_checkpoint_notify(cookie);
2457 }
2458 
2459 
2460 /**
2461   Check if all storage engines used in transaction agree that after
2462   rollback to savepoint it is safe to release MDL locks acquired after
2463   savepoint creation.
2464 
2465   @param thd   The client thread that executes the transaction.
2466 
2467   @return true  - It is safe to release MDL locks.
2468           false - If it is not.
2469 */
ha_rollback_to_savepoint_can_release_mdl(THD * thd)2470 bool ha_rollback_to_savepoint_can_release_mdl(THD *thd)
2471 {
2472   Ha_trx_info *ha_info;
2473   THD_TRANS *trans= (thd->in_sub_stmt ? &thd->transaction->stmt :
2474                                         &thd->transaction->all);
2475 
2476   DBUG_ENTER("ha_rollback_to_savepoint_can_release_mdl");
2477 
2478   /**
2479     Checking whether it is safe to release metadata locks after rollback to
2480     savepoint in all the storage engines that are part of the transaction.
2481   */
2482   for (ha_info= trans->ha_list; ha_info; ha_info= ha_info->next())
2483   {
2484     handlerton *ht= ha_info->ht();
2485     DBUG_ASSERT(ht);
2486 
2487     if (ht->savepoint_rollback_can_release_mdl == 0 ||
2488         ht->savepoint_rollback_can_release_mdl(ht, thd) == false)
2489       DBUG_RETURN(false);
2490   }
2491 
2492   DBUG_RETURN(true);
2493 }
2494 
ha_rollback_to_savepoint(THD * thd,SAVEPOINT * sv)2495 int ha_rollback_to_savepoint(THD *thd, SAVEPOINT *sv)
2496 {
2497   int error=0;
2498   THD_TRANS *trans= (thd->in_sub_stmt ? &thd->transaction->stmt :
2499                                         &thd->transaction->all);
2500   Ha_trx_info *ha_info, *ha_info_next;
2501 
2502   DBUG_ENTER("ha_rollback_to_savepoint");
2503 
2504   trans->no_2pc=0;
2505   /*
2506     rolling back to savepoint in all storage engines that were part of the
2507     transaction when the savepoint was set
2508   */
2509   for (ha_info= sv->ha_list; ha_info; ha_info= ha_info->next())
2510   {
2511     int err;
2512     handlerton *ht= ha_info->ht();
2513     DBUG_ASSERT(ht);
2514     DBUG_ASSERT(ht->savepoint_set != 0);
2515     if ((err= ht->savepoint_rollback(ht, thd,
2516                                      (uchar *)(sv+1)+ht->savepoint_offset)))
2517     { // cannot happen
2518       my_error(ER_ERROR_DURING_ROLLBACK, MYF(0), err);
2519       error=1;
2520     }
2521     status_var_increment(thd->status_var.ha_savepoint_rollback_count);
2522     trans->no_2pc|= ht->prepare == 0;
2523   }
2524   /*
2525     rolling back the transaction in all storage engines that were not part of
2526     the transaction when the savepoint was set
2527   */
2528   for (ha_info= trans->ha_list; ha_info != sv->ha_list;
2529        ha_info= ha_info_next)
2530   {
2531     int err;
2532     handlerton *ht= ha_info->ht();
2533 #ifdef WITH_WSREP
2534     if (WSREP(thd) && ht->flags & HTON_WSREP_REPLICATION)
2535     {
2536       WSREP_DEBUG("ha_rollback_to_savepoint: run before_rollbackha_rollback_trans hook");
2537       (void) wsrep_before_rollback(thd, !thd->in_sub_stmt);
2538 
2539     }
2540 #endif // WITH_WSREP
2541     if ((err= ht->rollback(ht, thd, !thd->in_sub_stmt)))
2542     { // cannot happen
2543       my_error(ER_ERROR_DURING_ROLLBACK, MYF(0), err);
2544       error=1;
2545     }
2546 #ifdef WITH_WSREP
2547     if (WSREP(thd) && ht->flags & HTON_WSREP_REPLICATION)
2548     {
2549       WSREP_DEBUG("ha_rollback_to_savepoint: run after_rollback hook");
2550       (void) wsrep_after_rollback(thd, !thd->in_sub_stmt);
2551     }
2552 #endif // WITH_WSREP
2553     status_var_increment(thd->status_var.ha_rollback_count);
2554     ha_info_next= ha_info->next();
2555     ha_info->reset(); /* keep it conveniently zero-filled */
2556   }
2557   trans->ha_list= sv->ha_list;
2558 
2559   if (thd->m_transaction_psi != NULL)
2560     MYSQL_INC_TRANSACTION_ROLLBACK_TO_SAVEPOINT(thd->m_transaction_psi, 1);
2561 
2562   DBUG_RETURN(error);
2563 }
2564 
2565 /**
2566   @note
2567   according to the sql standard (ISO/IEC 9075-2:2003)
2568   section "4.33.4 SQL-statements and transaction states",
2569   SAVEPOINT is *not* transaction-initiating SQL-statement
2570 */
ha_savepoint(THD * thd,SAVEPOINT * sv)2571 int ha_savepoint(THD *thd, SAVEPOINT *sv)
2572 {
2573 #ifdef WITH_WSREP
2574   /*
2575     Register binlog hton for savepoint processing if wsrep binlog
2576     emulation is on.
2577    */
2578   if (WSREP_EMULATE_BINLOG(thd) && wsrep_thd_is_local(thd))
2579   {
2580     wsrep_register_binlog_handler(thd, thd->in_multi_stmt_transaction_mode());
2581   }
2582 #endif /* WITH_WSREP */
2583   int error=0;
2584   THD_TRANS *trans= (thd->in_sub_stmt ? &thd->transaction->stmt :
2585                                         &thd->transaction->all);
2586   Ha_trx_info *ha_info= trans->ha_list;
2587   DBUG_ENTER("ha_savepoint");
2588 
2589   for (; ha_info; ha_info= ha_info->next())
2590   {
2591     int err;
2592     handlerton *ht= ha_info->ht();
2593     DBUG_ASSERT(ht);
2594     if (! ht->savepoint_set)
2595     {
2596       my_error(ER_CHECK_NOT_IMPLEMENTED, MYF(0), "SAVEPOINT");
2597       error=1;
2598       break;
2599     }
2600     if ((err= ht->savepoint_set(ht, thd, (uchar *)(sv+1)+ht->savepoint_offset)))
2601     { // cannot happen
2602       my_error(ER_GET_ERRNO, MYF(0), err, hton_name(ht)->str);
2603       error=1;
2604     }
2605     status_var_increment(thd->status_var.ha_savepoint_count);
2606   }
2607   /*
2608     Remember the list of registered storage engines. All new
2609     engines are prepended to the beginning of the list.
2610   */
2611   sv->ha_list= trans->ha_list;
2612 
2613   if (!error && thd->m_transaction_psi != NULL)
2614     MYSQL_INC_TRANSACTION_SAVEPOINTS(thd->m_transaction_psi, 1);
2615 
2616   DBUG_RETURN(error);
2617 }
2618 
ha_release_savepoint(THD * thd,SAVEPOINT * sv)2619 int ha_release_savepoint(THD *thd, SAVEPOINT *sv)
2620 {
2621   int error=0;
2622   Ha_trx_info *ha_info= sv->ha_list;
2623   DBUG_ENTER("ha_release_savepoint");
2624 
2625   for (; ha_info; ha_info= ha_info->next())
2626   {
2627     int err;
2628     handlerton *ht= ha_info->ht();
2629     /* Savepoint life time is enclosed into transaction life time. */
2630     DBUG_ASSERT(ht);
2631     if (!ht->savepoint_release)
2632       continue;
2633     if ((err= ht->savepoint_release(ht, thd,
2634                                     (uchar *)(sv+1) + ht->savepoint_offset)))
2635     { // cannot happen
2636       my_error(ER_GET_ERRNO, MYF(0), err, hton_name(ht)->str);
2637       error=1;
2638     }
2639   }
2640 
2641   if (thd->m_transaction_psi != NULL)
2642     MYSQL_INC_TRANSACTION_RELEASE_SAVEPOINT(thd->m_transaction_psi, 1);
2643 
2644   DBUG_RETURN(error);
2645 }
2646 
2647 
snapshot_handlerton(THD * thd,plugin_ref plugin,void * arg)2648 static my_bool snapshot_handlerton(THD *thd, plugin_ref plugin,
2649                                    void *arg)
2650 {
2651   handlerton *hton= plugin_hton(plugin);
2652   if (hton->start_consistent_snapshot)
2653   {
2654     if (hton->start_consistent_snapshot(hton, thd))
2655       return TRUE;
2656     *((bool *)arg)= false;
2657   }
2658   return FALSE;
2659 }
2660 
ha_start_consistent_snapshot(THD * thd)2661 int ha_start_consistent_snapshot(THD *thd)
2662 {
2663   bool err, warn= true;
2664 
2665   /*
2666     Holding the LOCK_commit_ordered mutex ensures that we get the same
2667     snapshot for all engines (including the binary log).  This allows us
2668     among other things to do backups with
2669     START TRANSACTION WITH CONSISTENT SNAPSHOT and
2670     have a consistent binlog position.
2671   */
2672   mysql_mutex_lock(&LOCK_commit_ordered);
2673   err= plugin_foreach(thd, snapshot_handlerton, MYSQL_STORAGE_ENGINE_PLUGIN, &warn);
2674   mysql_mutex_unlock(&LOCK_commit_ordered);
2675 
2676   if (err)
2677   {
2678     ha_rollback_trans(thd, true);
2679     return 1;
2680   }
2681 
2682   /*
2683     Same idea as when one wants to CREATE TABLE in one engine which does not
2684     exist:
2685   */
2686   if (warn)
2687     push_warning(thd, Sql_condition::WARN_LEVEL_WARN, ER_UNKNOWN_ERROR,
2688                  "This MariaDB server does not support any "
2689                  "consistent-read capable storage engine");
2690   return 0;
2691 }
2692 
2693 
flush_handlerton(THD * thd,plugin_ref plugin,void * arg)2694 static my_bool flush_handlerton(THD *thd, plugin_ref plugin,
2695                                 void *arg)
2696 {
2697   handlerton *hton= plugin_hton(plugin);
2698   return hton->flush_logs && hton->flush_logs(hton);
2699 }
2700 
2701 
ha_flush_logs()2702 bool ha_flush_logs()
2703 {
2704   return plugin_foreach(NULL, flush_handlerton,
2705                         MYSQL_STORAGE_ENGINE_PLUGIN, 0);
2706 }
2707 
2708 
2709 /**
2710   @brief make canonical filename
2711 
2712   @param[in]  file     table handler
2713   @param[in]  path     original path
2714   @param[out] tmp_path buffer for canonized path
2715 
2716   @details Lower case db name and table name path parts for
2717            non file based tables when lower_case_table_names
2718            is 2 (store as is, compare in lower case).
2719            Filesystem path prefix (mysql_data_home or tmpdir)
2720            is left intact.
2721 
2722   @note tmp_path may be left intact if no conversion was
2723         performed.
2724 
2725   @retval canonized path
2726 
2727   @todo This may be done more efficiently when table path
2728         gets built. Convert this function to something like
2729         ASSERT_CANONICAL_FILENAME.
2730 */
get_canonical_filename(handler * file,const char * path,char * tmp_path)2731 const char *get_canonical_filename(handler *file, const char *path,
2732                                    char *tmp_path)
2733 {
2734   uint i;
2735   if (lower_case_table_names != 2 || (file->ha_table_flags() & HA_FILE_BASED))
2736     return path;
2737 
2738   for (i= 0; i <= mysql_tmpdir_list.max; i++)
2739   {
2740     if (is_prefix(path, mysql_tmpdir_list.list[i]))
2741       return path;
2742   }
2743 
2744   /* Ensure that table handler get path in lower case */
2745   if (tmp_path != path)
2746     strmov(tmp_path, path);
2747 
2748   /*
2749     we only should turn into lowercase database/table part
2750     so start the process after homedirectory
2751   */
2752   my_casedn_str(files_charset_info, tmp_path + mysql_data_home_len);
2753   return tmp_path;
2754 }
2755 
2756 
/**
   Delete a table in the engine

   @param thd               Current thread; its error state is inspected and
                            possibly cleared.
   @param hton              Engine that owns the table. NULL and
                            view_pseudo_hton make the function a no-op.
   @param path              Table path passed to hton->drop_table().
   @param db                Database name, used only for error reporting.
   @param alias             Table name/alias, used only for error reporting.
   @param generate_warning  If true, a "table does not exist in the engine"
                            failure is reported as a warning; otherwise it
                            is silently ignored.

   @return 0   Table was deleted (also returned when nothing was attempted:
               no engine, a view, or updates are ignored for this engine)
   @return -1  Table didn't exists, no error given
   @return #   Error from table handler

  @note
  Errors classified by non_existing_table_error() are not considered errors
  (the original note names ENOENT and HA_ERR_NO_SUCH_TABLE).
  The .frm file should be deleted by the caller only if we return <= 0.
*/

int ha_delete_table(THD *thd, handlerton *hton, const char *path,
                    const LEX_CSTRING *db, const LEX_CSTRING *alias,
                    bool generate_warning)
{
  int error;
  /* Remember whether an error was already pending before we started */
  bool is_error= thd->is_error();
  DBUG_ENTER("ha_delete_table");

  /* hton is NULL in ALTER TABLE when renaming only .frm files */
  if (hton == NULL || hton == view_pseudo_hton)
    DBUG_RETURN(0);

  if (ha_check_if_updates_are_ignored(thd, hton, "DROP"))
    DBUG_RETURN(0);

  error= hton->drop_table(hton, path);
  if (error > 0)
  {
    /*
      It's not an error if the table doesn't exist in the engine.
      warn the user, but still report DROP being a success
    */
    bool intercept= non_existing_table_error(error);

    /*
      Report real errors always, "no such table" only on request, and
      never on top of an error that is already set in the THD.
    */
    if ((!intercept || generate_warning) && ! thd->is_error())
    {
      TABLE dummy_table;
      TABLE_SHARE dummy_share;
      /* A temporary handler is needed only to call print_error() */
      handler *file= get_new_handler(nullptr, thd->mem_root, hton);
      if (file) {
        /*
          Fill in just enough of a fake TABLE/TABLE_SHARE (path, db and
          table names, alias) for the handler to format the message.
        */
        bzero((char*) &dummy_table, sizeof(dummy_table));
        bzero((char*) &dummy_share, sizeof(dummy_share));
        dummy_share.path.str= (char*) path;
        dummy_share.path.length= strlen(path);
        dummy_share.normalized_path= dummy_share.path;
        dummy_share.db= *db;
        dummy_share.table_name= *alias;
        dummy_table.s= &dummy_share;
        dummy_table.alias.set(alias->str, alias->length, table_alias_charset);
        file->change_table_ptr(&dummy_table, &dummy_share);
        /* Intercepted errors are downgraded to warnings */
        file->print_error(error, MYF(intercept ? ME_WARNING : 0));
        delete file;
      }
    }
    if (intercept)
    {
      /* Clear error if we got it in this function */
      if (!is_error)
        thd->clear_error();
      error= -1;
    }
  }
  if (error)
    DBUG_PRINT("exit", ("error: %d", error));
  DBUG_RETURN(error);
}
2825 
2826 /****************************************************************************
2827 ** General handler functions
2828 ****************************************************************************/
2829 
2830 
2831 /**
2832    Clone a handler
2833 
2834    @param name     name of new table instance
2835    @param mem_root Where 'this->ref' should be allocated. It can't be
2836                    in this->table->mem_root as otherwise we will not be
2837                    able to reclaim that memory when the clone handler
2838                    object is destroyed.
2839 */
2840 
clone(const char * name,MEM_ROOT * mem_root)2841 handler *handler::clone(const char *name, MEM_ROOT *mem_root)
2842 {
2843   handler *new_handler= get_new_handler(table->s, mem_root, ht);
2844 
2845   if (!new_handler)
2846     return NULL;
2847   if (new_handler->set_ha_share_ref(ha_share))
2848     goto err;
2849 
2850   /*
2851     TODO: Implement a more efficient way to have more than one index open for
2852     the same table instance. The ha_open call is not cacheable for clone.
2853 
2854     This is not critical as the engines already have the table open
2855     and should be able to use the original instance of the table.
2856   */
2857   if (new_handler->ha_open(table, name, table->db_stat,
2858                            HA_OPEN_IGNORE_IF_LOCKED, mem_root))
2859     goto err;
2860 
2861   return new_handler;
2862 
2863 err:
2864   delete new_handler;
2865   return NULL;
2866 }
2867 
2868 
2869 /**
2870   clone of current handler.
2871 
2872   Creates a clone of handler used for unique hash key and WITHOUT OVERLAPS.
2873   @return error code
2874 */
create_lookup_handler()2875 int handler::create_lookup_handler()
2876 {
2877   handler *tmp;
2878   if (lookup_handler != this)
2879     return 0;
2880   if (!(tmp= clone(table->s->normalized_path.str, table->in_use->mem_root)))
2881     return 1;
2882   lookup_handler= tmp;
2883   return lookup_handler->ha_external_lock(table->in_use, F_RDLCK);
2884 }
2885 
engine_name()2886 LEX_CSTRING *handler::engine_name()
2887 {
2888   return hton_name(ht);
2889 }
2890 
2891 
2892 /*
2893   It is assumed that the value of the parameter 'ranges' can be only 0 or 1.
2894   If ranges == 1 then the function returns the cost of index only scan
2895   by index 'keyno' of one range containing 'rows' key entries.
2896   If ranges == 0 then the function returns only the cost of copying
2897   those key entries into the engine buffers.
2898 */
2899 
keyread_time(uint index,uint ranges,ha_rows rows)2900 double handler::keyread_time(uint index, uint ranges, ha_rows rows)
2901 {
2902   DBUG_ASSERT(ranges == 0 || ranges == 1);
2903   size_t len= table->key_info[index].key_length + ref_length;
2904   if (table->file->is_clustering_key(index))
2905     len= table->s->stored_rec_length;
2906   double cost= (double)rows*len/(stats.block_size+1)*IDX_BLOCK_COPY_COST;
2907   if (ranges)
2908   {
2909     uint keys_per_block= (uint) (stats.block_size*3/4/len+1);
2910     ulonglong blocks= (rows+ keys_per_block- 1)/keys_per_block;
2911     cost+= blocks;
2912   }
2913   return cost;
2914 }
2915 
2916 
ha_thd(void) const2917 THD *handler::ha_thd(void) const
2918 {
2919   DBUG_ASSERT(!table || !table->in_use || table->in_use == current_thd);
2920   return (table && table->in_use) ? table->in_use : current_thd;
2921 }
2922 
/* Detach the handler's table instrumentation (m_psi) from this thread. */
void handler::unbind_psi()
{
  /*
    Notify the instrumentation that this table is not owned
    by this thread any more.
  */
  PSI_CALL_unbind_table(m_psi);
}
2931 
/*
  Attach the handler's table instrumentation to this thread; m_psi is
  replaced by whatever the instrumentation call returns.
*/
void handler::rebind_psi()
{
  /*
    Notify the instrumentation that this table is now owned
    by this thread.
  */
  m_psi= PSI_CALL_rebind_table(ha_table_share_psi(), this, m_psi);
}
2940 
2941 
start_psi_batch_mode()2942 void handler::start_psi_batch_mode()
2943 {
2944 #ifdef HAVE_PSI_TABLE_INTERFACE
2945   DBUG_ASSERT(m_psi_batch_mode == PSI_BATCH_MODE_NONE);
2946   DBUG_ASSERT(m_psi_locker == NULL);
2947   m_psi_batch_mode= PSI_BATCH_MODE_STARTING;
2948   m_psi_numrows= 0;
2949 #endif
2950 }
2951 
end_psi_batch_mode()2952 void handler::end_psi_batch_mode()
2953 {
2954 #ifdef HAVE_PSI_TABLE_INTERFACE
2955   DBUG_ASSERT(m_psi_batch_mode != PSI_BATCH_MODE_NONE);
2956   if (m_psi_locker != NULL)
2957   {
2958     DBUG_ASSERT(m_psi_batch_mode == PSI_BATCH_MODE_STARTED);
2959     PSI_TABLE_CALL(end_table_io_wait)(m_psi_locker, m_psi_numrows);
2960     m_psi_locker= NULL;
2961   }
2962   m_psi_batch_mode= PSI_BATCH_MODE_NONE;
2963 #endif
2964 }
2965 
ha_table_share_psi() const2966 PSI_table_share *handler::ha_table_share_psi() const
2967 {
2968   return table_share->m_psi;
2969 }
2970 
/** @brief
  Open database-handler.

  IMPLEMENTATION
    Try O_RDONLY if cannot open as O_RDWR
    Don't wait for locks if not HA_OPEN_WAIT_IF_LOCKED is set

  @param table_arg           TABLE this handler gets attached to; its share
                             must be the handler's table_share.
  @param name                Table name passed on to the engine's open().
  @param mode                Open mode (O_RDWR, O_RDONLY) passed to open().
  @param test_if_locked      HA_OPEN_* flags. HA_OPEN_NO_PSI_CALL suppresses
                             the instrumentation call, HA_OPEN_INTERNAL_TABLE
                             marks the handler as internal temporary table.
  @param mem_root            Where 'ref' is allocated; the table's mem_root
                             is used when this is NULL.
  @param partitions_to_open  Forwarded to set_partitions_to_open().

  @return 0 on success, otherwise the error from open() or
          HA_ERR_OUT_OF_MEM if 'ref' could not be allocated.
*/
int handler::ha_open(TABLE *table_arg, const char *name, int mode,
                     uint test_if_locked, MEM_ROOT *mem_root,
                     List<String> *partitions_to_open)
{
  int error;
  DBUG_ENTER("handler::ha_open");
  DBUG_PRINT("enter",
             ("name: %s  db_type: %d  db_stat: %d  mode: %d  lock_test: %d",
              name, ht->db_type, table_arg->db_stat, mode,
              test_if_locked));

  table= table_arg;
  DBUG_ASSERT(table->s == table_share);
  DBUG_ASSERT(m_lock_type == F_UNLCK);
  DBUG_PRINT("info", ("old m_lock_type: %d F_UNLCK %d", m_lock_type, F_UNLCK));
  DBUG_ASSERT(alloc_root_inited(&table->mem_root));

  set_partitions_to_open(partitions_to_open);

  if (unlikely((error=open(name,mode,test_if_locked))))
  {
    /*
      A read-write open that failed because of missing write access is
      retried read-only, provided the caller allowed it (HA_TRY_READ_ONLY).
    */
    if ((error == EACCES || error == EROFS) && mode == O_RDWR &&
	(table->db_stat & HA_TRY_READ_ONLY))
    {
      table->db_stat|=HA_READ_ONLY;
      error=open(name,O_RDONLY,test_if_locked);
    }
  }
  if (unlikely(error))
  {
    my_errno= error;                            /* Safeguard */
    DBUG_PRINT("error",("error: %d  errno: %d",error,errno));
  }
  else
  {
    DBUG_ASSERT(m_psi == NULL);
    DBUG_ASSERT(table_share != NULL);
    /*
      Do not call this for partitions handlers, since it may take too much
      resources.
      So only use the m_psi on table level, not for individual partitions.
    */
    if (!(test_if_locked & HA_OPEN_NO_PSI_CALL))
    {
      m_psi= PSI_CALL_open_table(ha_table_share_psi(), this);
    }

    if (table->s->db_options_in_use & HA_OPTION_READ_ONLY_DATA)
      table->db_stat|=HA_READ_ONLY;
    (void) extra(HA_EXTRA_NO_READCHECK);	// Not needed in SQL

    /* Allocate ref in thd or on the table's mem_root */
    /* One buffer holds both ref and dup_ref, each ALIGN_SIZE(ref_length) */
    if (!(ref= (uchar*) alloc_root(mem_root ? mem_root : &table->mem_root,
                                   ALIGN_SIZE(ref_length)*2)))
    {
      ha_close();
      error=HA_ERR_OUT_OF_MEM;
    }
    else
      dup_ref=ref+ALIGN_SIZE(ref_length);
    cached_table_flags= table_flags();
  }
  /* Done regardless of whether the open succeeded */
  reset_statistics();
  internal_tmp_table= MY_TEST(test_if_locked & HA_OPEN_INTERNAL_TABLE);
  DBUG_RETURN(error);
}
3044 
ha_close(void)3045 int handler::ha_close(void)
3046 {
3047   DBUG_ENTER("ha_close");
3048   /*
3049     Increment global statistics for temporary tables.
3050     In_use is 0 for tables that was closed from the table cache.
3051   */
3052   if (table->in_use)
3053     status_var_add(table->in_use->status_var.rows_tmp_read, rows_tmp_read);
3054   PSI_CALL_close_table(table_share, m_psi);
3055   m_psi= NULL; /* instrumentation handle, invalid after close_table() */
3056   DBUG_ASSERT(m_psi_batch_mode == PSI_BATCH_MODE_NONE);
3057   DBUG_ASSERT(m_psi_locker == NULL);
3058 
3059   /* Detach from ANALYZE tracker */
3060   tracker= NULL;
3061   /* We use ref as way to check that open succeded */
3062   ref= 0;
3063 
3064   DBUG_ASSERT(m_lock_type == F_UNLCK);
3065   DBUG_ASSERT(inited == NONE);
3066   DBUG_RETURN(close());
3067 }
3068 
3069 
ha_rnd_next(uchar * buf)3070 int handler::ha_rnd_next(uchar *buf)
3071 {
3072   int result;
3073   DBUG_ENTER("handler::ha_rnd_next");
3074   DBUG_ASSERT(table_share->tmp_table != NO_TMP_TABLE ||
3075               m_lock_type != F_UNLCK);
3076   DBUG_ASSERT(inited == RND);
3077 
3078   do
3079   {
3080     TABLE_IO_WAIT(tracker, PSI_TABLE_FETCH_ROW, MAX_KEY, result,
3081       { result= rnd_next(buf); })
3082     if (result != HA_ERR_RECORD_DELETED)
3083       break;
3084     status_var_increment(table->in_use->status_var.ha_read_rnd_deleted_count);
3085   } while (!table->in_use->check_killed(1));
3086 
3087   if (result == HA_ERR_RECORD_DELETED)
3088     result= HA_ERR_ABORTED_BY_USER;
3089   else
3090   {
3091     if (!result)
3092     {
3093       update_rows_read();
3094       if (table->vfield && buf == table->record[0])
3095         table->update_virtual_fields(this, VCOL_UPDATE_FOR_READ);
3096     }
3097     increment_statistics(&SSV::ha_read_rnd_next_count);
3098   }
3099 
3100   table->status=result ? STATUS_NOT_FOUND: 0;
3101   DBUG_RETURN(result);
3102 }
3103 
ha_rnd_pos(uchar * buf,uchar * pos)3104 int handler::ha_rnd_pos(uchar *buf, uchar *pos)
3105 {
3106   int result;
3107   DBUG_ENTER("handler::ha_rnd_pos");
3108   DBUG_ASSERT(table_share->tmp_table != NO_TMP_TABLE ||
3109               m_lock_type != F_UNLCK);
3110   DBUG_ASSERT(inited == RND);
3111 
3112   TABLE_IO_WAIT(tracker, PSI_TABLE_FETCH_ROW, MAX_KEY, result,
3113     { result= rnd_pos(buf, pos); })
3114   increment_statistics(&SSV::ha_read_rnd_count);
3115   if (result == HA_ERR_RECORD_DELETED)
3116     result= HA_ERR_KEY_NOT_FOUND;
3117   else if (!result)
3118   {
3119     update_rows_read();
3120     if (table->vfield && buf == table->record[0])
3121       table->update_virtual_fields(this, VCOL_UPDATE_FOR_READ);
3122   }
3123   table->status=result ? STATUS_NOT_FOUND: 0;
3124   DBUG_RETURN(result);
3125 }
3126 
ha_index_read_map(uchar * buf,const uchar * key,key_part_map keypart_map,enum ha_rkey_function find_flag)3127 int handler::ha_index_read_map(uchar *buf, const uchar *key,
3128                                       key_part_map keypart_map,
3129                                       enum ha_rkey_function find_flag)
3130 {
3131   int result;
3132   DBUG_ENTER("handler::ha_index_read_map");
3133   DBUG_ASSERT(table_share->tmp_table != NO_TMP_TABLE ||
3134               m_lock_type != F_UNLCK);
3135   DBUG_ASSERT(inited==INDEX);
3136 
3137   TABLE_IO_WAIT(tracker, PSI_TABLE_FETCH_ROW, active_index, result,
3138     { result= index_read_map(buf, key, keypart_map, find_flag); })
3139   increment_statistics(&SSV::ha_read_key_count);
3140   if (!result)
3141   {
3142     update_index_statistics();
3143     if (table->vfield && buf == table->record[0])
3144       table->update_virtual_fields(this, VCOL_UPDATE_FOR_READ);
3145   }
3146   table->status=result ? STATUS_NOT_FOUND: 0;
3147   DBUG_RETURN(result);
3148 }
3149 
3150 /*
3151   @note: Other index lookup/navigation functions require prior
3152   handler->index_init() call. This function is different, it requires
3153   that the scan is not initialized, and accepts "uint index" as an argument.
3154 */
3155 
ha_index_read_idx_map(uchar * buf,uint index,const uchar * key,key_part_map keypart_map,enum ha_rkey_function find_flag)3156 int handler::ha_index_read_idx_map(uchar *buf, uint index, const uchar *key,
3157                                           key_part_map keypart_map,
3158                                           enum ha_rkey_function find_flag)
3159 {
3160   int result;
3161   DBUG_ASSERT(inited==NONE);
3162   DBUG_ASSERT(table_share->tmp_table != NO_TMP_TABLE ||
3163               m_lock_type != F_UNLCK);
3164   DBUG_ASSERT(end_range == NULL);
3165   TABLE_IO_WAIT(tracker, PSI_TABLE_FETCH_ROW, index, result,
3166     { result= index_read_idx_map(buf, index, key, keypart_map, find_flag); })
3167   increment_statistics(&SSV::ha_read_key_count);
3168   if (!result)
3169   {
3170     update_rows_read();
3171     index_rows_read[index]++;
3172     if (table->vfield && buf == table->record[0])
3173       table->update_virtual_fields(this, VCOL_UPDATE_FOR_READ);
3174   }
3175   table->status=result ? STATUS_NOT_FOUND: 0;
3176   return result;
3177 }
3178 
ha_index_next(uchar * buf)3179 int handler::ha_index_next(uchar * buf)
3180 {
3181   int result;
3182   DBUG_ENTER("handler::ha_index_next");
3183  DBUG_ASSERT(table_share->tmp_table != NO_TMP_TABLE ||
3184               m_lock_type != F_UNLCK);
3185   DBUG_ASSERT(inited==INDEX);
3186 
3187   TABLE_IO_WAIT(tracker, PSI_TABLE_FETCH_ROW, active_index, result,
3188     { result= index_next(buf); })
3189   increment_statistics(&SSV::ha_read_next_count);
3190   if (!result)
3191   {
3192     update_index_statistics();
3193     if (table->vfield && buf == table->record[0])
3194       table->update_virtual_fields(this, VCOL_UPDATE_FOR_READ);
3195   }
3196   table->status=result ? STATUS_NOT_FOUND: 0;
3197 
3198   DEBUG_SYNC(ha_thd(), "handler_ha_index_next_end");
3199 
3200   DBUG_RETURN(result);
3201 }
3202 
ha_index_prev(uchar * buf)3203 int handler::ha_index_prev(uchar * buf)
3204 {
3205   int result;
3206   DBUG_ENTER("handler::ha_index_prev");
3207   DBUG_ASSERT(table_share->tmp_table != NO_TMP_TABLE ||
3208               m_lock_type != F_UNLCK);
3209   DBUG_ASSERT(inited==INDEX);
3210 
3211   TABLE_IO_WAIT(tracker, PSI_TABLE_FETCH_ROW, active_index, result,
3212     { result= index_prev(buf); })
3213   increment_statistics(&SSV::ha_read_prev_count);
3214   if (!result)
3215   {
3216     update_index_statistics();
3217     if (table->vfield && buf == table->record[0])
3218       table->update_virtual_fields(this, VCOL_UPDATE_FOR_READ);
3219   }
3220   table->status=result ? STATUS_NOT_FOUND: 0;
3221   DBUG_RETURN(result);
3222 }
3223 
ha_index_first(uchar * buf)3224 int handler::ha_index_first(uchar * buf)
3225 {
3226   int result;
3227   DBUG_ASSERT(table_share->tmp_table != NO_TMP_TABLE ||
3228               m_lock_type != F_UNLCK);
3229   DBUG_ASSERT(inited==INDEX);
3230 
3231   TABLE_IO_WAIT(tracker, PSI_TABLE_FETCH_ROW, active_index, result,
3232     { result= index_first(buf); })
3233   increment_statistics(&SSV::ha_read_first_count);
3234   if (!result)
3235   {
3236     update_index_statistics();
3237     if (table->vfield && buf == table->record[0])
3238       table->update_virtual_fields(this, VCOL_UPDATE_FOR_READ);
3239   }
3240   table->status=result ? STATUS_NOT_FOUND: 0;
3241   return result;
3242 }
3243 
ha_index_last(uchar * buf)3244 int handler::ha_index_last(uchar * buf)
3245 {
3246   int result;
3247   DBUG_ASSERT(table_share->tmp_table != NO_TMP_TABLE ||
3248               m_lock_type != F_UNLCK);
3249   DBUG_ASSERT(inited==INDEX);
3250 
3251   TABLE_IO_WAIT(tracker, PSI_TABLE_FETCH_ROW, active_index, result,
3252     { result= index_last(buf); })
3253   increment_statistics(&SSV::ha_read_last_count);
3254   if (!result)
3255   {
3256     update_index_statistics();
3257     if (table->vfield && buf == table->record[0])
3258       table->update_virtual_fields(this, VCOL_UPDATE_FOR_READ);
3259   }
3260   table->status=result ? STATUS_NOT_FOUND: 0;
3261   return result;
3262 }
3263 
ha_index_next_same(uchar * buf,const uchar * key,uint keylen)3264 int handler::ha_index_next_same(uchar *buf, const uchar *key, uint keylen)
3265 {
3266   int result;
3267   DBUG_ASSERT(table_share->tmp_table != NO_TMP_TABLE ||
3268               m_lock_type != F_UNLCK);
3269   DBUG_ASSERT(inited==INDEX);
3270 
3271   TABLE_IO_WAIT(tracker, PSI_TABLE_FETCH_ROW, active_index, result,
3272     { result= index_next_same(buf, key, keylen); })
3273   increment_statistics(&SSV::ha_read_next_count);
3274   if (!result)
3275   {
3276     update_index_statistics();
3277     if (table->vfield && buf == table->record[0])
3278       table->update_virtual_fields(this, VCOL_UPDATE_FOR_READ);
3279   }
3280   table->status=result ? STATUS_NOT_FOUND: 0;
3281   return result;
3282 }
3283 
3284 
ha_was_semi_consistent_read()3285 bool handler::ha_was_semi_consistent_read()
3286 {
3287   bool result= was_semi_consistent_read();
3288   if (result)
3289     increment_statistics(&SSV::ha_read_retry_count);
3290   return result;
3291 }
3292 
3293 /* Initialize handler for random reading, with error handling */
3294 
ha_rnd_init_with_error(bool scan)3295 int handler::ha_rnd_init_with_error(bool scan)
3296 {
3297   int error;
3298   if (likely(!(error= ha_rnd_init(scan))))
3299     return 0;
3300   table->file->print_error(error, MYF(0));
3301   return error;
3302 }
3303 
3304 
3305 /**
3306   Read first row (only) from a table. Used for reading tables with
3307   only one row, either based on table statistics or if table is a SEQUENCE.
3308 
3309   This is never called for normal InnoDB tables, as these table types
3310   does not have HA_STATS_RECORDS_IS_EXACT set.
3311 */
read_first_row(uchar * buf,uint primary_key)3312 int handler::read_first_row(uchar * buf, uint primary_key)
3313 {
3314   int error;
3315   DBUG_ENTER("handler::read_first_row");
3316 
3317   /*
3318     If there is very few deleted rows in the table, find the first row by
3319     scanning the table.
3320     TODO remove the test for HA_READ_ORDER
3321   */
3322   if (stats.deleted < 10 || primary_key >= MAX_KEY ||
3323       !(index_flags(primary_key, 0, 0) & HA_READ_ORDER))
3324   {
3325     if (likely(!(error= ha_rnd_init(1))))
3326     {
3327       error= ha_rnd_next(buf);
3328       const int end_error= ha_rnd_end();
3329       if (likely(!error))
3330         error= end_error;
3331     }
3332   }
3333   else
3334   {
3335     /* Find the first row through the primary key */
3336     if (likely(!(error= ha_index_init(primary_key, 0))))
3337     {
3338       error= ha_index_first(buf);
3339       const int end_error= ha_index_end();
3340       if (likely(!error))
3341         error= end_error;
3342     }
3343   }
3344   DBUG_RETURN(error);
3345 }
3346 
3347 /**
3348   Generate the next auto-increment number based on increment and offset.
3349   computes the lowest number
3350   - strictly greater than "nr"
3351   - of the form: auto_increment_offset + N * auto_increment_increment
3352   If overflow happened then return MAX_ULONGLONG value as an
3353   indication of overflow.
3354   In most cases increment= offset= 1, in which case we get:
3355   @verbatim 1,2,3,4,5,... @endverbatim
3356     If increment=10 and offset=5 and previous number is 1, we get:
3357   @verbatim 1,5,15,25,35,... @endverbatim
3358 */
3359 inline ulonglong
compute_next_insert_id(ulonglong nr,struct system_variables * variables)3360 compute_next_insert_id(ulonglong nr,struct system_variables *variables)
3361 {
3362   const ulonglong save_nr= nr;
3363 
3364   if (variables->auto_increment_increment == 1)
3365     nr= nr + 1; // optimization of the formula below
3366   else
3367   {
3368     /*
3369        Calculating the number of complete auto_increment_increment extents:
3370     */
3371     nr= (nr + variables->auto_increment_increment -
3372          variables->auto_increment_offset) /
3373         (ulonglong) variables->auto_increment_increment;
3374     /*
3375        Adding an offset to the auto_increment_increment extent boundary:
3376     */
3377     nr= nr * (ulonglong) variables->auto_increment_increment +
3378         variables->auto_increment_offset;
3379   }
3380 
3381   if (unlikely(nr <= save_nr))
3382     return ULONGLONG_MAX;
3383 
3384   return nr;
3385 }
3386 
3387 
adjust_next_insert_id_after_explicit_value(ulonglong nr)3388 void handler::adjust_next_insert_id_after_explicit_value(ulonglong nr)
3389 {
3390   /*
3391     If we have set THD::next_insert_id previously and plan to insert an
3392     explicitly-specified value larger than this, we need to increase
3393     THD::next_insert_id to be greater than the explicit value.
3394   */
3395   if ((next_insert_id > 0) && (nr >= next_insert_id))
3396     set_next_insert_id(compute_next_insert_id(nr, &table->in_use->variables));
3397 }
3398 
3399 
3400 /** @brief
3401   Computes the largest number X:
3402   - smaller than or equal to "nr"
3403   - of the form: auto_increment_offset + N * auto_increment_increment
3404   where N>=0.
3405 
3406   SYNOPSIS
3407     prev_insert_id
3408       nr            Number to "round down"
3409       variables     variables struct containing auto_increment_increment and
3410                     auto_increment_offset
3411 
3412   RETURN
3413     The number X if it exists, "nr" otherwise.
3414 */
3415 inline ulonglong
prev_insert_id(ulonglong nr,struct system_variables * variables)3416 prev_insert_id(ulonglong nr, struct system_variables *variables)
3417 {
3418   if (unlikely(nr < variables->auto_increment_offset))
3419   {
3420     /*
3421       There's nothing good we can do here. That is a pathological case, where
3422       the offset is larger than the column's max possible value, i.e. not even
3423       the first sequence value may be inserted. User will receive warning.
3424     */
3425     DBUG_PRINT("info",("auto_increment: nr: %lu cannot honour "
3426                        "auto_increment_offset: %lu",
3427                        (ulong) nr, variables->auto_increment_offset));
3428     return nr;
3429   }
3430   if (variables->auto_increment_increment == 1)
3431     return nr; // optimization of the formula below
3432   /*
3433      Calculating the number of complete auto_increment_increment extents:
3434   */
3435   nr= (nr - variables->auto_increment_offset) /
3436       (ulonglong) variables->auto_increment_increment;
3437   /*
3438      Adding an offset to the auto_increment_increment extent boundary:
3439   */
3440   return (nr * (ulonglong) variables->auto_increment_increment +
3441           variables->auto_increment_offset);
3442 }
3443 
3444 
3445 /**
3446   Update the auto_increment field if necessary.
3447 
3448   Updates columns with type NEXT_NUMBER if:
3449 
3450   - If column value is set to NULL (in which case
3451     auto_increment_field_not_null is 0)
3452   - If column is set to 0 and (sql_mode & MODE_NO_AUTO_VALUE_ON_ZERO) is not
3453     set. In the future we will only set NEXT_NUMBER fields if one sets them
3454     to NULL (or they are not included in the insert list).
3455 
3456     In those cases, we check if the currently reserved interval still has
3457     values we have not used. If yes, we pick the smallest one and use it.
3458     Otherwise:
3459 
3460   - If a list of intervals has been provided to the statement via SET
3461     INSERT_ID or via an Intvar_log_event (in a replication slave), we pick the
3462     first unused interval from this list, consider it as reserved.
3463 
3464   - Otherwise we set the column for the first row to the value
3465     next_insert_id(get_auto_increment(column))) which is usually
3466     max-used-column-value+1.
3467     We call get_auto_increment() for the first row in a multi-row
3468     statement. get_auto_increment() will tell us the interval of values it
3469     reserved for us.
3470 
3471   - In both cases, for the following rows we use those reserved values without
3472     calling the handler again (we just progress in the interval, computing
3473     each new value from the previous one). Until we have exhausted them, then
3474     we either take the next provided interval or call get_auto_increment()
3475     again to reserve a new interval.
3476 
3477   - In both cases, the reserved intervals are remembered in
3478     thd->auto_inc_intervals_in_cur_stmt_for_binlog if statement-based
3479     binlogging; the last reserved interval is remembered in
3480     auto_inc_interval_for_cur_row. The number of reserved intervals is
3481     remembered in auto_inc_intervals_count. It differs from the number of
3482     elements in thd->auto_inc_intervals_in_cur_stmt_for_binlog() because the
3483     latter list is cumulative over all statements forming one binlog event
3484     (when stored functions and triggers are used), and collapses two
3485     contiguous intervals in one (see its append() method).
3486 
3487     The idea is that generated auto_increment values are predictable and
3488     independent of the column values in the table.  This is needed to be
3489     able to replicate into a table that already has rows with a higher
3490     auto-increment value than the one that is inserted.
3491 
3492     After we have already generated an auto-increment number and the user
3493     inserts a column with a higher value than the last used one, we will
3494     start counting from the inserted value.
3495 
3496     This function's "outputs" are: the table's auto_increment field is filled
3497     with a value, thd->next_insert_id is filled with the value to use for the
3498     next row, if a value was autogenerated for the current row it is stored in
3499     thd->insert_id_for_cur_row, if get_auto_increment() was called
3500     thd->auto_inc_interval_for_cur_row is modified, if that interval is not
3501     present in thd->auto_inc_intervals_in_cur_stmt_for_binlog it is added to
3502     this list.
3503 
3504   @todo
3505     Replace all references to "next number" or NEXT_NUMBER to
3506     "auto_increment", everywhere (see below: there is
3507     table->auto_increment_field_not_null, and there also exists
3508     table->next_number_field, it's not consistent).
3509 
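  @retval
    0   ok
  @retval
    HA_ERR_AUTOINC_READ_FAILED  get_auto_increment() was called and
    returned ~(ulonglong) 0
  @retval
    HA_ERR_AUTOINC_ERANGE  storing the value in the field caused a strict
    mode failure, the value exceeds the field's maximum integer value, or
    no next value could be computed (the candidate was ULONGLONG_MAX).
  @retval
    HA_ERR_UNSUPPORTED  during ALTER TABLE on a versioned table, a row whose
    row-end field is not at its maximum was met while the auto-increment
    field cannot be NULL.
*/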
3519 
/*
  Sizing of a reservation request when no row estimate is available (see
  the "increasing defaults" branch in update_auto_increment()): the request
  is AUTO_INC_DEFAULT_NB_ROWS * 2^(number of intervals already reserved),
  capped at AUTO_INC_DEFAULT_NB_MAX.
*/
#define AUTO_INC_DEFAULT_NB_ROWS 1 // Some prefer 1024 here
#define AUTO_INC_DEFAULT_NB_MAX_BITS 16
#define AUTO_INC_DEFAULT_NB_MAX ((1 << AUTO_INC_DEFAULT_NB_MAX_BITS) - 1)

int handler::update_auto_increment()
{
  ulonglong nr, nb_reserved_values;
  bool append= FALSE;
  THD *thd= table->in_use;
  struct system_variables *variables= &thd->variables;
  int result=0, tmp;
  DBUG_ENTER("handler::update_auto_increment");

  /*
    next_insert_id is a "cursor" into the reserved interval, it may go greater
    than the interval, but not smaller.
  */
  DBUG_ASSERT(next_insert_id >= auto_inc_interval_for_cur_row.minimum());

  /*
    Explicit-value path: the field already holds a non-zero value, or it
    holds 0 that must be kept as is (field flagged not-null and
    MODE_NO_AUTO_VALUE_ON_ZERO set). Nothing is generated in this case.
  */
  if ((nr= table->next_number_field->val_int()) != 0 ||
      (table->auto_increment_field_not_null &&
       thd->variables.sql_mode & MODE_NO_AUTO_VALUE_ON_ZERO))
  {

    /*
      There could be an error reported because value was truncated
      when strict mode is enabled.
    */
    if (thd->is_error())
      DBUG_RETURN(HA_ERR_AUTOINC_ERANGE);
    /*
      Update next_insert_id if we had already generated a value in this
      statement (case of INSERT VALUES(null),(3763),(null):
      the last NULL needs to insert 3764, not the value of the first NULL plus
      1).
      Ignore negative values.
    */
    if ((longlong) nr > 0 || (table->next_number_field->flags & UNSIGNED_FLAG))
      adjust_next_insert_id_after_explicit_value(nr);
    insert_id_for_cur_row= 0; // didn't generate anything
    DBUG_RETURN(0);
  }

  /*
    Versioned table: when the row-end field is not at its maximum
    (NOTE(review): presumably a history row - confirm), no value is
    generated for the row. Under ALTER TABLE the auto-increment field is
    set to NULL instead, which is only possible if the field is nullable.
  */
  if (table->versioned())
  {
    Field *end= table->vers_end_field();
    DBUG_ASSERT(end);
    /* The row-end field is read by is_max() below, so mark it readable. */
    bitmap_set_bit(table->read_set, end->field_index);
    if (!end->is_max())
    {
      if (thd->lex->sql_command == SQLCOM_ALTER_TABLE)
      {
        if (!table->next_number_field->real_maybe_null())
          DBUG_RETURN(HA_ERR_UNSUPPORTED);
        table->next_number_field->set_null();
      }
      DBUG_RETURN(0);
    }
  }

  // ALTER TABLE ... ADD COLUMN ... AUTO_INCREMENT
  if (thd->lex->sql_command == SQLCOM_ALTER_TABLE)
    table->next_number_field->set_notnull();

  /*
    Start from the cursor; if it has reached the end of the interval
    reserved for the current row, a new interval is obtained below and
    "nr" is replaced by its first value.
  */
  if ((nr= next_insert_id) >= auto_inc_interval_for_cur_row.maximum())
  {
    /* next_insert_id is beyond what is reserved, so we reserve more. */
    const Discrete_interval *forced=
      thd->auto_inc_intervals_forced.get_next();
    if (forced != NULL)
    {
      nr= forced->minimum();
      nb_reserved_values= forced->values();
    }
    else
    {
      /*
        handler::estimation_rows_to_insert was set by
        handler::ha_start_bulk_insert(); if 0 it means "unknown".
      */
      ulonglong nb_desired_values;
      /*
        If an estimation was given to the engine:
        - use it.
        - if we already reserved numbers, it means the estimation was
        not accurate, then we'll reserve 2*AUTO_INC_DEFAULT_NB_ROWS the 2nd
        time, twice that the 3rd time etc.
        If no estimation was given, use those increasing defaults from the
        start, starting from AUTO_INC_DEFAULT_NB_ROWS.
        Don't go beyond a max to not reserve "way too much" (because
        reservation means potentially losing unused values).
        Note that in prelocked mode no estimation is given.
      */

      if ((auto_inc_intervals_count == 0) && (estimation_rows_to_insert > 0))
        nb_desired_values= estimation_rows_to_insert;
      else if ((auto_inc_intervals_count == 0) &&
               (thd->lex->many_values.elements > 0))
      {
        /*
          For multi-row inserts, if the bulk inserts cannot be started, the
          handler::estimation_rows_to_insert will not be set. But we still
          want to reserve the autoinc values.
        */
        nb_desired_values= thd->lex->many_values.elements;
      }
      else /* go with the increasing defaults */
      {
        /* avoid overflow in formula, with this if() */
        if (auto_inc_intervals_count <= AUTO_INC_DEFAULT_NB_MAX_BITS)
        {
          nb_desired_values= AUTO_INC_DEFAULT_NB_ROWS *
            (1 << auto_inc_intervals_count);
          set_if_smaller(nb_desired_values, AUTO_INC_DEFAULT_NB_MAX);
        }
        else
          nb_desired_values= AUTO_INC_DEFAULT_NB_MAX;
      }
      /* Ask the engine; it reports failure by setting nr to ULONGLONG_MAX. */
      get_auto_increment(variables->auto_increment_offset,
                         variables->auto_increment_increment,
                         nb_desired_values, &nr,
                         &nb_reserved_values);
      if (nr == ULONGLONG_MAX)
        DBUG_RETURN(HA_ERR_AUTOINC_READ_FAILED);  // Mark failure

      /*
        That rounding below should not be needed when all engines actually
        respect offset and increment in get_auto_increment(). But they don't
        so we still do it. Wonder if for the not-first-in-index we should do
        it. Hope that this rounding didn't push us out of the interval; even
        if it did we cannot do anything about it (calling the engine again
        will not help as we inserted no row).
      */
      nr= compute_next_insert_id(nr-1, variables);
    }

    if (table->s->next_number_keypart == 0)
    {
      /* We must defer the appending until "nr" has been possibly truncated */
      append= TRUE;
    }
    else
    {
      /*
        For such auto_increment there is no notion of interval, just a
        singleton. The interval is not even stored in
        thd->auto_inc_interval_for_cur_row, so we are sure to call the engine
        for next row.
      */
      DBUG_PRINT("info",("auto_increment: special not-first-in-index"));
    }
  }

  /*
    ULONGLONG_MAX is not a usable value; "nr" can hold it here e.g. when
    compute_next_insert_id() could not produce a value larger than its input.
  */
  if (unlikely(nr == ULONGLONG_MAX))
      DBUG_RETURN(HA_ERR_AUTOINC_ERANGE);

  DBUG_ASSERT(nr != 0);
  /* nb_reserved_values is only meaningful (and only set) when append is set */
  DBUG_PRINT("info",("auto_increment: %llu  nb_reserved_values: %llu",
                     nr, append ? nb_reserved_values : 0));

  /* Store field without warning (Warning will be printed by insert) */
  {
    Check_level_instant_set check_level_save(thd, CHECK_FIELD_IGNORE);
    tmp= table->next_number_field->store((longlong)nr, TRUE);
  }

  if (unlikely(tmp))                            // Out of range value in store
  {
    /*
      First, test if the query was aborted due to strict mode constraints
      or new field value greater than maximum integer value:
    */
    if (thd->killed == KILL_BAD_DATA ||
        nr > table->next_number_field->get_max_int_value())
    {
      /*
        It's better to return an error here than getting a confusing
        'duplicate key error' later.
      */
      result= HA_ERR_AUTOINC_ERANGE;
    }
    else
    {
      /*
        Field refused this value (overflow) and truncated it, use the result
        of the truncation (which is going to be inserted); however we try to
        decrease it to honour auto_increment_* variables.
        That will shift the left bound of the reserved interval, we don't
        bother shifting the right bound (anyway any other value from this
        interval will cause a duplicate key).
      */
      nr= prev_insert_id(table->next_number_field->val_int(), variables);
      /* If even the rounded-down value is refused, keep what the field holds */
      if (unlikely(table->next_number_field->store((longlong)nr, TRUE)))
        nr= table->next_number_field->val_int();
    }
  }
  /*
    Deferred registration of the newly reserved interval, now that "nr" has
    its final (possibly truncated) value. Note this also runs when "result"
    holds an error; the error is returned further below.
  */
  if (append)
  {
    auto_inc_interval_for_cur_row.replace(nr, nb_reserved_values,
                                          variables->auto_increment_increment);
    auto_inc_intervals_count++;
    /* Row-based replication does not need to store intervals in binlog */
    if (((WSREP_NNULL(thd) && wsrep_emulate_bin_log) ||
         mysql_bin_log.is_open()) &&
        !thd->is_current_stmt_binlog_format_row())
      thd->auto_inc_intervals_in_cur_stmt_for_binlog.
        append(auto_inc_interval_for_cur_row.minimum(),
               auto_inc_interval_for_cur_row.values(),
               variables->auto_increment_increment);
  }

  /*
    Record this autogenerated value. If the caller then
    succeeds to insert this value, it will call
    record_first_successful_insert_id_in_cur_stmt()
    which will set first_successful_insert_id_in_cur_stmt if it's not
    already set.
  */
  insert_id_for_cur_row= nr;

  if (result)                                   // overflow
    DBUG_RETURN(result);

  /*
    Set next insert id to point to next auto-increment value to be able to
    handle multi-row statements.
  */
  set_next_insert_id(compute_next_insert_id(nr, variables));

  DBUG_RETURN(0);
}
3751 
3752 
3753 /** @brief
3754   MySQL signal that it changed the column bitmap
3755 
3756   USAGE
3757     This is for handlers that needs to setup their own column bitmaps.
3758     Normally the handler should set up their own column bitmaps in
3759     index_init() or rnd_init() and in any column_bitmaps_signal() call after
3760     this.
3761 
3762     The handler is allowed to do changes to the bitmap after a index_init or
3763     rnd_init() call is made as after this, MySQL will not use the bitmap
3764     for any program logic checking.
3765 */
column_bitmaps_signal()3766 void handler::column_bitmaps_signal()
3767 {
3768   DBUG_ENTER("column_bitmaps_signal");
3769   if (table)
3770     DBUG_PRINT("info", ("read_set: %p  write_set: %p",
3771                         table->read_set, table->write_set));
3772   DBUG_VOID_RETURN;
3773 }
3774 
3775 
3776 /** @brief
3777   Reserves an interval of auto_increment values from the handler.
3778 
3779   SYNOPSIS
3780     get_auto_increment()
3781     offset
3782     increment
3783     nb_desired_values   how many values we want
3784     first_value         (OUT) the first value reserved by the handler
3785     nb_reserved_values  (OUT) how many values the handler reserved
3786 
3787   offset and increment means that we want values to be of the form
3788   offset + N * increment, where N>=0 is integer.
3789   If the function sets *first_value to ~(ulonglong)0 it means an error.
3790   If the function sets *nb_reserved_values to ULONGLONG_MAX it means it has
3791   reserved to "positive infinite".
3792 */
get_auto_increment(ulonglong offset,ulonglong increment,ulonglong nb_desired_values,ulonglong * first_value,ulonglong * nb_reserved_values)3793 void handler::get_auto_increment(ulonglong offset, ulonglong increment,
3794                                  ulonglong nb_desired_values,
3795                                  ulonglong *first_value,
3796                                  ulonglong *nb_reserved_values)
3797 {
3798   ulonglong nr;
3799   int error;
3800   MY_BITMAP *old_read_set;
3801   bool rnd_inited= (inited ==  RND);
3802 
3803   if (rnd_inited && ha_rnd_end())
3804     return;
3805 
3806   old_read_set= table->prepare_for_keyread(table->s->next_number_index);
3807 
3808   if (ha_index_init(table->s->next_number_index, 1))
3809   {
3810     /* This should never happen, assert in debug, and fail in release build */
3811     DBUG_ASSERT(0);
3812     (void) extra(HA_EXTRA_NO_KEYREAD);
3813     *first_value= ULONGLONG_MAX;
3814     if (rnd_inited && ha_rnd_init_with_error(0))
3815     {
3816       //TODO: it would be nice to return here an error
3817     }
3818     return;
3819   }
3820 
3821   if (table->s->next_number_keypart == 0)
3822   {						// Autoincrement at key-start
3823     error= ha_index_last(table->record[1]);
3824     /*
3825       MySQL implicitly assumes such method does locking (as MySQL decides to
3826       use nr+increment without checking again with the handler, in
3827       handler::update_auto_increment()), so reserves to infinite.
3828     */
3829     *nb_reserved_values= ULONGLONG_MAX;
3830   }
3831   else
3832   {
3833     uchar key[MAX_KEY_LENGTH];
3834     key_copy(key, table->record[0],
3835              table->key_info + table->s->next_number_index,
3836              table->s->next_number_key_offset);
3837     error= ha_index_read_map(table->record[1], key,
3838                              make_prev_keypart_map(table->s->
3839                                                    next_number_keypart),
3840                              HA_READ_PREFIX_LAST);
3841     /*
3842       MySQL needs to call us for next row: assume we are inserting ("a",null)
3843       here, we return 3, and next this statement will want to insert
3844       ("b",null): there is no reason why ("b",3+1) would be the good row to
3845       insert: maybe it already exists, maybe 3+1 is too large...
3846     */
3847     *nb_reserved_values= 1;
3848   }
3849 
3850   if (unlikely(error))
3851   {
3852     if (error == HA_ERR_END_OF_FILE || error == HA_ERR_KEY_NOT_FOUND)
3853       /* No entry found, that's fine */;
3854     else
3855       print_error(error, MYF(0));
3856     nr= 1;
3857   }
3858   else
3859     nr= ((ulonglong) table->next_number_field->
3860          val_int_offset(table->s->rec_buff_length)+1);
3861   ha_index_end();
3862   table->restore_column_maps_after_keyread(old_read_set);
3863   *first_value= nr;
3864   if (rnd_inited && ha_rnd_init_with_error(0))
3865   {
3866     //TODO: it would be nice to return here an error
3867   }
3868   return;
3869 }
3870 
3871 
ha_release_auto_increment()3872 void handler::ha_release_auto_increment()
3873 {
3874   DBUG_ENTER("ha_release_auto_increment");
3875   DBUG_ASSERT(table_share->tmp_table != NO_TMP_TABLE ||
3876               m_lock_type != F_UNLCK ||
3877               (!next_insert_id && !insert_id_for_cur_row));
3878   release_auto_increment();
3879   insert_id_for_cur_row= 0;
3880   auto_inc_interval_for_cur_row.replace(0, 0, 0);
3881   auto_inc_intervals_count= 0;
3882   if (next_insert_id > 0)
3883   {
3884     next_insert_id= 0;
3885     /*
3886       this statement used forced auto_increment values if there were some,
3887       wipe them away for other statements.
3888     */
3889     table->in_use->auto_inc_intervals_forced.empty();
3890   }
3891   DBUG_VOID_RETURN;
3892 }
3893 
3894 
3895 /**
3896   Construct and emit duplicate key error message using information
3897   from table's record buffer.
3898 
3899   @param table    TABLE object which record buffer should be used as
3900                   source for column values.
3901   @param key      Key description.
3902   @param msg      Error message template to which key value should be
3903                   added.
3904   @param errflag  Flags for my_error() call.
3905 
3906   @notes
3907     The error message is from ER_DUP_ENTRY_WITH_KEY_NAME but to keep things compatibly
3908     with old code, the error number is ER_DUP_ENTRY
3909 */
3910 
print_keydup_error(TABLE * table,KEY * key,const char * msg,myf errflag)3911 void print_keydup_error(TABLE *table, KEY *key, const char *msg, myf errflag)
3912 {
3913   /* Write the duplicated key in the error message */
3914   char key_buff[MAX_KEY_LENGTH];
3915   String str(key_buff,sizeof(key_buff),system_charset_info);
3916 
3917   if (key == NULL)
3918   {
3919     /*
3920       Key is unknown. Should only happen if storage engine reports wrong
3921       duplicate key number.
3922     */
3923     my_printf_error(ER_DUP_ENTRY, msg, errflag, "", "*UNKNOWN*");
3924   }
3925   else
3926   {
3927     if (key->algorithm == HA_KEY_ALG_LONG_HASH)
3928       setup_keyinfo_hash(key);
3929     /* Table is opened and defined at this point */
3930     key_unpack(&str,table, key);
3931     uint max_length=MYSQL_ERRMSG_SIZE-(uint) strlen(msg);
3932     if (str.length() >= max_length)
3933     {
3934       str.length(max_length-4);
3935       str.append(STRING_WITH_LEN("..."));
3936     }
3937     my_printf_error(ER_DUP_ENTRY, msg, errflag, str.c_ptr_safe(),
3938                     key->name.str);
3939     if (key->algorithm == HA_KEY_ALG_LONG_HASH)
3940       re_setup_keyinfo_hash(key);
3941   }
3942 }
3943 
3944 /**
3945   Construct and emit duplicate key error message using information
3946   from table's record buffer.
3947 
3948   @sa print_keydup_error(table, key, msg, errflag).
3949 */
3950 
print_keydup_error(TABLE * table,KEY * key,myf errflag)3951 void print_keydup_error(TABLE *table, KEY *key, myf errflag)
3952 {
3953   print_keydup_error(table, key,
3954                      ER_THD(table->in_use, ER_DUP_ENTRY_WITH_KEY_NAME),
3955                      errflag);
3956 }
3957 
3958 /**
3959   Print error that we got from handler function.
3960 
3961   @note
3962     In case of delete table it's only safe to use the following parts of
3963     the 'table' structure:
3964     - table->s->path
3965     - table->alias
3966 */
3967 
3968 #define SET_FATAL_ERROR fatal_error=1
3969 
print_error(int error,myf errflag)3970 void handler::print_error(int error, myf errflag)
3971 {
3972   bool fatal_error= 0;
3973   DBUG_ENTER("handler::print_error");
3974   DBUG_PRINT("enter",("error: %d",error));
3975 
3976   if (ha_thd()->transaction_rollback_request)
3977   {
3978     /* Ensure this becomes a true error */
3979     errflag&= ~(ME_WARNING | ME_NOTE);
3980   }
3981 
3982   int textno= -1; // impossible value
3983   switch (error) {
3984   case EACCES:
3985     textno=ER_OPEN_AS_READONLY;
3986     break;
3987   case EAGAIN:
3988     textno=ER_FILE_USED;
3989     break;
3990   case ENOENT:
3991   case ENOTDIR:
3992   case ELOOP:
3993     textno=ER_FILE_NOT_FOUND;
3994     break;
3995   case ENOSPC:
3996   case HA_ERR_DISK_FULL:
3997     textno= ER_DISK_FULL;
3998     SET_FATAL_ERROR;                            // Ensure error is logged
3999     break;
4000   case HA_ERR_KEY_NOT_FOUND:
4001   case HA_ERR_NO_ACTIVE_RECORD:
4002   case HA_ERR_RECORD_DELETED:
4003   case HA_ERR_END_OF_FILE:
4004     /*
4005       This errors is not not normally fatal (for example for reads). However
4006       if you get it during an update or delete, then its fatal.
4007       As the user is calling print_error() (which is not done on read), we
4008       assume something when wrong with the update or delete.
4009     */
4010     SET_FATAL_ERROR;
4011     textno=ER_KEY_NOT_FOUND;
4012     break;
4013   case HA_ERR_ABORTED_BY_USER:
4014   {
4015     DBUG_ASSERT(ha_thd()->killed);
4016     ha_thd()->send_kill_message();
4017     DBUG_VOID_RETURN;
4018   }
4019   case HA_ERR_WRONG_MRG_TABLE_DEF:
4020     textno=ER_WRONG_MRG_TABLE;
4021     break;
4022   case HA_ERR_FOUND_DUPP_KEY:
4023   {
4024     if (table)
4025     {
4026       uint key_nr=get_dup_key(error);
4027       if ((int) key_nr >= 0 && key_nr < table->s->keys)
4028       {
4029         print_keydup_error(table, &table->key_info[key_nr], errflag);
4030         DBUG_VOID_RETURN;
4031       }
4032     }
4033     textno=ER_DUP_KEY;
4034     break;
4035   }
4036   case HA_ERR_FOREIGN_DUPLICATE_KEY:
4037   {
4038     char rec_buf[MAX_KEY_LENGTH];
4039     String rec(rec_buf, sizeof(rec_buf), system_charset_info);
4040     /* Table is opened and defined at this point */
4041 
4042     /*
4043       Just print the subset of fields that are part of the first index,
4044       printing the whole row from there is not easy.
4045     */
4046     key_unpack(&rec, table, &table->key_info[0]);
4047 
4048     char child_table_name[NAME_LEN + 1];
4049     char child_key_name[NAME_LEN + 1];
4050     if (get_foreign_dup_key(child_table_name, sizeof(child_table_name),
4051                             child_key_name, sizeof(child_key_name)))
4052     {
4053       my_error(ER_FOREIGN_DUPLICATE_KEY_WITH_CHILD_INFO, errflag,
4054                table_share->table_name.str, rec.c_ptr_safe(),
4055                child_table_name, child_key_name);
4056       }
4057     else
4058     {
4059       my_error(ER_FOREIGN_DUPLICATE_KEY_WITHOUT_CHILD_INFO, errflag,
4060                table_share->table_name.str, rec.c_ptr_safe());
4061     }
4062     DBUG_VOID_RETURN;
4063   }
4064   case HA_ERR_NULL_IN_SPATIAL:
4065     my_error(ER_CANT_CREATE_GEOMETRY_OBJECT, errflag);
4066     DBUG_VOID_RETURN;
4067   case HA_ERR_FOUND_DUPP_UNIQUE:
4068     textno=ER_DUP_UNIQUE;
4069     break;
4070   case HA_ERR_RECORD_CHANGED:
4071     /*
4072       This is not fatal error when using HANDLER interface
4073       SET_FATAL_ERROR;
4074     */
4075     textno=ER_CHECKREAD;
4076     break;
4077   case HA_ERR_CRASHED:
4078     SET_FATAL_ERROR;
4079     textno=ER_NOT_KEYFILE;
4080     break;
4081   case HA_ERR_WRONG_IN_RECORD:
4082     SET_FATAL_ERROR;
4083     textno= ER_CRASHED_ON_USAGE;
4084     break;
4085   case HA_ERR_CRASHED_ON_USAGE:
4086     SET_FATAL_ERROR;
4087     textno=ER_CRASHED_ON_USAGE;
4088     break;
4089   case HA_ERR_NOT_A_TABLE:
4090     textno= error;
4091     break;
4092   case HA_ERR_CRASHED_ON_REPAIR:
4093     SET_FATAL_ERROR;
4094     textno=ER_CRASHED_ON_REPAIR;
4095     break;
4096   case HA_ERR_OUT_OF_MEM:
4097     textno=ER_OUT_OF_RESOURCES;
4098     break;
4099   case HA_ERR_WRONG_COMMAND:
4100     my_error(ER_ILLEGAL_HA, MYF(0), table_type(), table_share->db.str,
4101              table_share->table_name.str);
4102     DBUG_VOID_RETURN;
4103     break;
4104   case HA_ERR_OLD_FILE:
4105     textno=ER_OLD_KEYFILE;
4106     break;
4107   case HA_ERR_UNSUPPORTED:
4108     textno=ER_UNSUPPORTED_EXTENSION;
4109     break;
4110   case HA_ERR_RECORD_FILE_FULL:
4111   {
4112     textno=ER_RECORD_FILE_FULL;
4113     /* Write the error message to error log */
4114     errflag|= ME_ERROR_LOG;
4115     break;
4116   }
4117   case HA_ERR_INDEX_FILE_FULL:
4118   {
4119     textno=ER_INDEX_FILE_FULL;
4120     /* Write the error message to error log */
4121     errflag|= ME_ERROR_LOG;
4122     break;
4123   }
4124   case HA_ERR_LOCK_WAIT_TIMEOUT:
4125     textno=ER_LOCK_WAIT_TIMEOUT;
4126     break;
4127   case HA_ERR_LOCK_TABLE_FULL:
4128     textno=ER_LOCK_TABLE_FULL;
4129     break;
4130   case HA_ERR_LOCK_DEADLOCK:
4131   {
4132     String str, full_err_msg(ER_DEFAULT(ER_LOCK_DEADLOCK), system_charset_info);
4133 
4134     get_error_message(error, &str);
4135     full_err_msg.append(str);
4136     my_printf_error(ER_LOCK_DEADLOCK, "%s", errflag, full_err_msg.c_ptr_safe());
4137     DBUG_VOID_RETURN;
4138   }
4139   case HA_ERR_READ_ONLY_TRANSACTION:
4140     textno=ER_READ_ONLY_TRANSACTION;
4141     break;
4142   case HA_ERR_CANNOT_ADD_FOREIGN:
4143     textno=ER_CANNOT_ADD_FOREIGN;
4144     break;
4145   case HA_ERR_ROW_IS_REFERENCED:
4146   {
4147     String str;
4148     get_error_message(error, &str);
4149     my_printf_error(ER_ROW_IS_REFERENCED_2,
4150                     ER(str.length() ? ER_ROW_IS_REFERENCED_2 : ER_ROW_IS_REFERENCED),
4151                     errflag, str.c_ptr_safe());
4152     DBUG_VOID_RETURN;
4153   }
4154   case HA_ERR_NO_REFERENCED_ROW:
4155   {
4156     String str;
4157     get_error_message(error, &str);
4158     my_printf_error(ER_NO_REFERENCED_ROW_2,
4159                     ER(str.length() ? ER_NO_REFERENCED_ROW_2 : ER_NO_REFERENCED_ROW),
4160                     errflag, str.c_ptr_safe());
4161     DBUG_VOID_RETURN;
4162   }
4163   case HA_ERR_TABLE_DEF_CHANGED:
4164     textno=ER_TABLE_DEF_CHANGED;
4165     break;
4166   case HA_ERR_NO_SUCH_TABLE:
4167     my_error(ER_NO_SUCH_TABLE_IN_ENGINE, errflag, table_share->db.str,
4168              table_share->table_name.str);
4169     DBUG_VOID_RETURN;
4170   case HA_ERR_RBR_LOGGING_FAILED:
4171     textno= ER_BINLOG_ROW_LOGGING_FAILED;
4172     break;
4173   case HA_ERR_DROP_INDEX_FK:
4174   {
4175     const char *ptr= "???";
4176     uint key_nr= get_dup_key(error);
4177     if ((int) key_nr >= 0)
4178       ptr= table->key_info[key_nr].name.str;
4179     my_error(ER_DROP_INDEX_FK, errflag, ptr);
4180     DBUG_VOID_RETURN;
4181   }
4182   case HA_ERR_TABLE_NEEDS_UPGRADE:
4183     textno= ER_TABLE_NEEDS_UPGRADE;
4184     my_error(ER_TABLE_NEEDS_UPGRADE, errflag,
4185              "TABLE", table_share->table_name.str);
4186     DBUG_VOID_RETURN;
4187   case HA_ERR_NO_PARTITION_FOUND:
4188     textno=ER_WRONG_PARTITION_NAME;
4189     break;
4190   case HA_ERR_TABLE_READONLY:
4191     textno= ER_OPEN_AS_READONLY;
4192     break;
4193   case HA_ERR_AUTOINC_READ_FAILED:
4194     textno= ER_AUTOINC_READ_FAILED;
4195     break;
4196   case HA_ERR_AUTOINC_ERANGE:
4197     textno= error;
4198     my_error(textno, errflag, table->next_number_field->field_name.str,
4199              table->in_use->get_stmt_da()->current_row_for_warning());
4200     DBUG_VOID_RETURN;
4201     break;
4202   case HA_ERR_TOO_MANY_CONCURRENT_TRXS:
4203     textno= ER_TOO_MANY_CONCURRENT_TRXS;
4204     break;
4205   case HA_ERR_INDEX_COL_TOO_LONG:
4206     textno= ER_INDEX_COLUMN_TOO_LONG;
4207     break;
4208   case HA_ERR_NOT_IN_LOCK_PARTITIONS:
4209     textno=ER_ROW_DOES_NOT_MATCH_GIVEN_PARTITION_SET;
4210     break;
4211   case HA_ERR_INDEX_CORRUPT:
4212     textno= ER_INDEX_CORRUPT;
4213     break;
4214   case HA_ERR_UNDO_REC_TOO_BIG:
4215     textno= ER_UNDO_RECORD_TOO_BIG;
4216     break;
4217   case HA_ERR_TABLE_IN_FK_CHECK:
4218     textno= ER_TABLE_IN_FK_CHECK;
4219     break;
4220   case HA_ERR_COMMIT_ERROR:
4221     textno= ER_ERROR_DURING_COMMIT;
4222     break;
4223   case HA_ERR_PARTITION_LIST:
4224     my_error(ER_VERS_NOT_ALLOWED, errflag, table->s->db.str, table->s->table_name.str);
4225     DBUG_VOID_RETURN;
4226   default:
4227     {
4228       /* The error was "unknown" to this function.
4229 	 Ask handler if it has got a message for this error */
4230       bool temporary= FALSE;
4231       String str;
4232       temporary= get_error_message(error, &str);
4233       if (!str.is_empty())
4234       {
4235 	const char* engine= table_type();
4236 	if (temporary)
4237 	  my_error(ER_GET_TEMPORARY_ERRMSG, errflag, error, str.c_ptr(),
4238                    engine);
4239 	else
4240         {
4241           SET_FATAL_ERROR;
4242 	  my_error(ER_GET_ERRMSG, errflag, error, str.c_ptr(), engine);
4243         }
4244       }
4245       else
4246         my_error(ER_GET_ERRNO, errflag, error, table_type());
4247       DBUG_VOID_RETURN;
4248     }
4249   }
4250   DBUG_ASSERT(textno > 0);
4251   if (unlikely(fatal_error))
4252   {
4253     /* Ensure this becomes a true error */
4254     errflag&= ~(ME_WARNING | ME_NOTE);
4255     if ((debug_assert_if_crashed_table ||
4256                       global_system_variables.log_warnings > 1))
4257     {
4258       /*
4259         Log error to log before we crash or if extended warnings are requested
4260       */
4261       errflag|= ME_ERROR_LOG;
4262     }
4263   }
4264 
4265   /* if we got an OS error from a file-based engine, specify a path of error */
4266   if (error < HA_ERR_FIRST && bas_ext()[0])
4267   {
4268     char buff[FN_REFLEN];
4269     strxnmov(buff, sizeof(buff),
4270              table_share->normalized_path.str, bas_ext()[0], NULL);
4271     my_error(textno, errflag, buff, error);
4272   }
4273   else
4274     my_error(textno, errflag, table_share->table_name.str, error);
4275   DBUG_VOID_RETURN;
4276 }
4277 
4278 
4279 /**
4280   Return an error message specific to this handler.
4281 
4282   @param error  error code previously returned by handler
4283   @param buf    pointer to String where to add error message
4284 
4285   @return
4286     Returns true if this is a temporary error
4287 */
get_error_message(int error,String * buf)4288 bool handler::get_error_message(int error, String* buf)
4289 {
4290   DBUG_EXECUTE_IF("external_lock_failure",
4291                   buf->set_ascii(STRING_WITH_LEN("KABOOM!")););
4292   return FALSE;
4293 }
4294 
4295 /**
4296   Check for incompatible collation changes.
4297 
4298   @retval
4299     HA_ADMIN_NEEDS_UPGRADE   Table may have data requiring upgrade.
4300   @retval
4301     0                        No upgrade required.
4302 */
4303 
check_collation_compatibility()4304 int handler::check_collation_compatibility()
4305 {
4306   ulong mysql_version= table->s->mysql_version;
4307 
4308   if (mysql_version < 50124)
4309   {
4310     KEY *key= table->key_info;
4311     KEY *key_end= key + table->s->keys;
4312     for (; key < key_end; key++)
4313     {
4314       KEY_PART_INFO *key_part= key->key_part;
4315       KEY_PART_INFO *key_part_end= key_part + key->user_defined_key_parts;
4316       for (; key_part < key_part_end; key_part++)
4317       {
4318         if (!key_part->fieldnr)
4319           continue;
4320         Field *field= table->field[key_part->fieldnr - 1];
4321         uint cs_number= field->charset()->number;
4322         if ((mysql_version < 50048 &&
4323              (cs_number == 11 || /* ascii_general_ci - bug #29499, bug #27562 */
4324               cs_number == 41 || /* latin7_general_ci - bug #29461 */
4325               cs_number == 42 || /* latin7_general_cs - bug #29461 */
4326               cs_number == 20 || /* latin7_estonian_cs - bug #29461 */
4327               cs_number == 21 || /* latin2_hungarian_ci - bug #29461 */
4328               cs_number == 22 || /* koi8u_general_ci - bug #29461 */
4329               cs_number == 23 || /* cp1251_ukrainian_ci - bug #29461 */
4330               cs_number == 26)) || /* cp1250_general_ci - bug #29461 */
4331              (mysql_version < 50124 &&
4332              (cs_number == 33 || /* utf8mb3_general_ci - bug #27877 */
4333               cs_number == 35))) /* ucs2_general_ci - bug #27877 */
4334           return HA_ADMIN_NEEDS_UPGRADE;
4335       }
4336     }
4337   }
4338 
4339   return 0;
4340 }
4341 
4342 
ha_check_for_upgrade(HA_CHECK_OPT * check_opt)4343 int handler::ha_check_for_upgrade(HA_CHECK_OPT *check_opt)
4344 {
4345   int error;
4346   KEY *keyinfo, *keyend;
4347   KEY_PART_INFO *keypart, *keypartend;
4348 
4349   if (table->s->incompatible_version)
4350     return HA_ADMIN_NEEDS_ALTER;
4351 
4352   if (!table->s->mysql_version)
4353   {
4354     /* check for blob-in-key error */
4355     keyinfo= table->key_info;
4356     keyend= table->key_info + table->s->keys;
4357     for (; keyinfo < keyend; keyinfo++)
4358     {
4359       keypart= keyinfo->key_part;
4360       keypartend= keypart + keyinfo->user_defined_key_parts;
4361       for (; keypart < keypartend; keypart++)
4362       {
4363         if (!keypart->fieldnr)
4364           continue;
4365         Field *field= table->field[keypart->fieldnr-1];
4366         if (field->type() == MYSQL_TYPE_BLOB)
4367         {
4368           if (check_opt->sql_flags & TT_FOR_UPGRADE)
4369             check_opt->flags= T_MEDIUM;
4370           return HA_ADMIN_NEEDS_CHECK;
4371         }
4372       }
4373     }
4374   }
4375   if (table->s->frm_version < FRM_VER_TRUE_VARCHAR)
4376     return HA_ADMIN_NEEDS_ALTER;
4377 
4378   if (unlikely((error= check_collation_compatibility())))
4379     return error;
4380 
4381   return check_for_upgrade(check_opt);
4382 }
4383 
4384 
check_old_types()4385 int handler::check_old_types()
4386 {
4387   Field** field;
4388 
4389   if (!table->s->mysql_version)
4390   {
4391     /* check for bad DECIMAL field */
4392     for (field= table->field; (*field); field++)
4393     {
4394       if ((*field)->type() == MYSQL_TYPE_NEWDECIMAL)
4395       {
4396         return HA_ADMIN_NEEDS_ALTER;
4397       }
4398       if ((*field)->type() == MYSQL_TYPE_VAR_STRING)
4399       {
4400         return HA_ADMIN_NEEDS_ALTER;
4401       }
4402     }
4403   }
4404   return 0;
4405 }
4406 
4407 
update_frm_version(TABLE * table)4408 static bool update_frm_version(TABLE *table)
4409 {
4410   char path[FN_REFLEN];
4411   File file;
4412   int result= 1;
4413   DBUG_ENTER("update_frm_version");
4414 
4415   /*
4416     No need to update frm version in case table was created or checked
4417     by server with the same version. This also ensures that we do not
4418     update frm version for temporary tables as this code doesn't support
4419     temporary tables.
4420   */
4421   if (table->s->mysql_version == MYSQL_VERSION_ID)
4422     DBUG_RETURN(0);
4423 
4424   strxmov(path, table->s->normalized_path.str, reg_ext, NullS);
4425 
4426   if ((file= mysql_file_open(key_file_frm,
4427                              path, O_RDWR|O_BINARY, MYF(MY_WME))) >= 0)
4428   {
4429     uchar version[4];
4430 
4431     int4store(version, MYSQL_VERSION_ID);
4432 
4433     if ((result= (int)mysql_file_pwrite(file, (uchar*) version, 4, 51L,
4434                                         MYF(MY_WME+MY_NABP))))
4435       goto err;
4436 
4437     table->s->mysql_version= MYSQL_VERSION_ID;
4438   }
4439 err:
4440   if (file >= 0)
4441     (void) mysql_file_close(file, MYF(MY_WME));
4442   DBUG_RETURN(result);
4443 }
4444 
4445 
4446 
4447 /**
4448   @return
4449     key if error because of duplicated keys
4450 */
get_dup_key(int error)4451 uint handler::get_dup_key(int error)
4452 {
4453   DBUG_ASSERT(table_share->tmp_table != NO_TMP_TABLE || m_lock_type != F_UNLCK);
4454   DBUG_ENTER("handler::get_dup_key");
4455 
4456   if (lookup_errkey != (uint)-1)
4457     DBUG_RETURN(errkey= lookup_errkey);
4458 
4459   errkey= (uint)-1;
4460   if (error == HA_ERR_FOUND_DUPP_KEY ||
4461       error == HA_ERR_FOREIGN_DUPLICATE_KEY ||
4462       error == HA_ERR_FOUND_DUPP_UNIQUE || error == HA_ERR_NULL_IN_SPATIAL ||
4463       error == HA_ERR_DROP_INDEX_FK)
4464     info(HA_STATUS_ERRKEY | HA_STATUS_NO_LOCK);
4465   DBUG_RETURN(errkey);
4466 }
4467 
4468 
4469 /**
4470   Delete all files with extension from bas_ext().
4471 
4472   @param name		Base name of table
4473 
4474   @note
4475     We assume that the handler may return more extensions than
4476     was actually used for the file. We also assume that the first
4477     extension is the most important one (see the comment near
4478     handlerton::tablefile_extensions). If this exist and we can't delete
4479     that it, we will abort the delete.
4480     If the first one doesn't exists, we have to try to delete all other
4481     extension as there is chance that the server had crashed between
4482     the delete of the first file and the next
4483 
4484   @retval
4485     0   If we successfully deleted at least one file from base_ext and
4486         didn't get any other errors than ENOENT
4487 
4488   @retval
4489     !0  Error
4490 */
4491 
delete_table(const char * name)4492 int handler::delete_table(const char *name)
4493 {
4494   int saved_error= ENOENT;
4495   bool abort_if_first_file_error= 1;
4496   bool some_file_deleted= 0;
4497   DBUG_ENTER("handler::delete_table");
4498 
4499   for (const char **ext= bas_ext(); *ext ; ext++)
4500   {
4501     int err= mysql_file_delete_with_symlink(key_file_misc, name, *ext, MYF(0));
4502     if (err)
4503     {
4504       if (my_errno != ENOENT)
4505       {
4506         saved_error= my_errno;
4507         /*
4508           If error other than file not found on the first existing file,
4509           return the error.
4510           Otherwise delete as much as possible.
4511         */
4512         if (abort_if_first_file_error)
4513           DBUG_RETURN(saved_error);
4514       }
4515     }
4516     else
4517       some_file_deleted= 1;
4518     abort_if_first_file_error= 0;
4519   }
4520   DBUG_RETURN(some_file_deleted && saved_error == ENOENT ? 0 : saved_error);
4521 }
4522 
4523 
rename_table(const char * from,const char * to)4524 int handler::rename_table(const char * from, const char * to)
4525 {
4526   int error= 0;
4527   const char **ext, **start_ext;
4528   start_ext= bas_ext();
4529   for (ext= start_ext; *ext ; ext++)
4530   {
4531     if (unlikely(rename_file_ext(from, to, *ext)))
4532     {
4533       if ((error=my_errno) != ENOENT)
4534 	break;
4535       error= 0;
4536     }
4537   }
4538   if (unlikely(error))
4539   {
4540     /* Try to revert the rename. Ignore errors. */
4541     for (; ext >= start_ext; ext--)
4542       rename_file_ext(to, from, *ext);
4543   }
4544   return error;
4545 }
4546 
4547 
drop_table(const char * name)4548 void handler::drop_table(const char *name)
4549 {
4550   ha_close();
4551   delete_table(name);
4552 }
4553 
4554 
4555 /**
4556    Return true if the error from drop table means that the
4557    table didn't exists
4558 */
4559 
non_existing_table_error(int error)4560 bool non_existing_table_error(int error)
4561 {
4562   return (error == ENOENT ||
4563           (error == EE_DELETE && my_errno == ENOENT) ||
4564           error == HA_ERR_NO_SUCH_TABLE ||
4565           error == HA_ERR_UNSUPPORTED ||
4566           error == ER_NO_SUCH_TABLE ||
4567           error == ER_NO_SUCH_TABLE_IN_ENGINE ||
4568           error == ER_WRONG_OBJECT);
4569 }
4570 
4571 
4572 /**
4573   Performs checks upon the table.
4574 
4575   @param thd                thread doing CHECK TABLE operation
4576   @param check_opt          options from the parser
4577 
4578   @retval
4579     HA_ADMIN_OK               Successful upgrade
4580   @retval
4581     HA_ADMIN_NEEDS_UPGRADE    Table has structures requiring upgrade
4582   @retval
4583     HA_ADMIN_NEEDS_ALTER      Table has structures requiring ALTER TABLE
4584   @retval
4585     HA_ADMIN_NOT_IMPLEMENTED
4586 */
4587 
ha_check(THD * thd,HA_CHECK_OPT * check_opt)4588 int handler::ha_check(THD *thd, HA_CHECK_OPT *check_opt)
4589 {
4590   int error;
4591   DBUG_ASSERT(table_share->tmp_table != NO_TMP_TABLE ||
4592               m_lock_type != F_UNLCK);
4593 
4594   if ((table->s->mysql_version >= MYSQL_VERSION_ID) &&
4595       (check_opt->sql_flags & TT_FOR_UPGRADE))
4596     return 0;
4597 
4598   if (table->s->mysql_version < MYSQL_VERSION_ID)
4599   {
4600     if (unlikely((error= check_old_types())))
4601       return error;
4602     error= ha_check_for_upgrade(check_opt);
4603     if (unlikely(error && (error != HA_ADMIN_NEEDS_CHECK)))
4604       return error;
4605     if (unlikely(!error && (check_opt->sql_flags & TT_FOR_UPGRADE)))
4606       return 0;
4607   }
4608   if (unlikely((error= check(thd, check_opt))))
4609     return error;
4610   /* Skip updating frm version if not main handler. */
4611   if (table->file != this)
4612     return error;
4613   return update_frm_version(table);
4614 }
4615 
4616 /**
4617   A helper function to mark a transaction read-write,
4618   if it is started.
4619 */
4620 
mark_trx_read_write_internal()4621 void handler::mark_trx_read_write_internal()
4622 {
4623   Ha_trx_info *ha_info= &ha_thd()->ha_data[ht->slot].ha_info[0];
4624   /*
4625     When a storage engine method is called, the transaction must
4626     have been started, unless it's a DDL call, for which the
4627     storage engine starts the transaction internally, and commits
4628     it internally, without registering in the ha_list.
4629     Unfortunately here we can't know know for sure if the engine
4630     has registered the transaction or not, so we must check.
4631   */
4632   if (ha_info->is_started())
4633   {
4634     /*
4635       table_share can be NULL, for example, in ha_delete_table() or
4636       ha_rename_table().
4637     */
4638     if (table_share == NULL || table_share->tmp_table == NO_TMP_TABLE)
4639       ha_info->set_trx_read_write();
4640   }
4641 }
4642 
4643 
4644 /**
4645   Repair table: public interface.
4646 
4647   @sa handler::repair()
4648 */
4649 
ha_repair(THD * thd,HA_CHECK_OPT * check_opt)4650 int handler::ha_repair(THD* thd, HA_CHECK_OPT* check_opt)
4651 {
4652   int result;
4653 
4654   mark_trx_read_write();
4655 
4656   result= repair(thd, check_opt);
4657   DBUG_ASSERT(result == HA_ADMIN_NOT_IMPLEMENTED ||
4658               ha_table_flags() & HA_CAN_REPAIR);
4659 
4660   if (result == HA_ADMIN_OK)
4661     result= update_frm_version(table);
4662   return result;
4663 }
4664 
4665 
4666 /**
4667    End bulk insert
4668 */
4669 
ha_end_bulk_insert()4670 int handler::ha_end_bulk_insert()
4671 {
4672   DBUG_ENTER("handler::ha_end_bulk_insert");
4673   DBUG_EXECUTE_IF("crash_end_bulk_insert",
4674                   { extra(HA_EXTRA_FLUSH) ; DBUG_SUICIDE();});
4675   estimation_rows_to_insert= 0;
4676   DBUG_RETURN(end_bulk_insert());
4677 }
4678 
4679 /**
4680   Bulk update row: public interface.
4681 
4682   @sa handler::bulk_update_row()
4683 */
4684 
4685 int
ha_bulk_update_row(const uchar * old_data,const uchar * new_data,ha_rows * dup_key_found)4686 handler::ha_bulk_update_row(const uchar *old_data, const uchar *new_data,
4687                             ha_rows *dup_key_found)
4688 {
4689   DBUG_ASSERT(table_share->tmp_table != NO_TMP_TABLE ||
4690               m_lock_type == F_WRLCK);
4691   mark_trx_read_write();
4692 
4693   return bulk_update_row(old_data, new_data, dup_key_found);
4694 }
4695 
4696 
4697 /**
4698   Delete all rows: public interface.
4699 
4700   @sa handler::delete_all_rows()
4701 */
4702 
4703 int
ha_delete_all_rows()4704 handler::ha_delete_all_rows()
4705 {
4706   DBUG_ASSERT(table_share->tmp_table != NO_TMP_TABLE ||
4707               m_lock_type == F_WRLCK);
4708   mark_trx_read_write();
4709 
4710   return delete_all_rows();
4711 }
4712 
4713 
4714 /**
4715   Truncate table: public interface.
4716 
4717   @sa handler::truncate()
4718 */
4719 
4720 int
ha_truncate()4721 handler::ha_truncate()
4722 {
4723   DBUG_ASSERT(table_share->tmp_table != NO_TMP_TABLE ||
4724               m_lock_type == F_WRLCK);
4725   mark_trx_read_write();
4726 
4727   return truncate();
4728 }
4729 
4730 
4731 /**
4732   Reset auto increment: public interface.
4733 
4734   @sa handler::reset_auto_increment()
4735 */
4736 
4737 int
ha_reset_auto_increment(ulonglong value)4738 handler::ha_reset_auto_increment(ulonglong value)
4739 {
4740   DBUG_ASSERT(table_share->tmp_table != NO_TMP_TABLE ||
4741               m_lock_type == F_WRLCK);
4742   mark_trx_read_write();
4743 
4744   return reset_auto_increment(value);
4745 }
4746 
4747 
4748 /**
4749   Optimize table: public interface.
4750 
4751   @sa handler::optimize()
4752 */
4753 
4754 int
ha_optimize(THD * thd,HA_CHECK_OPT * check_opt)4755 handler::ha_optimize(THD* thd, HA_CHECK_OPT* check_opt)
4756 {
4757   DBUG_ASSERT(table_share->tmp_table != NO_TMP_TABLE ||
4758               m_lock_type == F_WRLCK);
4759   mark_trx_read_write();
4760 
4761   return optimize(thd, check_opt);
4762 }
4763 
4764 
4765 /**
4766   Analyze table: public interface.
4767 
4768   @sa handler::analyze()
4769 */
4770 
4771 int
ha_analyze(THD * thd,HA_CHECK_OPT * check_opt)4772 handler::ha_analyze(THD* thd, HA_CHECK_OPT* check_opt)
4773 {
4774   DBUG_ASSERT(table_share->tmp_table != NO_TMP_TABLE ||
4775               m_lock_type != F_UNLCK);
4776   mark_trx_read_write();
4777 
4778   return analyze(thd, check_opt);
4779 }
4780 
4781 
4782 /**
4783   Check and repair table: public interface.
4784 
4785   @sa handler::check_and_repair()
4786 */
4787 
4788 bool
ha_check_and_repair(THD * thd)4789 handler::ha_check_and_repair(THD *thd)
4790 {
4791   DBUG_ASSERT(table_share->tmp_table != NO_TMP_TABLE ||
4792               m_lock_type == F_UNLCK);
4793   mark_trx_read_write();
4794 
4795   return check_and_repair(thd);
4796 }
4797 
4798 
4799 /**
4800   Disable indexes: public interface.
4801 
4802   @sa handler::disable_indexes()
4803 */
4804 
4805 int
ha_disable_indexes(uint mode)4806 handler::ha_disable_indexes(uint mode)
4807 {
4808   DBUG_ASSERT(table_share->tmp_table != NO_TMP_TABLE ||
4809               m_lock_type != F_UNLCK);
4810   mark_trx_read_write();
4811 
4812   return disable_indexes(mode);
4813 }
4814 
4815 
4816 /**
4817   Enable indexes: public interface.
4818 
4819   @sa handler::enable_indexes()
4820 */
4821 
4822 int
ha_enable_indexes(uint mode)4823 handler::ha_enable_indexes(uint mode)
4824 {
4825   DBUG_ASSERT(table_share->tmp_table != NO_TMP_TABLE ||
4826               m_lock_type != F_UNLCK);
4827   mark_trx_read_write();
4828 
4829   return enable_indexes(mode);
4830 }
4831 
4832 
4833 /**
4834   Discard or import tablespace: public interface.
4835 
4836   @sa handler::discard_or_import_tablespace()
4837 */
4838 
4839 int
ha_discard_or_import_tablespace(my_bool discard)4840 handler::ha_discard_or_import_tablespace(my_bool discard)
4841 {
4842   DBUG_ASSERT(table_share->tmp_table != NO_TMP_TABLE ||
4843               m_lock_type == F_WRLCK);
4844   mark_trx_read_write();
4845 
4846   return discard_or_import_tablespace(discard);
4847 }
4848 
4849 
ha_prepare_inplace_alter_table(TABLE * altered_table,Alter_inplace_info * ha_alter_info)4850 bool handler::ha_prepare_inplace_alter_table(TABLE *altered_table,
4851                                              Alter_inplace_info *ha_alter_info)
4852 {
4853   DBUG_ASSERT(table_share->tmp_table != NO_TMP_TABLE ||
4854               m_lock_type != F_UNLCK);
4855   mark_trx_read_write();
4856 
4857   return prepare_inplace_alter_table(altered_table, ha_alter_info);
4858 }
4859 
4860 
ha_commit_inplace_alter_table(TABLE * altered_table,Alter_inplace_info * ha_alter_info,bool commit)4861 bool handler::ha_commit_inplace_alter_table(TABLE *altered_table,
4862                                             Alter_inplace_info *ha_alter_info,
4863                                             bool commit)
4864 {
4865    /*
4866      At this point we should have an exclusive metadata lock on the table.
4867      The exception is if we're about to roll back changes (commit= false).
4868      In this case, we might be rolling back after a failed lock upgrade,
4869      so we could be holding the same lock level as for inplace_alter_table().
4870    */
4871    DBUG_ASSERT(ha_thd()->mdl_context.is_lock_owner(MDL_key::TABLE,
4872                                                    table->s->db.str,
4873                                                    table->s->table_name.str,
4874                                                    MDL_EXCLUSIVE) ||
4875                !commit);
4876 
4877    return commit_inplace_alter_table(altered_table, ha_alter_info, commit);
4878 }
4879 
4880 
4881 /*
4882    Default implementation to support in-place alter table
4883    and old online add/drop index API
4884 */
4885 
4886 enum_alter_inplace_result
check_if_supported_inplace_alter(TABLE * altered_table,Alter_inplace_info * ha_alter_info)4887 handler::check_if_supported_inplace_alter(TABLE *altered_table,
4888                                           Alter_inplace_info *ha_alter_info)
4889 {
4890   DBUG_ENTER("handler::check_if_supported_inplace_alter");
4891 
4892   HA_CREATE_INFO *create_info= ha_alter_info->create_info;
4893 
4894   if (altered_table->versioned(VERS_TIMESTAMP))
4895     DBUG_RETURN(HA_ALTER_INPLACE_NOT_SUPPORTED);
4896 
4897   alter_table_operations inplace_offline_operations=
4898     ALTER_COLUMN_TYPE_CHANGE_BY_ENGINE |
4899     ALTER_COLUMN_NAME |
4900     ALTER_RENAME_COLUMN |
4901     ALTER_CHANGE_COLUMN_DEFAULT |
4902     ALTER_COLUMN_DEFAULT |
4903     ALTER_COLUMN_OPTION |
4904     ALTER_CHANGE_CREATE_OPTION |
4905     ALTER_DROP_CHECK_CONSTRAINT |
4906     ALTER_PARTITIONED |
4907     ALTER_VIRTUAL_GCOL_EXPR |
4908     ALTER_RENAME |
4909     ALTER_RENAME_INDEX;
4910 
4911   /* Is there at least one operation that requires copy algorithm? */
4912   if (ha_alter_info->handler_flags & ~inplace_offline_operations)
4913     DBUG_RETURN(HA_ALTER_INPLACE_NOT_SUPPORTED);
4914 
4915   /*
4916     The following checks for changes related to ALTER_OPTIONS
4917 
4918     ALTER TABLE tbl_name CONVERT TO CHARACTER SET .. and
4919     ALTER TABLE table_name DEFAULT CHARSET = .. most likely
4920     change column charsets and so not supported in-place through
4921     old API.
4922 
4923     Changing of PACK_KEYS, MAX_ROWS and ROW_FORMAT options were
4924     not supported as in-place operations in old API either.
4925   */
4926   if (create_info->used_fields & (HA_CREATE_USED_CHARSET |
4927                                   HA_CREATE_USED_DEFAULT_CHARSET |
4928                                   HA_CREATE_USED_PACK_KEYS |
4929                                   HA_CREATE_USED_CHECKSUM |
4930                                   HA_CREATE_USED_MAX_ROWS) ||
4931       (table->s->row_type != create_info->row_type))
4932     DBUG_RETURN(HA_ALTER_INPLACE_NOT_SUPPORTED);
4933 
4934   uint table_changes= (ha_alter_info->handler_flags &
4935                        ALTER_COLUMN_TYPE_CHANGE_BY_ENGINE) ?
4936     IS_EQUAL_PACK_LENGTH : IS_EQUAL_YES;
4937   if (table->file->check_if_incompatible_data(create_info, table_changes)
4938       == COMPATIBLE_DATA_YES)
4939     DBUG_RETURN(HA_ALTER_INPLACE_NO_LOCK);
4940 
4941   DBUG_RETURN(HA_ALTER_INPLACE_NOT_SUPPORTED);
4942 }
4943 
Alter_inplace_info(HA_CREATE_INFO * create_info_arg,Alter_info * alter_info_arg,KEY * key_info_arg,uint key_count_arg,partition_info * modified_part_info_arg,bool ignore_arg,bool error_non_empty)4944 Alter_inplace_info::Alter_inplace_info(HA_CREATE_INFO *create_info_arg,
4945                      Alter_info *alter_info_arg,
4946                      KEY *key_info_arg, uint key_count_arg,
4947                      partition_info *modified_part_info_arg,
4948                      bool ignore_arg, bool error_non_empty)
4949     : create_info(create_info_arg),
4950     alter_info(alter_info_arg),
4951     key_info_buffer(key_info_arg),
4952     key_count(key_count_arg),
4953     index_drop_count(0),
4954     index_drop_buffer(nullptr),
4955     index_add_count(0),
4956     index_add_buffer(nullptr),
4957     rename_keys(current_thd->mem_root),
4958     handler_ctx(nullptr),
4959     group_commit_ctx(nullptr),
4960     handler_flags(0),
4961     modified_part_info(modified_part_info_arg),
4962     ignore(ignore_arg),
4963     online(false),
4964     unsupported_reason(nullptr),
4965     error_if_not_empty(error_non_empty)
4966   {}
4967 
report_unsupported_error(const char * not_supported,const char * try_instead) const4968 void Alter_inplace_info::report_unsupported_error(const char *not_supported,
4969                                                   const char *try_instead) const
4970 {
4971   if (unsupported_reason == NULL)
4972     my_error(ER_ALTER_OPERATION_NOT_SUPPORTED, MYF(0),
4973              not_supported, try_instead);
4974   else
4975     my_error(ER_ALTER_OPERATION_NOT_SUPPORTED_REASON, MYF(0),
4976              not_supported, unsupported_reason, try_instead);
4977 }
4978 
4979 
4980 /**
4981   Rename table: public interface.
4982 
4983   @sa handler::rename_table()
4984 */
4985 
4986 int
ha_rename_table(const char * from,const char * to)4987 handler::ha_rename_table(const char *from, const char *to)
4988 {
4989   DBUG_ASSERT(m_lock_type == F_UNLCK);
4990   mark_trx_read_write();
4991 
4992   return rename_table(from, to);
4993 }
4994 
4995 
4996 /**
4997   Drop table in the engine: public interface.
4998 
4999   @sa handler::drop_table()
5000 
5001   The difference between this and delete_table() is that the table is open in
5002   drop_table().
5003 */
5004 
5005 void
ha_drop_table(const char * name)5006 handler::ha_drop_table(const char *name)
5007 {
5008   DBUG_ASSERT(m_lock_type == F_UNLCK);
5009   if (check_if_updates_are_ignored("DROP"))
5010     return;
5011 
5012   mark_trx_read_write();
5013   drop_table(name);
5014 }
5015 
5016 
5017 /**
5018    Structure used during force drop table.
5019 */
5020 
5021 struct st_force_drop_table_params
5022 {
5023   const char *path;
5024   const LEX_CSTRING *db;
5025   const LEX_CSTRING *alias;
5026   int error;
5027   bool discovering;
5028 };
5029 
5030 
5031 /**
5032    Try to delete table from a given plugin
5033    Table types with discovery is ignored as these .frm files would have
5034    been created during discovery and thus doesn't need to be found
5035    for drop table force
5036 */
5037 
delete_table_force(THD * thd,plugin_ref plugin,void * arg)5038 static my_bool delete_table_force(THD *thd, plugin_ref plugin, void *arg)
5039 {
5040   handlerton *hton = plugin_hton(plugin);
5041   st_force_drop_table_params *param = (st_force_drop_table_params *)arg;
5042 
5043   if (param->discovering == (hton->discover_table != NULL) &&
5044       !(thd->slave_thread && (hton->flags & HTON_IGNORE_UPDATES)))
5045   {
5046     int error;
5047     error= ha_delete_table(thd, hton, param->path, param->db, param->alias, 0);
5048     if (error > 0 && !non_existing_table_error(error))
5049       param->error= error;
5050     if (error == 0)
5051     {
5052       if (hton && hton->flags & HTON_TABLE_MAY_NOT_EXIST_ON_SLAVE)
5053         thd->replication_flags |= OPTION_IF_EXISTS;
5054       param->error= 0;
5055       return TRUE;                                // Table was deleted
5056     }
5057   }
5058   return FALSE;
5059 }
5060 
5061 /**
5062    @brief
5063    Traverse all plugins to delete table when .frm file is missing.
5064 
5065    @return -1  Table was not found in any engine
5066    @return 0  Table was found in some engine and delete succeded
5067    @return #  Error from first engine that had a table but didn't succeed to
5068               delete the table
5069    @return HA_ERR_ROW_IS_REFERENCED if foreign key reference is encountered,
5070 
5071 */
5072 
ha_delete_table_force(THD * thd,const char * path,const LEX_CSTRING * db,const LEX_CSTRING * alias)5073 int ha_delete_table_force(THD *thd, const char *path, const LEX_CSTRING *db,
5074                           const LEX_CSTRING *alias)
5075 {
5076   st_force_drop_table_params param;
5077   Table_exists_error_handler no_such_table_handler;
5078   DBUG_ENTER("ha_delete_table_force");
5079 
5080   param.path=        path;
5081   param.db=          db;
5082   param.alias=       alias;
5083   param.error=       -1;                   // Table not found
5084   param.discovering= true;
5085 
5086   thd->push_internal_handler(&no_such_table_handler);
5087   if (plugin_foreach(thd, delete_table_force, MYSQL_STORAGE_ENGINE_PLUGIN,
5088                      &param))
5089     param.error= 0;                            // Delete succeded
5090   else
5091   {
5092     param.discovering= false;
5093     if (plugin_foreach(thd, delete_table_force, MYSQL_STORAGE_ENGINE_PLUGIN,
5094                        &param))
5095       param.error= 0;                            // Delete succeded
5096   }
5097   thd->pop_internal_handler();
5098   DBUG_RETURN(param.error);
5099 }
5100 
5101 
5102 /**
5103   Create a table in the engine: public interface.
5104 
5105   @sa handler::create()
5106 */
5107 
5108 int
ha_create(const char * name,TABLE * form,HA_CREATE_INFO * info_arg)5109 handler::ha_create(const char *name, TABLE *form, HA_CREATE_INFO *info_arg)
5110 {
5111   DBUG_ASSERT(m_lock_type == F_UNLCK);
5112   mark_trx_read_write();
5113   int error= create(name, form, info_arg);
5114   if (!error &&
5115       !(info_arg->options & (HA_LEX_CREATE_TMP_TABLE | HA_CREATE_TMP_ALTER)))
5116     mysql_audit_create_table(form);
5117   return error;
5118 }
5119 
5120 
5121 /**
5122   Create handler files for CREATE TABLE: public interface.
5123 
5124   @sa handler::create_partitioning_metadata()
5125 */
5126 
5127 int
ha_create_partitioning_metadata(const char * name,const char * old_name,chf_create_flags action_flag)5128 handler::ha_create_partitioning_metadata(const char *name,
5129                                          const char *old_name,
5130                                          chf_create_flags action_flag)
5131 {
5132   /*
5133     Normally this is done when unlocked, but in fast_alter_partition_table,
5134     it is done on an already locked handler when preparing to alter/rename
5135     partitions.
5136   */
5137   DBUG_ASSERT(m_lock_type == F_UNLCK ||
5138               (!old_name && strcmp(name, table_share->path.str)));
5139 
5140 
5141   mark_trx_read_write();
5142   return create_partitioning_metadata(name, old_name, action_flag);
5143 }
5144 
5145 
5146 /**
5147   Change partitions: public interface.
5148 
5149   @sa handler::change_partitions()
5150 */
5151 
5152 int
ha_change_partitions(HA_CREATE_INFO * create_info,const char * path,ulonglong * const copied,ulonglong * const deleted,const uchar * pack_frm_data,size_t pack_frm_len)5153 handler::ha_change_partitions(HA_CREATE_INFO *create_info,
5154                               const char *path,
5155                               ulonglong * const copied,
5156                               ulonglong * const deleted,
5157                               const uchar *pack_frm_data,
5158                               size_t pack_frm_len)
5159 {
5160   /*
5161     Must have at least RDLCK or be a TMP table. Read lock is needed to read
5162     from current partitions and write lock will be taken on new partitions.
5163   */
5164   DBUG_ASSERT(table_share->tmp_table != NO_TMP_TABLE ||
5165               m_lock_type != F_UNLCK);
5166 
5167   mark_trx_read_write();
5168 
5169   return change_partitions(create_info, path, copied, deleted,
5170                            pack_frm_data, pack_frm_len);
5171 }
5172 
5173 
5174 /**
5175   Drop partitions: public interface.
5176 
5177   @sa handler::drop_partitions()
5178 */
5179 
5180 int
ha_drop_partitions(const char * path)5181 handler::ha_drop_partitions(const char *path)
5182 {
5183   DBUG_ASSERT(!table->db_stat);
5184 
5185   mark_trx_read_write();
5186 
5187   return drop_partitions(path);
5188 }
5189 
5190 
5191 /**
5192   Rename partitions: public interface.
5193 
5194   @sa handler::rename_partitions()
5195 */
5196 
5197 int
ha_rename_partitions(const char * path)5198 handler::ha_rename_partitions(const char *path)
5199 {
5200   DBUG_ASSERT(!table->db_stat);
5201 
5202   mark_trx_read_write();
5203 
5204   return rename_partitions(path);
5205 }
5206 
5207 
5208 /**
5209   Tell the storage engine that it is allowed to "disable transaction" in the
5210   handler. It is a hint that ACID is not required - it was used in NDB for
5211   ALTER TABLE, for example, when data are copied to temporary table.
5212   A storage engine may treat this hint any way it likes. NDB for example
5213   started to commit every now and then automatically.
5214   This hint can be safely ignored.
5215 */
ha_enable_transaction(THD * thd,bool on)5216 int ha_enable_transaction(THD *thd, bool on)
5217 {
5218   int error=0;
5219   DBUG_ENTER("ha_enable_transaction");
5220   DBUG_PRINT("enter", ("on: %d", (int) on));
5221 
5222   if ((thd->transaction->on= on))
5223   {
5224     /*
5225       Now all storage engines should have transaction handling enabled.
5226       But some may have it enabled all the time - "disabling" transactions
5227       is an optimization hint that storage engine is free to ignore.
5228       So, let's commit an open transaction (if any) now.
5229     */
5230     if (likely(!(error= ha_commit_trans(thd, 0))))
5231       error= trans_commit_implicit(thd);
5232   }
5233   DBUG_RETURN(error);
5234 }
5235 
index_next_same(uchar * buf,const uchar * key,uint keylen)5236 int handler::index_next_same(uchar *buf, const uchar *key, uint keylen)
5237 {
5238   int error;
5239   DBUG_ENTER("handler::index_next_same");
5240   if (!(error=index_next(buf)))
5241   {
5242     my_ptrdiff_t ptrdiff= buf - table->record[0];
5243     uchar *UNINIT_VAR(save_record_0);
5244     KEY *UNINIT_VAR(key_info);
5245     KEY_PART_INFO *UNINIT_VAR(key_part);
5246     KEY_PART_INFO *UNINIT_VAR(key_part_end);
5247 
5248     /*
5249       key_cmp_if_same() compares table->record[0] against 'key'.
5250       In parts it uses table->record[0] directly, in parts it uses
5251       field objects with their local pointers into table->record[0].
5252       If 'buf' is distinct from table->record[0], we need to move
5253       all record references. This is table->record[0] itself and
5254       the field pointers of the fields used in this key.
5255     */
5256     if (ptrdiff)
5257     {
5258       save_record_0= table->record[0];
5259       table->record[0]= buf;
5260       key_info= table->key_info + active_index;
5261       key_part= key_info->key_part;
5262       key_part_end= key_part + key_info->user_defined_key_parts;
5263       for (; key_part < key_part_end; key_part++)
5264       {
5265         DBUG_ASSERT(key_part->field);
5266         key_part->field->move_field_offset(ptrdiff);
5267       }
5268     }
5269 
5270     if (key_cmp_if_same(table, key, active_index, keylen))
5271     {
5272       table->status=STATUS_NOT_FOUND;
5273       error=HA_ERR_END_OF_FILE;
5274     }
5275 
5276     /* Move back if necessary. */
5277     if (ptrdiff)
5278     {
5279       table->record[0]= save_record_0;
5280       for (key_part= key_info->key_part; key_part < key_part_end; key_part++)
5281         key_part->field->move_field_offset(-ptrdiff);
5282     }
5283   }
5284   DBUG_PRINT("return",("%i", error));
5285   DBUG_RETURN(error);
5286 }
5287 
5288 
get_dynamic_partition_info(PARTITION_STATS * stat_info,uint part_id)5289 void handler::get_dynamic_partition_info(PARTITION_STATS *stat_info,
5290                                          uint part_id)
5291 {
5292   info(HA_STATUS_CONST | HA_STATUS_TIME | HA_STATUS_VARIABLE |
5293        HA_STATUS_NO_LOCK);
5294   stat_info->records=              stats.records;
5295   stat_info->mean_rec_length=      stats.mean_rec_length;
5296   stat_info->data_file_length=     stats.data_file_length;
5297   stat_info->max_data_file_length= stats.max_data_file_length;
5298   stat_info->index_file_length=    stats.index_file_length;
5299   stat_info->max_index_file_length=stats.max_index_file_length;
5300   stat_info->delete_length=        stats.delete_length;
5301   stat_info->create_time=          stats.create_time;
5302   stat_info->update_time=          stats.update_time;
5303   stat_info->check_time=           stats.check_time;
5304   stat_info->check_sum=            stats.checksum;
5305   stat_info->check_sum_null=       stats.checksum_null;
5306 }
5307 
5308 
5309 /*
5310   Updates the global table stats with the TABLE this handler represents
5311 */
5312 
update_global_table_stats()5313 void handler::update_global_table_stats()
5314 {
5315   TABLE_STATS * table_stats;
5316 
5317   status_var_add(table->in_use->status_var.rows_read, rows_read);
5318   DBUG_ASSERT(rows_tmp_read == 0);
5319 
5320   if (!table->in_use->userstat_running)
5321   {
5322     rows_read= rows_changed= 0;
5323     return;
5324   }
5325 
5326   if (rows_read + rows_changed == 0)
5327     return;                                     // Nothing to update.
5328 
5329   DBUG_ASSERT(table->s);
5330   DBUG_ASSERT(table->s->table_cache_key.str);
5331 
5332   mysql_mutex_lock(&LOCK_global_table_stats);
5333   /* Gets the global table stats, creating one if necessary. */
5334   if (!(table_stats= (TABLE_STATS*)
5335         my_hash_search(&global_table_stats,
5336                     (uchar*) table->s->table_cache_key.str,
5337                     table->s->table_cache_key.length)))
5338   {
5339     if (!(table_stats = ((TABLE_STATS*)
5340                          my_malloc(PSI_INSTRUMENT_ME, sizeof(TABLE_STATS),
5341                                    MYF(MY_WME | MY_ZEROFILL)))))
5342     {
5343       /* Out of memory error already given */
5344       goto end;
5345     }
5346     memcpy(table_stats->table, table->s->table_cache_key.str,
5347            table->s->table_cache_key.length);
5348     table_stats->table_name_length= (uint)table->s->table_cache_key.length;
5349     table_stats->engine_type= ht->db_type;
5350     /* No need to set variables to 0, as we use MY_ZEROFILL above */
5351 
5352     if (my_hash_insert(&global_table_stats, (uchar*) table_stats))
5353     {
5354       /* Out of memory error is already given */
5355       my_free(table_stats);
5356       goto end;
5357     }
5358   }
5359   // Updates the global table stats.
5360   table_stats->rows_read+=    rows_read;
5361   table_stats->rows_changed+= rows_changed;
5362   table_stats->rows_changed_x_indexes+= (rows_changed *
5363                                          (table->s->keys ? table->s->keys :
5364                                           1));
5365   rows_read= rows_changed= 0;
5366 end:
5367   mysql_mutex_unlock(&LOCK_global_table_stats);
5368 }
5369 
5370 
5371 /*
5372   Updates the global index stats with this handler's accumulated index reads.
5373 */
5374 
update_global_index_stats()5375 void handler::update_global_index_stats()
5376 {
5377   DBUG_ASSERT(table->s);
5378 
5379   if (!table->in_use->userstat_running)
5380   {
5381     /* Reset all index read values */
5382     bzero(index_rows_read, sizeof(index_rows_read[0]) * table->s->keys);
5383     return;
5384   }
5385 
5386   for (uint index = 0; index < table->s->keys; index++)
5387   {
5388     if (index_rows_read[index])
5389     {
5390       INDEX_STATS* index_stats;
5391       size_t key_length;
5392       KEY *key_info = &table->key_info[index];  // Rows were read using this
5393 
5394       DBUG_ASSERT(key_info->cache_name);
5395       if (!key_info->cache_name)
5396         continue;
5397       key_length= table->s->table_cache_key.length + key_info->name.length + 1;
5398       mysql_mutex_lock(&LOCK_global_index_stats);
5399       // Gets the global index stats, creating one if necessary.
5400       if (!(index_stats= (INDEX_STATS*) my_hash_search(&global_index_stats,
5401                                                     key_info->cache_name,
5402                                                     key_length)))
5403       {
5404         if (!(index_stats = ((INDEX_STATS*)
5405                              my_malloc(PSI_INSTRUMENT_ME, sizeof(INDEX_STATS),
5406                                        MYF(MY_WME | MY_ZEROFILL)))))
5407           goto end;                             // Error is already given
5408 
5409         memcpy(index_stats->index, key_info->cache_name, key_length);
5410         index_stats->index_name_length= key_length;
5411         if (my_hash_insert(&global_index_stats, (uchar*) index_stats))
5412         {
5413           my_free(index_stats);
5414           goto end;
5415         }
5416       }
5417       /* Updates the global index stats. */
5418       index_stats->rows_read+= index_rows_read[index];
5419       index_rows_read[index]= 0;
5420 end:
5421       mysql_mutex_unlock(&LOCK_global_index_stats);
5422     }
5423   }
5424 }
5425 
5426 
flush_checksum(ha_checksum * row_crc,uchar ** checksum_start,size_t * checksum_length)5427 static void flush_checksum(ha_checksum *row_crc, uchar **checksum_start,
5428                            size_t *checksum_length)
5429 {
5430   if (*checksum_start)
5431   {
5432     *row_crc= my_checksum(*row_crc, *checksum_start, *checksum_length);
5433     *checksum_start= NULL;
5434     *checksum_length= 0;
5435   }
5436 }
5437 
5438 
5439 /* calculating table's checksum */
calculate_checksum()5440 int handler::calculate_checksum()
5441 {
5442   int error;
5443   THD *thd=ha_thd();
5444   DBUG_ASSERT(table->s->last_null_bit_pos < 8);
5445   uchar null_mask= table->s->last_null_bit_pos
5446                    ? 256 -  (1 << table->s->last_null_bit_pos) : 0;
5447 
5448   table->use_all_stored_columns();
5449   stats.checksum= 0;
5450 
5451   if ((error= ha_rnd_init(1)))
5452     return error;
5453 
5454   for (;;)
5455   {
5456     if (thd->killed)
5457       return HA_ERR_ABORTED_BY_USER;
5458 
5459     ha_checksum row_crc= 0;
5460     error= ha_rnd_next(table->record[0]);
5461     if (error)
5462       break;
5463 
5464     if (table->s->null_bytes)
5465     {
5466       /* fix undefined null bits */
5467       table->record[0][table->s->null_bytes-1] |= null_mask;
5468       if (!(table->s->db_create_options & HA_OPTION_PACK_RECORD))
5469         table->record[0][0] |= 1;
5470 
5471       row_crc= my_checksum(row_crc, table->record[0], table->s->null_bytes);
5472     }
5473 
5474     uchar *checksum_start= NULL;
5475     size_t checksum_length= 0;
5476     for (uint i= 0; i < table->s->fields; i++ )
5477     {
5478       Field *f= table->field[i];
5479 
5480       if (! thd->variables.old_mode && f->is_real_null(0))
5481       {
5482         flush_checksum(&row_crc, &checksum_start, &checksum_length);
5483         continue;
5484       }
5485      /*
5486        BLOB and VARCHAR have pointers in their field, we must convert
5487        to string; GEOMETRY is implemented on top of BLOB.
5488        BIT may store its data among NULL bits, convert as well.
5489      */
5490       switch (f->type()) {
5491         case MYSQL_TYPE_BLOB:
5492         case MYSQL_TYPE_VARCHAR:
5493         case MYSQL_TYPE_GEOMETRY:
5494         case MYSQL_TYPE_BIT:
5495         {
5496           flush_checksum(&row_crc, &checksum_start, &checksum_length);
5497           String tmp;
5498           f->val_str(&tmp);
5499           row_crc= my_checksum(row_crc, (uchar*) tmp.ptr(), tmp.length());
5500           break;
5501         }
5502         default:
5503           if (!checksum_start)
5504             checksum_start= f->ptr;
5505           DBUG_ASSERT(checksum_start + checksum_length == f->ptr);
5506           checksum_length+= f->pack_length();
5507           break;
5508       }
5509     }
5510     flush_checksum(&row_crc, &checksum_start, &checksum_length);
5511 
5512     stats.checksum+= row_crc;
5513   }
5514   ha_rnd_end();
5515   return error == HA_ERR_END_OF_FILE ? 0 : error;
5516 }
5517 
5518 
/****************************************************************************
** Some general functions that aren't in the handler class
****************************************************************************/
5522 
5523 /**
5524   Initiates table-file and calls appropriate database-creator.
5525 
5526   @retval
5527    0  ok
5528   @retval
5529    1  error
5530 */
ha_create_table(THD * thd,const char * path,const char * db,const char * table_name,HA_CREATE_INFO * create_info,LEX_CUSTRING * frm)5531 int ha_create_table(THD *thd, const char *path,
5532                     const char *db, const char *table_name,
5533                     HA_CREATE_INFO *create_info, LEX_CUSTRING *frm)
5534 {
5535   int error= 1;
5536   TABLE table;
5537   char name_buff[FN_REFLEN];
5538   const char *name;
5539   TABLE_SHARE share;
5540   Abort_on_warning_instant_set old_abort_on_warning(thd, 0);
5541   bool temp_table __attribute__((unused)) =
5542     create_info->options & (HA_LEX_CREATE_TMP_TABLE | HA_CREATE_TMP_ALTER);
5543   DBUG_ENTER("ha_create_table");
5544 
5545   init_tmp_table_share(thd, &share, db, 0, table_name, path);
5546 
5547   if (frm)
5548   {
5549     bool write_frm_now= !create_info->db_type->discover_table &&
5550                         !create_info->tmp_table();
5551 
5552     share.frm_image= frm;
5553 
5554     // open an frm image
5555     if (share.init_from_binary_frm_image(thd, write_frm_now,
5556                                          frm->str, frm->length))
5557       goto err;
5558   }
5559   else
5560   {
5561     // open an frm file
5562     share.db_plugin= ha_lock_engine(thd, create_info->db_type);
5563 
5564     if (open_table_def(thd, &share))
5565       goto err;
5566   }
5567 
5568   share.m_psi= PSI_CALL_get_table_share(temp_table, &share);
5569 
5570   if (open_table_from_share(thd, &share, &empty_clex_str, 0, READ_ALL, 0,
5571                             &table, true))
5572     goto err;
5573 
5574   update_create_info_from_table(create_info, &table);
5575 
5576   name= get_canonical_filename(table.file, share.path.str, name_buff);
5577 
5578   error= table.file->ha_create(name, &table, create_info);
5579 
5580   if (unlikely(error))
5581   {
5582     if (!thd->is_error())
5583       my_error(ER_CANT_CREATE_TABLE, MYF(0), db, table_name, error);
5584     table.file->print_error(error, MYF(ME_WARNING));
5585     PSI_CALL_drop_table_share(temp_table, share.db.str, (uint)share.db.length,
5586                               share.table_name.str, (uint)share.table_name.length);
5587   }
5588 
5589   (void) closefrm(&table);
5590 
5591 err:
5592   free_table_share(&share);
5593   DBUG_RETURN(error != 0);
5594 }
5595 
init()5596 void st_ha_check_opt::init()
5597 {
5598   flags= sql_flags= 0;
5599   start_time= my_time(0);
5600 }
5601 
5602 
/*****************************************************************************
  Key cache handling.

  This code is only relevant for ISAM/MyISAM tables

  key_cache->cache may be 0 only in the case where a key cache is not
  initialized or when we were not able to init the key cache in a previous
  call to ha_init_key_cache() (probably out of memory)
*****************************************************************************/
5612 
5613 /**
5614   Init a key cache if it has not been initied before.
5615 */
ha_init_key_cache(const char * name,KEY_CACHE * key_cache,void * unused)5616 int ha_init_key_cache(const char *name, KEY_CACHE *key_cache, void *unused
5617                       __attribute__((unused)))
5618 {
5619   DBUG_ENTER("ha_init_key_cache");
5620 
5621   if (!key_cache->key_cache_inited)
5622   {
5623     mysql_mutex_lock(&LOCK_global_system_variables);
5624     size_t tmp_buff_size= (size_t) key_cache->param_buff_size;
5625     uint tmp_block_size= (uint) key_cache->param_block_size;
5626     uint division_limit= (uint)key_cache->param_division_limit;
5627     uint age_threshold=  (uint)key_cache->param_age_threshold;
5628     uint partitions=     (uint)key_cache->param_partitions;
5629     uint changed_blocks_hash_size=  (uint)key_cache->changed_blocks_hash_size;
5630     mysql_mutex_unlock(&LOCK_global_system_variables);
5631     DBUG_RETURN(!init_key_cache(key_cache,
5632 				tmp_block_size,
5633 				tmp_buff_size,
5634 				division_limit, age_threshold,
5635                                 changed_blocks_hash_size,
5636                                 partitions));
5637   }
5638   DBUG_RETURN(0);
5639 }
5640 
5641 
5642 /**
5643   Resize key cache.
5644 */
ha_resize_key_cache(KEY_CACHE * key_cache)5645 int ha_resize_key_cache(KEY_CACHE *key_cache)
5646 {
5647   DBUG_ENTER("ha_resize_key_cache");
5648 
5649   if (key_cache->key_cache_inited)
5650   {
5651     mysql_mutex_lock(&LOCK_global_system_variables);
5652     size_t tmp_buff_size= (size_t) key_cache->param_buff_size;
5653     long tmp_block_size= (long) key_cache->param_block_size;
5654     uint division_limit= (uint)key_cache->param_division_limit;
5655     uint age_threshold=  (uint)key_cache->param_age_threshold;
5656     uint changed_blocks_hash_size=  (uint)key_cache->changed_blocks_hash_size;
5657     mysql_mutex_unlock(&LOCK_global_system_variables);
5658     DBUG_RETURN(!resize_key_cache(key_cache, tmp_block_size,
5659 				  tmp_buff_size,
5660 				  division_limit, age_threshold,
5661                                   changed_blocks_hash_size));
5662   }
5663   DBUG_RETURN(0);
5664 }
5665 
5666 
5667 /**
5668   Change parameters for key cache (like division_limit)
5669 */
ha_change_key_cache_param(KEY_CACHE * key_cache)5670 int ha_change_key_cache_param(KEY_CACHE *key_cache)
5671 {
5672   DBUG_ENTER("ha_change_key_cache_param");
5673 
5674   if (key_cache->key_cache_inited)
5675   {
5676     mysql_mutex_lock(&LOCK_global_system_variables);
5677     uint division_limit= (uint)key_cache->param_division_limit;
5678     uint age_threshold=  (uint)key_cache->param_age_threshold;
5679     mysql_mutex_unlock(&LOCK_global_system_variables);
5680     change_key_cache_param(key_cache, division_limit, age_threshold);
5681   }
5682   DBUG_RETURN(0);
5683 }
5684 
5685 
5686 /**
5687   Repartition key cache
5688 */
ha_repartition_key_cache(KEY_CACHE * key_cache)5689 int ha_repartition_key_cache(KEY_CACHE *key_cache)
5690 {
5691   DBUG_ENTER("ha_repartition_key_cache");
5692 
5693   if (key_cache->key_cache_inited)
5694   {
5695     mysql_mutex_lock(&LOCK_global_system_variables);
5696     size_t tmp_buff_size= (size_t) key_cache->param_buff_size;
5697     long tmp_block_size= (long) key_cache->param_block_size;
5698     uint division_limit= (uint)key_cache->param_division_limit;
5699     uint age_threshold=  (uint)key_cache->param_age_threshold;
5700     uint partitions=     (uint)key_cache->param_partitions;
5701     uint changed_blocks_hash_size=  (uint)key_cache->changed_blocks_hash_size;
5702     mysql_mutex_unlock(&LOCK_global_system_variables);
5703     DBUG_RETURN(!repartition_key_cache(key_cache, tmp_block_size,
5704 				       tmp_buff_size,
5705 				       division_limit, age_threshold,
5706                                        changed_blocks_hash_size,
5707                                        partitions));
5708   }
5709   DBUG_RETURN(0);
5710 }
5711 
5712 
5713 /**
5714   Move all tables from one key cache to another one.
5715 */
ha_change_key_cache(KEY_CACHE * old_key_cache,KEY_CACHE * new_key_cache)5716 int ha_change_key_cache(KEY_CACHE *old_key_cache,
5717 			KEY_CACHE *new_key_cache)
5718 {
5719   mi_change_key_cache(old_key_cache, new_key_cache);
5720   return 0;
5721 }
5722 
5723 
discover_handlerton(THD * thd,plugin_ref plugin,void * arg)5724 static my_bool discover_handlerton(THD *thd, plugin_ref plugin,
5725                                    void *arg)
5726 {
5727   TABLE_SHARE *share= (TABLE_SHARE *)arg;
5728   handlerton *hton= plugin_hton(plugin);
5729   if (hton->discover_table)
5730   {
5731     share->db_plugin= plugin;
5732     int error= hton->discover_table(hton, thd, share);
5733     if (error != HA_ERR_NO_SUCH_TABLE)
5734     {
5735       if (unlikely(error))
5736       {
5737         if (!share->error)
5738         {
5739           share->error= OPEN_FRM_ERROR_ALREADY_ISSUED;
5740           plugin_unlock(0, share->db_plugin);
5741         }
5742 
5743         /*
5744           report an error, unless it is "generic" and a more
5745           specific one was already reported
5746         */
5747         if (error != HA_ERR_GENERIC || !thd->is_error())
5748           my_error(ER_GET_ERRNO, MYF(0), error, plugin_name(plugin)->str);
5749         share->db_plugin= 0;
5750       }
5751       else
5752         share->error= OPEN_FRM_OK;
5753 
5754       status_var_increment(thd->status_var.ha_discover_count);
5755       return TRUE; // abort the search
5756     }
5757     share->db_plugin= 0;
5758   }
5759 
5760   DBUG_ASSERT(share->error == OPEN_FRM_OPEN_ERROR);
5761   return FALSE;    // continue with the next engine
5762 }
5763 
ha_discover_table(THD * thd,TABLE_SHARE * share)5764 int ha_discover_table(THD *thd, TABLE_SHARE *share)
5765 {
5766   DBUG_ENTER("ha_discover_table");
5767   int found;
5768 
5769   DBUG_ASSERT(share->error == OPEN_FRM_OPEN_ERROR);   // share is not OK yet
5770 
5771   if (!engines_with_discover)
5772     found= FALSE;
5773   else if (share->db_plugin)
5774     found= discover_handlerton(thd, share->db_plugin, share);
5775   else
5776     found= plugin_foreach(thd, discover_handlerton,
5777                         MYSQL_STORAGE_ENGINE_PLUGIN, share);
5778 
5779   if (!found)
5780     open_table_error(share, OPEN_FRM_OPEN_ERROR, ENOENT); // not found
5781 
5782   DBUG_RETURN(share->error != OPEN_FRM_OK);
5783 }
5784 
file_ext_exists(char * path,size_t path_len,const char * ext)5785 static my_bool file_ext_exists(char *path, size_t path_len, const char *ext)
5786 {
5787   strmake(path + path_len, ext, FN_REFLEN - path_len);
5788   return !access(path, F_OK);
5789 }
5790 
5791 struct st_discover_existence_args
5792 {
5793   char *path;
5794   size_t  path_len;
5795   const char *db, *table_name;
5796   handlerton *hton;
5797   bool frm_exists;
5798 };
5799 
discover_existence(THD * thd,plugin_ref plugin,void * arg)5800 static my_bool discover_existence(THD *thd, plugin_ref plugin,
5801                                   void *arg)
5802 {
5803   st_discover_existence_args *args= (st_discover_existence_args*)arg;
5804   handlerton *ht= plugin_hton(plugin);
5805   if (!ht->discover_table_existence)
5806     return args->frm_exists;
5807 
5808   args->hton= ht;
5809 
5810   if (ht->discover_table_existence == ext_based_existence)
5811     return file_ext_exists(args->path, args->path_len,
5812                            ht->tablefile_extensions[0]);
5813 
5814   return ht->discover_table_existence(ht, args->db, args->table_name);
5815 }
5816 
5817 
5818 /**
5819   Check if a given table exists, without doing a full discover, if possible
5820 
5821   If the 'hton' is not NULL, it's set to the handlerton of the storage engine
5822   of this table, or to view_pseudo_hton if the frm belongs to a view.
5823 
5824   This function takes discovery correctly into account. If frm is found,
5825   it discovers the table to make sure it really exists in the engine.
5826   If no frm is found it discovers the table, in case it still exists in
5827   the engine.
5828 
5829   While it tries to cut corners (don't open .frm if no discovering engine is
5830   enabled, no full discovery if all discovering engines support
5831   discover_table_existence, etc), it still *may* be quite expensive
5832   and must be used sparingly.
5833 
5834   @retval true    Table exists (even if the error occurred, like bad frm)
5835   @retval false   Table does not exist (one can do CREATE TABLE table_name)
5836 
5837   @note if frm exists and the table in engine doesn't, *hton will be set,
5838         but the return value will be false.
5839 
5840   @note if frm file exists, but the table cannot be opened (engine not
5841         loaded, frm is invalid), the return value will be true, but
5842         *hton will be NULL.
5843 */
5844 
ha_table_exists(THD * thd,const LEX_CSTRING * db,const LEX_CSTRING * table_name,handlerton ** hton,bool * is_sequence)5845 bool ha_table_exists(THD *thd, const LEX_CSTRING *db,
5846                      const LEX_CSTRING *table_name,
5847                      handlerton **hton, bool *is_sequence)
5848 {
5849   handlerton *dummy;
5850   bool dummy2;
5851   DBUG_ENTER("ha_table_exists");
5852 
5853   if (hton)
5854     *hton= 0;
5855   else if (engines_with_discover)
5856     hton= &dummy;
5857   if (!is_sequence)
5858     is_sequence= &dummy2;
5859   *is_sequence= 0;
5860 
5861   TDC_element *element= tdc_lock_share(thd, db->str, table_name->str);
5862   if (element && element != MY_ERRPTR)
5863   {
5864     if (hton)
5865       *hton= element->share->db_type();
5866     *is_sequence= element->share->table_type == TABLE_TYPE_SEQUENCE;
5867     tdc_unlock_share(element);
5868     DBUG_RETURN(TRUE);
5869   }
5870 
5871   char path[FN_REFLEN + 1];
5872   size_t path_len = build_table_filename(path, sizeof(path) - 1,
5873                                          db->str, table_name->str, "", 0);
5874   st_discover_existence_args args= {path, path_len, db->str, table_name->str, 0, true};
5875 
5876   if (file_ext_exists(path, path_len, reg_ext))
5877   {
5878     bool exists= true;
5879     if (hton)
5880     {
5881       char engine_buf[NAME_CHAR_LEN + 1];
5882       LEX_CSTRING engine= { engine_buf, 0 };
5883       Table_type type= dd_frm_type(thd, path, &engine);
5884 
5885       switch (type) {
5886       case TABLE_TYPE_UNKNOWN:
5887         DBUG_PRINT("exit", ("Exist, cannot be opened"));
5888         DBUG_RETURN(true);                      // Frm exists
5889       case TABLE_TYPE_VIEW:
5890         *hton= view_pseudo_hton;
5891         DBUG_PRINT("exit", ("Exist, view"));
5892         DBUG_RETURN(true);                      // Frm exists
5893       case TABLE_TYPE_SEQUENCE:
5894         *is_sequence= true;
5895         /* fall through */
5896       case TABLE_TYPE_NORMAL:
5897         {
5898           plugin_ref p=  plugin_lock_by_name(thd, &engine,
5899                                              MYSQL_STORAGE_ENGINE_PLUGIN);
5900           *hton= p ? plugin_hton(p) : NULL;
5901           if (*hton)      // verify that the table really exists
5902             exists= discover_existence(thd, p, &args);
5903         }
5904       }
5905     }
5906     DBUG_PRINT("exit", (exists ? "Exists" : "Does not exist"));
5907     DBUG_RETURN(exists);
5908   }
5909 
5910   args.frm_exists= false;
5911   if (plugin_foreach(thd, discover_existence, MYSQL_STORAGE_ENGINE_PLUGIN,
5912                      &args))
5913   {
5914     if (hton)
5915       *hton= args.hton;
5916     DBUG_PRINT("exit", ("discovery found file"));
5917     DBUG_RETURN(TRUE);
5918   }
5919 
5920   if (need_full_discover_for_existence)
5921   {
5922     TABLE_LIST table;
5923     bool exists;
5924     uint flags = GTS_TABLE | GTS_VIEW;
5925 
5926     if (!hton)
5927       flags|= GTS_NOLOCK;
5928 
5929     Table_exists_error_handler no_such_table_handler;
5930     thd->push_internal_handler(&no_such_table_handler);
5931     table.init_one_table(db, table_name, 0, TL_READ);
5932     TABLE_SHARE *share= tdc_acquire_share(thd, &table, flags);
5933     thd->pop_internal_handler();
5934 
5935     if (hton && share)
5936     {
5937       *hton= share->db_type();
5938       tdc_release_share(share);
5939     }
5940 
5941     // the table doesn't exist if we've caught ER_NO_SUCH_TABLE and nothing else
5942     exists= !no_such_table_handler.safely_trapped_errors();
5943     DBUG_PRINT("exit", (exists ? "Exists" : "Does not exist"));
5944     DBUG_RETURN(exists);
5945   }
5946 
5947   DBUG_PRINT("exit", ("Does not exist"));
5948   DBUG_RETURN(FALSE);
5949 }
5950 
5951 
5952 /*
5953   Check if the CREATE/ALTER table should be ignored
5954   This could happen for slaves where the table is shared between master
5955   and slave
5956 
5957   If statement is ignored, write a note
5958 */
5959 
check_if_updates_are_ignored(const char * op) const5960 bool handler::check_if_updates_are_ignored(const char *op) const
5961 {
5962   return ha_check_if_updates_are_ignored(table->in_use, ht, op);
5963 }
5964 
5965 
ha_check_if_updates_are_ignored(THD * thd,handlerton * hton,const char * op)5966 bool ha_check_if_updates_are_ignored(THD *thd, handlerton *hton,
5967                                      const char *op)
5968 {
5969   DBUG_ENTER("ha_check_if_updates_are_ignored");
5970   if (!thd->slave_thread || !(hton= ha_checktype(thd, hton, 1)))
5971     DBUG_RETURN(0);                                   // Not slave or no engine
5972   if (!(hton->flags & HTON_IGNORE_UPDATES))
5973     DBUG_RETURN(0);                                   // Not shared table
5974   my_error(ER_SLAVE_IGNORED_SHARED_TABLE, MYF(ME_NOTE), op);
5975   DBUG_RETURN(1);
5976 }
5977 
5978 
5979 /**
5980   Discover all table names in a given database
5981 */
5982 extern "C" {
5983 
cmp_file_names(const void * a,const void * b)5984 static int cmp_file_names(const void *a, const void *b)
5985 {
5986   CHARSET_INFO *cs= character_set_filesystem;
5987   char *aa= ((FILEINFO *)a)->name;
5988   char *bb= ((FILEINFO *)b)->name;
5989   return cs->strnncoll(aa, strlen(aa), bb, strlen(bb));
5990 }
5991 
cmp_table_names(LEX_CSTRING * const * a,LEX_CSTRING * const * b)5992 static int cmp_table_names(LEX_CSTRING * const *a, LEX_CSTRING * const *b)
5993 {
5994   return my_charset_bin.strnncoll((*a)->str, (*a)->length,
5995                                   (*b)->str, (*b)->length);
5996 }
5997 
5998 #ifndef DBUG_OFF
cmp_table_names_desc(LEX_CSTRING * const * a,LEX_CSTRING * const * b)5999 static int cmp_table_names_desc(LEX_CSTRING * const *a, LEX_CSTRING * const *b)
6000 {
6001   return -cmp_table_names(a, b);
6002 }
6003 #endif
6004 
6005 }
6006 
Discovered_table_list(THD * thd_arg,Dynamic_array<LEX_CSTRING * > * tables_arg,const LEX_CSTRING * wild_arg)6007 Discovered_table_list::Discovered_table_list(THD *thd_arg,
6008                  Dynamic_array<LEX_CSTRING*> *tables_arg,
6009                  const LEX_CSTRING *wild_arg) :
6010   thd(thd_arg), with_temps(false), tables(tables_arg)
6011 {
6012   if (wild_arg->str && wild_arg->str[0])
6013   {
6014     wild= wild_arg->str;
6015     wend= wild + wild_arg->length;
6016   }
6017   else
6018     wild= 0;
6019 }
6020 
add_table(const char * tname,size_t tlen)6021 bool Discovered_table_list::add_table(const char *tname, size_t tlen)
6022 {
6023   /*
6024     TODO Check with_temps and filter out temp tables.
6025     Implement the check, when we'll have at least one affected engine (with
6026     custom discover_table_names() method, that calls add_table() directly).
6027     Note: avoid comparing the same name twice (here and in add_file).
6028   */
6029   if (wild && table_alias_charset->wildcmp(tname, tname + tlen, wild, wend,
6030                                            wild_prefix, wild_one, wild_many))
6031       return 0;
6032 
6033   LEX_CSTRING *name= thd->make_clex_string(tname, tlen);
6034   if (!name || tables->append(name))
6035     return 1;
6036   return 0;
6037 }
6038 
add_file(const char * fname)6039 bool Discovered_table_list::add_file(const char *fname)
6040 {
6041   bool is_temp= strncmp(fname, STRING_WITH_LEN(tmp_file_prefix)) == 0;
6042 
6043   if (is_temp && !with_temps)
6044     return 0;
6045 
6046   char tname[SAFE_NAME_LEN + 1];
6047   size_t tlen= filename_to_tablename(fname, tname, sizeof(tname), is_temp);
6048   return add_table(tname, tlen);
6049 }
6050 
6051 
sort()6052 void Discovered_table_list::sort()
6053 {
6054   tables->sort(cmp_table_names);
6055 }
6056 
6057 
6058 #ifndef DBUG_OFF
sort_desc()6059 void Discovered_table_list::sort_desc()
6060 {
6061   tables->sort(cmp_table_names_desc);
6062 }
6063 #endif
6064 
6065 
remove_duplicates()6066 void Discovered_table_list::remove_duplicates()
6067 {
6068   LEX_CSTRING **src= tables->front();
6069   LEX_CSTRING **dst= src;
6070   sort();
6071   while (++dst <= tables->back())
6072   {
6073     LEX_CSTRING *s= *src, *d= *dst;
6074     DBUG_ASSERT(strncmp(s->str, d->str, MY_MIN(s->length, d->length)) <= 0);
6075     if ((s->length != d->length || strncmp(s->str, d->str, d->length)))
6076     {
6077       src++;
6078       if (src != dst)
6079         *src= *dst;
6080     }
6081   }
6082   tables->elements(src - tables->front() + 1);
6083 }
6084 
6085 struct st_discover_names_args
6086 {
6087   LEX_CSTRING *db;
6088   MY_DIR *dirp;
6089   Discovered_table_list *result;
6090   uint possible_duplicates;
6091 };
6092 
discover_names(THD * thd,plugin_ref plugin,void * arg)6093 static my_bool discover_names(THD *thd, plugin_ref plugin,
6094                               void *arg)
6095 {
6096   st_discover_names_args *args= (st_discover_names_args *)arg;
6097   handlerton *ht= plugin_hton(plugin);
6098 
6099   if (ht->discover_table_names)
6100   {
6101     size_t old_elements= args->result->tables->elements();
6102     if (ht->discover_table_names(ht, args->db, args->dirp, args->result))
6103       return 1;
6104 
6105     /*
6106       hton_ext_based_table_discovery never discovers a table that has
6107       a corresponding .frm file; but custom engine discover methods might
6108     */
6109     if (ht->discover_table_names != hton_ext_based_table_discovery)
6110       args->possible_duplicates+= (uint)(args->result->tables->elements() - old_elements);
6111   }
6112 
6113   return 0;
6114 }
6115 
6116 /**
6117   Return the list of tables
6118 
6119   @param thd
6120   @param db         database to look into
6121   @param dirp       list of files in this database (as returned by my_dir())
6122   @param result     the object to return the list of files in
6123   @param reusable   if true, on return, 'dirp' will be a valid list of all
6124                     non-table files. If false, discovery will work much faster,
6125                     but it will leave 'dirp' corrupted and completely unusable,
6126                     only good for my_dirend().
6127 
6128   Normally, reusable=false for SHOW and INFORMATION_SCHEMA, and reusable=true
6129   for DROP DATABASE (as it needs to know and delete non-table files).
6130 */
6131 
ha_discover_table_names(THD * thd,LEX_CSTRING * db,MY_DIR * dirp,Discovered_table_list * result,bool reusable)6132 int ha_discover_table_names(THD *thd, LEX_CSTRING *db, MY_DIR *dirp,
6133                             Discovered_table_list *result, bool reusable)
6134 {
6135   int error;
6136   DBUG_ENTER("ha_discover_table_names");
6137 
6138   if (engines_with_discover_file_names == 0 && !reusable)
6139   {
6140     st_discover_names_args args= {db, NULL, result, 0};
6141     error= ext_table_discovery_simple(dirp, result) ||
6142            plugin_foreach(thd, discover_names,
6143                             MYSQL_STORAGE_ENGINE_PLUGIN, &args);
6144     if (args.possible_duplicates > 0)
6145       result->remove_duplicates();
6146   }
6147   else
6148   {
6149     st_discover_names_args args= {db, dirp, result, 0};
6150 
6151     /* extension_based_table_discovery relies on dirp being sorted */
6152     my_qsort(dirp->dir_entry, dirp->number_of_files,
6153              sizeof(FILEINFO), cmp_file_names);
6154 
6155     error= extension_based_table_discovery(dirp, reg_ext, result) ||
6156            plugin_foreach(thd, discover_names,
6157                             MYSQL_STORAGE_ENGINE_PLUGIN, &args);
6158     if (args.possible_duplicates > 0)
6159       result->remove_duplicates();
6160   }
6161 
6162   DBUG_RETURN(error);
6163 }
6164 
6165 
6166 /*
6167 int handler::pre_read_multi_range_first(KEY_MULTI_RANGE **found_range_p,
6168                                         KEY_MULTI_RANGE *ranges,
6169                                         uint range_count,
6170                                         bool sorted, HANDLER_BUFFER *buffer,
6171                                         bool use_parallel)
6172 {
6173   int result;
6174   DBUG_ENTER("handler::pre_read_multi_range_first");
6175   result = pre_read_range_first(ranges->start_key.keypart_map ?
6176                                 &ranges->start_key : 0,
6177                                 ranges->end_key.keypart_map ?
6178                                 &ranges->end_key : 0,
6179                                 test(ranges->range_flag & EQ_RANGE),
6180                                 sorted,
6181                                 use_parallel);
6182   DBUG_RETURN(result);
6183 }
6184 */
6185 
6186 
6187 /**
6188   Read first row between two ranges.
6189   Store ranges for future calls to read_range_next.
6190 
6191   @param start_key		Start key. Is 0 if no min range
6192   @param end_key		End key.  Is 0 if no max range
6193   @param eq_range_arg	        Set to 1 if start_key == end_key
6194   @param sorted		Set to 1 if result should be sorted per key
6195 
6196   @note
6197     Record is read into table->record[0]
6198 
6199   @retval
6200     0			Found row
6201   @retval
6202     HA_ERR_END_OF_FILE	No rows in range
6203   @retval
6204     \#			Error code
6205 */
read_range_first(const key_range * start_key,const key_range * end_key,bool eq_range_arg,bool sorted)6206 int handler::read_range_first(const key_range *start_key,
6207 			      const key_range *end_key,
6208 			      bool eq_range_arg, bool sorted)
6209 {
6210   int result;
6211   DBUG_ENTER("handler::read_range_first");
6212 
6213   eq_range= eq_range_arg;
6214   set_end_range(end_key);
6215   range_key_part= table->key_info[active_index].key_part;
6216 
6217   if (!start_key)			// Read first record
6218     result= ha_index_first(table->record[0]);
6219   else
6220     result= ha_index_read_map(table->record[0],
6221                               start_key->key,
6222                               start_key->keypart_map,
6223                               start_key->flag);
6224   if (result)
6225     DBUG_RETURN((result == HA_ERR_KEY_NOT_FOUND)
6226 		? HA_ERR_END_OF_FILE
6227 		: result);
6228 
6229   if (compare_key(end_range) <= 0)
6230   {
6231     DBUG_RETURN(0);
6232   }
6233   else
6234   {
6235     /*
6236       The last read row does not fall in the range. So request
6237       storage engine to release row lock if possible.
6238     */
6239     unlock_row();
6240     DBUG_RETURN(HA_ERR_END_OF_FILE);
6241   }
6242 }
6243 
6244 
6245 /**
6246   Read next row between two ranges.
6247 
6248   @note
6249     Record is read into table->record[0]
6250 
6251   @retval
6252     0			Found row
6253   @retval
6254     HA_ERR_END_OF_FILE	No rows in range
6255   @retval
6256     \#			Error code
6257 */
read_range_next()6258 int handler::read_range_next()
6259 {
6260   int result;
6261   DBUG_ENTER("handler::read_range_next");
6262 
6263   if (eq_range)
6264   {
6265     /* We trust that index_next_same always gives a row in range */
6266     DBUG_RETURN(ha_index_next_same(table->record[0],
6267                                    end_range->key,
6268                                    end_range->length));
6269   }
6270   result= ha_index_next(table->record[0]);
6271   if (result)
6272     DBUG_RETURN(result);
6273 
6274   if (compare_key(end_range) <= 0)
6275   {
6276     DBUG_RETURN(0);
6277   }
6278   else
6279   {
6280     /*
6281       The last read row does not fall in the range. So request
6282       storage engine to release row lock if possible.
6283     */
6284     unlock_row();
6285     DBUG_RETURN(HA_ERR_END_OF_FILE);
6286   }
6287 }
6288 
6289 
set_end_range(const key_range * end_key)6290 void handler::set_end_range(const key_range *end_key)
6291 {
6292   end_range= 0;
6293   if (end_key)
6294   {
6295     end_range= &save_end_range;
6296     save_end_range= *end_key;
6297     key_compare_result_on_equal=
6298       ((end_key->flag == HA_READ_BEFORE_KEY) ? 1 :
6299        (end_key->flag == HA_READ_AFTER_KEY) ? -1 : 0);
6300   }
6301 }
6302 
6303 
6304 /**
6305   Compare if found key (in row) is over max-value.
6306 
6307   @param range		range to compare to row. May be 0 for no range
6308 
6309   @see also
6310     key.cc::key_cmp()
6311 
6312   @return
6313     The return value is SIGN(key_in_row - range_key):
6314 
6315     - 0   : Key is equal to range or 'range' == 0 (no range)
6316     - -1  : Key is less than range
6317     - 1   : Key is larger than range
6318 */
compare_key(key_range * range)6319 int handler::compare_key(key_range *range)
6320 {
6321   int cmp;
6322   if (!range || in_range_check_pushed_down)
6323     return 0;					// No max range
6324   cmp= key_cmp(range_key_part, range->key, range->length);
6325   if (!cmp)
6326     cmp= key_compare_result_on_equal;
6327   return cmp;
6328 }
6329 
6330 
6331 /*
6332   Same as compare_key() but doesn't check have in_range_check_pushed_down.
6333   This is used by index condition pushdown implementation.
6334 */
6335 
compare_key2(key_range * range) const6336 int handler::compare_key2(key_range *range) const
6337 {
6338   int cmp;
6339   if (!range)
6340     return 0;					// no max range
6341   cmp= key_cmp(range_key_part, range->key, range->length);
6342   if (!cmp)
6343     cmp= key_compare_result_on_equal;
6344   return cmp;
6345 }
6346 
6347 
6348 /**
6349   ICP callback - to be called by an engine to check the pushed condition
6350 */
handler_index_cond_check(void * h_arg)6351 extern "C" check_result_t handler_index_cond_check(void* h_arg)
6352 {
6353   handler *h= (handler*)h_arg;
6354   THD *thd= h->table->in_use;
6355   check_result_t res;
6356 
6357   DEBUG_SYNC(thd, "handler_index_cond_check");
6358   enum thd_kill_levels abort_at= h->has_rollback() ?
6359     THD_ABORT_SOFTLY : THD_ABORT_ASAP;
6360   if (thd_kill_level(thd) > abort_at)
6361     return CHECK_ABORTED_BY_USER;
6362 
6363   if (h->end_range && h->compare_key2(h->end_range) > 0)
6364     return CHECK_OUT_OF_RANGE;
6365   h->increment_statistics(&SSV::ha_icp_attempts);
6366   if ((res= h->pushed_idx_cond->val_int()? CHECK_POS : CHECK_NEG) ==
6367       CHECK_POS)
6368     h->increment_statistics(&SSV::ha_icp_match);
6369   return res;
6370 }
6371 
6372 
6373 /**
6374   Rowid filter callback - to be called by an engine to check rowid / primary
6375   keys of the rows whose data is to be fetched against the used rowid filter
6376 */
6377 
6378 extern "C"
handler_rowid_filter_check(void * h_arg)6379 check_result_t handler_rowid_filter_check(void *h_arg)
6380 {
6381   handler *h= (handler*) h_arg;
6382   TABLE *tab= h->get_table();
6383 
6384   /*
6385     Check for out-of-range and killed conditions only if we haven't done it
6386     already in the pushed index condition check
6387   */
6388   if (!h->pushed_idx_cond)
6389   {
6390     THD *thd= h->table->in_use;
6391     DEBUG_SYNC(thd, "handler_rowid_filter_check");
6392     enum thd_kill_levels abort_at= h->has_transactions() ?
6393       THD_ABORT_SOFTLY : THD_ABORT_ASAP;
6394     if (thd_kill_level(thd) > abort_at)
6395       return CHECK_ABORTED_BY_USER;
6396 
6397     if (h->end_range && h->compare_key2(h->end_range) > 0)
6398       return CHECK_OUT_OF_RANGE;
6399   }
6400 
6401   h->position(tab->record[0]);
6402   return h->pushed_rowid_filter->check((char*)h->ref)? CHECK_POS: CHECK_NEG;
6403 }
6404 
6405 
6406 /**
6407   Callback function for an engine to check whether the used rowid filter
6408   has been already built
6409 */
6410 
handler_rowid_filter_is_active(void * h_arg)6411 extern "C" int handler_rowid_filter_is_active(void *h_arg)
6412 {
6413   if (!h_arg)
6414     return false;
6415   handler *h= (handler*) h_arg;
6416   return h->rowid_filter_is_active;
6417 }
6418 
6419 
index_read_idx_map(uchar * buf,uint index,const uchar * key,key_part_map keypart_map,enum ha_rkey_function find_flag)6420 int handler::index_read_idx_map(uchar * buf, uint index, const uchar * key,
6421                                 key_part_map keypart_map,
6422                                 enum ha_rkey_function find_flag)
6423 {
6424   int error, UNINIT_VAR(error1);
6425 
6426   error= ha_index_init(index, 0);
6427   if (likely(!error))
6428   {
6429     error= index_read_map(buf, key, keypart_map, find_flag);
6430     error1= ha_index_end();
6431   }
6432   return error ? error : error1;
6433 }
6434 
6435 
/**
  Returns a list of all known extensions.

    This comment documents ha_known_exts(), which is defined after its
    per-plugin helper exts_handlerton() below.

    No mutexes, worst case race is a minor surplus memory allocation.
    We have to recreate the extension map if mysqld is restarted (for example
    within libmysqld).

  @retval
    pointer		pointer to TYPELIB structure
*/
exts_handlerton(THD * unused,plugin_ref plugin,void * arg)6446 static my_bool exts_handlerton(THD *unused, plugin_ref plugin,
6447                                void *arg)
6448 {
6449   List<char> *found_exts= (List<char> *) arg;
6450   handlerton *hton= plugin_hton(plugin);
6451   List_iterator_fast<char> it(*found_exts);
6452   const char **ext, *old_ext;
6453 
6454   for (ext= hton->tablefile_extensions; *ext; ext++)
6455   {
6456     while ((old_ext= it++))
6457     {
6458       if (!strcmp(old_ext, *ext))
6459         break;
6460     }
6461     if (!old_ext)
6462       found_exts->push_back((char *) *ext);
6463 
6464     it.rewind();
6465   }
6466   return FALSE;
6467 }
6468 
ha_known_exts(void)6469 TYPELIB *ha_known_exts(void)
6470 {
6471   if (!known_extensions.type_names || mysys_usage_id != known_extensions_id)
6472   {
6473     List<char> found_exts;
6474     const char **ext, *old_ext;
6475 
6476     known_extensions_id= mysys_usage_id;
6477     found_exts.push_back((char*) TRG_EXT);
6478     found_exts.push_back((char*) TRN_EXT);
6479 
6480     plugin_foreach(NULL, exts_handlerton,
6481                    MYSQL_STORAGE_ENGINE_PLUGIN, &found_exts);
6482 
6483     ext= (const char **) my_once_alloc(sizeof(char *)*
6484                                        (found_exts.elements+1),
6485                                        MYF(MY_WME | MY_FAE));
6486 
6487     DBUG_ASSERT(ext != 0);
6488     known_extensions.count= found_exts.elements;
6489     known_extensions.type_names= ext;
6490 
6491     List_iterator_fast<char> it(found_exts);
6492     while ((old_ext= it++))
6493       *ext++= old_ext;
6494     *ext= 0;
6495   }
6496   return &known_extensions;
6497 }
6498 
6499 
stat_print(THD * thd,const char * type,size_t type_len,const char * file,size_t file_len,const char * status,size_t status_len)6500 static bool stat_print(THD *thd, const char *type, size_t type_len,
6501                        const char *file, size_t file_len,
6502                        const char *status, size_t status_len)
6503 {
6504   Protocol *protocol= thd->protocol;
6505   protocol->prepare_for_resend();
6506   protocol->store(type, type_len, system_charset_info);
6507   protocol->store(file, file_len, system_charset_info);
6508   protocol->store(status, status_len, system_charset_info);
6509   if (protocol->write())
6510     return TRUE;
6511   return FALSE;
6512 }
6513 
6514 
showstat_handlerton(THD * thd,plugin_ref plugin,void * arg)6515 static my_bool showstat_handlerton(THD *thd, plugin_ref plugin,
6516                                    void *arg)
6517 {
6518   enum ha_stat_type stat= *(enum ha_stat_type *) arg;
6519   handlerton *hton= plugin_hton(plugin);
6520   if (hton->show_status &&
6521       hton->show_status(hton, thd, stat_print, stat))
6522     return TRUE;
6523   return FALSE;
6524 }
6525 
ha_show_status(THD * thd,handlerton * db_type,enum ha_stat_type stat)6526 bool ha_show_status(THD *thd, handlerton *db_type, enum ha_stat_type stat)
6527 {
6528   List<Item> field_list;
6529   Protocol *protocol= thd->protocol;
6530   MEM_ROOT *mem_root= thd->mem_root;
6531   bool result;
6532 
6533   field_list.push_back(new (mem_root) Item_empty_string(thd, "Type", 10),
6534                        mem_root);
6535   field_list.push_back(new (mem_root)
6536                        Item_empty_string(thd, "Name", FN_REFLEN), mem_root);
6537   field_list.push_back(new (mem_root)
6538                        Item_empty_string(thd, "Status", 10),
6539                        mem_root);
6540 
6541   if (protocol->send_result_set_metadata(&field_list,
6542                             Protocol::SEND_NUM_ROWS | Protocol::SEND_EOF))
6543     return TRUE;
6544 
6545   if (db_type == NULL)
6546   {
6547     result= plugin_foreach(thd, showstat_handlerton,
6548                            MYSQL_STORAGE_ENGINE_PLUGIN, &stat);
6549   }
6550   else
6551   {
6552     result= db_type->show_status &&
6553             db_type->show_status(db_type, thd, stat_print, stat) ? 1 : 0;
6554   }
6555 
6556   /*
6557     We also check thd->is_error() as Innodb may return 0 even if
6558     there was an error.
6559   */
6560   if (likely(!result && !thd->is_error()))
6561     my_eof(thd);
6562   else if (!thd->is_error())
6563     my_error(ER_GET_ERRNO, MYF(0), errno, hton_name(db_type)->str);
6564   return result;
6565 }
6566 
6567 /*
6568   Function to check if the conditions for row-based binlogging is
6569   correct for the table.
6570 
6571   A row in the given table should be replicated if:
6572   - It's not called by partition engine
6573   - Row-based replication is enabled in the current thread
6574   - The binlog is enabled
6575   - It is not a temporary table
6576   - The binary log is open
6577   - The database the table resides in shall be binlogged (binlog_*_db rules)
6578   - table is not mysql.event
6579 
6580   RETURN VALUE
6581     0  No binary logging in row format
6582     1  Row needs to be logged
6583 */
6584 
check_table_binlog_row_based()6585 bool handler::check_table_binlog_row_based()
6586 {
6587   if (unlikely((!check_table_binlog_row_based_done)))
6588   {
6589     check_table_binlog_row_based_done= 1;
6590     check_table_binlog_row_based_result=
6591       check_table_binlog_row_based_internal();
6592   }
6593   return check_table_binlog_row_based_result;
6594 }
6595 
check_table_binlog_row_based_internal()6596 bool handler::check_table_binlog_row_based_internal()
6597 {
6598   THD *thd= table->in_use;
6599 
6600 #ifdef WITH_WSREP
6601   if (!thd->variables.sql_log_bin &&
6602       wsrep_thd_is_applying(table->in_use))
6603   {
6604     /*
6605       wsrep patch sets sql_log_bin to silence binlogging from high
6606       priority threads
6607     */
6608     return 0;
6609   }
6610 #endif
6611   return (table->s->can_do_row_logging &&
6612           !table->versioned(VERS_TRX_ID) &&
6613           !(thd->variables.option_bits & OPTION_BIN_TMP_LOG_OFF) &&
6614           thd->is_current_stmt_binlog_format_row() &&
6615           /*
6616             Wsrep partially enables binary logging if it have not been
6617             explicitly turned on. As a result we return 'true' if we are in
6618             wsrep binlog emulation mode and the current thread is not a wsrep
6619             applier or replayer thread. This decision is not affected by
6620             @@sql_log_bin as we want the events to make into the binlog
6621             cache only to filter them later before they make into binary log
6622             file.
6623 
6624             However, we do return 'false' if binary logging was temporarily
6625             turned off (see tmp_disable_binlog(A)).
6626 
6627             Otherwise, return 'true' if binary logging is on.
6628           */
6629           IF_WSREP(((WSREP_EMULATE_BINLOG_NNULL(thd) &&
6630                      wsrep_thd_is_local(thd)) ||
6631                     ((WSREP_NNULL(thd) ||
6632                       (thd->variables.option_bits & OPTION_BIN_LOG)) &&
6633                      mysql_bin_log.is_open())),
6634                     (thd->variables.option_bits & OPTION_BIN_LOG) &&
6635                     mysql_bin_log.is_open()));
6636 }
6637 
6638 
binlog_log_row(TABLE * table,const uchar * before_record,const uchar * after_record,Log_func * log_func)6639 int handler::binlog_log_row(TABLE *table,
6640                             const uchar *before_record,
6641                             const uchar *after_record,
6642                             Log_func *log_func)
6643 {
6644   bool error;
6645   THD *thd= table->in_use;
6646   DBUG_ENTER("binlog_log_row");
6647 
6648   if (!thd->binlog_table_maps &&
6649       thd->binlog_write_table_maps())
6650     DBUG_RETURN(HA_ERR_RBR_LOGGING_FAILED);
6651 
6652   error= (*log_func)(thd, table, row_logging_has_trans,
6653                      before_record, after_record);
6654   DBUG_RETURN(error ? HA_ERR_RBR_LOGGING_FAILED : 0);
6655 }
6656 
6657 
/**
  Wrapper around external_lock(): takes or releases the engine-level lock
  on the table, with sanity checks, probes and instrumentation around it.

  @param thd        thread that locks / unlocks the table
  @param lock_type  F_RDLCK, F_WRLCK or F_UNLCK

  @return the result of external_lock() (0 on success)
*/
int handler::ha_external_lock(THD *thd, int lock_type)
{
  int error;
  DBUG_ENTER("handler::ha_external_lock");
  /*
    Whether this is lock or unlock, this should be true, and is to verify that
    if get_auto_increment() was called (thus may have reserved intervals or
    taken a table lock), ha_release_auto_increment() was too.
  */
  DBUG_ASSERT(next_insert_id == 0);
  /* Consecutive calls for lock without unlocking in between is not allowed */
  DBUG_ASSERT(table_share->tmp_table != NO_TMP_TABLE ||
              ((lock_type != F_UNLCK && m_lock_type == F_UNLCK) ||
               lock_type == F_UNLCK));
  /* SQL HANDLER call locks/unlock while scanning (RND/INDEX). */
  DBUG_ASSERT(inited == NONE || table->open_by_handler);

  /* Fire the "start" probe that matches the requested lock type */
  if (MYSQL_HANDLER_RDLOCK_START_ENABLED() ||
      MYSQL_HANDLER_WRLOCK_START_ENABLED() ||
      MYSQL_HANDLER_UNLOCK_START_ENABLED())
  {
    if (lock_type == F_RDLCK)
    {
      MYSQL_HANDLER_RDLOCK_START(table_share->db.str,
                                 table_share->table_name.str);
    }
    else if (lock_type == F_WRLCK)
    {
      MYSQL_HANDLER_WRLOCK_START(table_share->db.str,
                                 table_share->table_name.str);
    }
    else if (lock_type == F_UNLCK)
    {
      MYSQL_HANDLER_UNLOCK_START(table_share->db.str,
                                 table_share->table_name.str);
    }
  }

  /*
    We cache the table flags if the locking succeeded. Otherwise, we
    keep them as they were when they were fetched in ha_open().
  */
  MYSQL_TABLE_LOCK_WAIT(PSI_TABLE_EXTERNAL_LOCK, lock_type,
    { error= external_lock(thd, lock_type); })

  /* Debug builds can force a failure here for testing */
  DBUG_EXECUTE_IF("external_lock_failure", error= HA_ERR_GENERIC;);

  /*
    An unlock request updates the lock state, the cached flags and the
    audit log even if external_lock() reported an error.
  */
  if (likely(error == 0 || lock_type == F_UNLCK))
  {
    m_lock_type= lock_type;
    cached_table_flags= table_flags();
    /* Only non-temporary tables are reported to the audit plugins */
    if (table_share->tmp_table == NO_TMP_TABLE)
      mysql_audit_external_lock(thd, table_share, lock_type);
  }

  /* Fire the "done" probe that matches the requested lock type */
  if (MYSQL_HANDLER_RDLOCK_DONE_ENABLED() ||
      MYSQL_HANDLER_WRLOCK_DONE_ENABLED() ||
      MYSQL_HANDLER_UNLOCK_DONE_ENABLED())
  {
    if (lock_type == F_RDLCK)
    {
      MYSQL_HANDLER_RDLOCK_DONE(error);
    }
    else if (lock_type == F_WRLCK)
    {
      MYSQL_HANDLER_WRLOCK_DONE(error);
    }
    else if (lock_type == F_UNLCK)
    {
      MYSQL_HANDLER_UNLOCK_DONE(error);
    }
  }
  DBUG_RETURN(error);
}
6732 
6733 
/** @brief
  Check handler usage and reset state of file to after 'open'

  Clears the pushed condition, tracker, row logging state, pushed index
  condition and pushed rowid filter, closes and deletes a separate lookup
  handler if there is one, and finally calls the engine's reset().

  @return the result of reset()
*/
int handler::ha_reset()
{
  DBUG_ENTER("ha_reset");

  /* Check that we have called all proper deallocation functions */
  DBUG_ASSERT((uchar*) table->def_read_set.bitmap +
              table->s->column_bitmap_size ==
              (uchar*) table->def_write_set.bitmap);
  DBUG_ASSERT(bitmap_is_set_all(&table->s->all_set));
  DBUG_ASSERT(!table->file->keyread_enabled());
  /* ensure that ha_index_end / ha_rnd_end has been called */
  DBUG_ASSERT(inited == NONE);
  /* reset the bitmaps to point to defaults */
  table->default_column_bitmaps();
  pushed_cond= NULL;
  tracker= NULL;
  mark_trx_read_write_done= 0;
  /*
    Disable row logging.
  */
  row_logging= row_logging_init= 0;
  clear_cached_table_binlog_row_based_flag();
  /* Reset information about pushed index conditions */
  cancel_pushed_idx_cond();
  /* Reset information about the pushed rowid filter */
  cancel_pushed_rowid_filter();
  /* Release a separate lookup handler, if one is in use */
  if (lookup_handler != this)
  {
    lookup_handler->ha_external_unlock(table->in_use);
    lookup_handler->close();
    delete lookup_handler;
    lookup_handler= this;
  }
  DBUG_RETURN(reset());
}
6772 
6773 #ifdef WITH_WSREP
wsrep_after_row(THD * thd)6774 static int wsrep_after_row(THD *thd)
6775 {
6776   DBUG_ENTER("wsrep_after_row");
6777   if (thd->internal_transaction())
6778     DBUG_RETURN(0);
6779 
6780   /* enforce wsrep_max_ws_rows */
6781   thd->wsrep_affected_rows++;
6782   if (wsrep_max_ws_rows &&
6783       thd->wsrep_affected_rows > wsrep_max_ws_rows &&
6784       wsrep_thd_is_local(thd))
6785   {
6786     trans_rollback_stmt(thd) || trans_rollback(thd);
6787     my_message(ER_ERROR_DURING_COMMIT, "wsrep_max_ws_rows exceeded", MYF(0));
6788     DBUG_RETURN(ER_ERROR_DURING_COMMIT);
6789   }
6790   else if (wsrep_after_row_internal(thd))
6791   {
6792     DBUG_RETURN(ER_LOCK_DEADLOCK);
6793   }
6794   DBUG_RETURN(0);
6795 }
6796 #endif /* WITH_WSREP */
6797 
6798 
/**
   Check if there is a conflicting unique hash key

   Looks up the key value of index 'key_no', built from 'new_rec', through
   lookup_handler. For every index entry with the same key value the
   arguments of the hash expression are compared, so that only rows with
   equal column values count as duplicates.

   @param new_rec  record whose key value is checked
   @param key_no   number of the hash key in table->key_info

   @retval 0                      no conflicting row was found, or the hash
                                  field is NULL
   @retval HA_ERR_FOUND_DUPP_KEY  a conflicting row exists; lookup_errkey is
                                  set to key_no and, with HA_DUPLICATE_POS,
                                  dup_ref holds the position of that row
   @retval other                  error from ha_index_init() or
                                  ha_index_read_map()
*/

int handler::check_duplicate_long_entry_key(const uchar *new_rec, uint key_no)
{
  int result, error= 0;
  KEY *key_info= table->key_info + key_no;
  Field *hash_field= key_info->key_part->field;
  uchar ptr[HA_HASH_KEY_LENGTH_WITH_NULL];
  DBUG_ENTER("handler::check_duplicate_long_entry_key");

  DBUG_ASSERT((key_info->flags & HA_NULL_PART_KEY &&
               key_info->key_length == HA_HASH_KEY_LENGTH_WITH_NULL) ||
              key_info->key_length == HA_HASH_KEY_LENGTH_WITHOUT_NULL);

  /* A NULL hash value is never treated as a duplicate */
  if (hash_field->is_real_null())
    DBUG_RETURN(0);

  /* Build the search key for the hash index in 'ptr' */
  key_copy(ptr, new_rec, key_info, key_info->key_length, false);

  result= lookup_handler->ha_index_init(key_no, 0);
  if (result)
    DBUG_RETURN(result);
  /*
    The lookup below reads rows into table->record[0]; presumably this
    saves the current record[0] in lookup_buffer first (restored at exit).
  */
  store_record(table, file->lookup_buffer);
  result= lookup_handler->ha_index_read_map(table->record[0],
                               ptr, HA_WHOLE_KEY, HA_READ_KEY_EXACT);
  if (!result)
  {
    bool is_same;
    Field * t_field;
    Item_func_hash * temp= (Item_func_hash *)hash_field->vcol_info->expr;
    Item ** arguments= temp->arguments();
    uint arg_count= temp->argument_count();
    /*
      Equal hash values do not imply equal columns: compare each argument
      of the hash expression for every index entry with this key value
      until a real duplicate is found or the entries are exhausted.
    */
    do
    {
      /* Byte distance from new_rec to lookup_buffer, used by the compares */
      my_ptrdiff_t diff= table->file->lookup_buffer - new_rec;
      is_same= true;
      for (uint j=0; is_same && j < arg_count; j++)
      {
        DBUG_ASSERT(arguments[j]->type() == Item::FIELD_ITEM ||
                    // this one for left(fld_name,length)
                    arguments[j]->type() == Item::FUNC_ITEM);
        if (arguments[j]->type() == Item::FIELD_ITEM)
        {
          t_field= static_cast<Item_field *>(arguments[j])->field;
          if (t_field->cmp_offset(diff))
            is_same= false;
        }
        else
        {
          /* Prefix key part: only the first 'length' units are compared */
          Item_func_left *fnc= static_cast<Item_func_left *>(arguments[j]);
          DBUG_ASSERT(!my_strcasecmp(system_charset_info, "left", fnc->func_name()));
          DBUG_ASSERT(fnc->arguments()[0]->type() == Item::FIELD_ITEM);
          t_field= static_cast<Item_field *>(fnc->arguments()[0])->field;
          uint length= (uint)fnc->arguments()[1]->val_int();
          if (t_field->cmp_prefix(t_field->ptr, t_field->ptr + diff, length))
            is_same= false;
        }
      }
    }
    /*
      NOTE(review): any non-zero result of ha_index_next_same() ends the
      loop with is_same == false and is not reported to the caller (error
      stays 0) - confirm this is intended for errors other than
      end-of-file.
    */
    while (!is_same &&
           !(result= lookup_handler->ha_index_next_same(table->record[0],
                                                ptr, key_info->key_length)));
    if (is_same)
      error= HA_ERR_FOUND_DUPP_KEY;
    goto exit;
  }
  /* Not finding the key is the normal "no duplicate" outcome */
  if (result != HA_ERR_KEY_NOT_FOUND)
    error= result;
exit:
  if (error == HA_ERR_FOUND_DUPP_KEY)
  {
    /* Remember which key conflicted and, if supported, the row position */
    table->file->lookup_errkey= key_no;
    if (ha_table_flags() & HA_DUPLICATE_POS)
    {
      lookup_handler->position(table->record[0]);
      memcpy(table->file->dup_ref, lookup_handler->ref, ref_length);
    }
  }
  restore_record(table, file->lookup_buffer);
  lookup_handler->ha_index_end();
  DBUG_RETURN(error);
}
6883 
alloc_lookup_buffer()6884 void handler::alloc_lookup_buffer()
6885 {
6886   if (!lookup_buffer)
6887     lookup_buffer= (uchar*)alloc_root(&table->mem_root,
6888                                       table_share->max_unique_length
6889                                       + table_share->null_fields
6890                                       + table_share->reclength);
6891 }
6892 
6893 /** @brief
6894     check whether inserted records breaks the
6895     unique constraint on long columns.
6896     @returns 0 if no duplicate else returns error
6897   */
check_duplicate_long_entries(const uchar * new_rec)6898 int handler::check_duplicate_long_entries(const uchar *new_rec)
6899 {
6900   lookup_errkey= (uint)-1;
6901   for (uint i= 0; i < table->s->keys; i++)
6902   {
6903     int result;
6904     if (table->key_info[i].algorithm == HA_KEY_ALG_LONG_HASH &&
6905         (result= check_duplicate_long_entry_key(new_rec, i)))
6906       return result;
6907   }
6908   return 0;
6909 }
6910 
6911 
6912 /** @brief
6913     check whether updated records breaks the
6914     unique constraint on long columns.
6915     In the case of update we just need to check the specic key
6916     reason for that is consider case
6917     create table t1(a blob , b blob , x blob , y blob ,unique(a,b)
6918                                                     ,unique(x,y))
6919     and update statement like this
6920     update t1 set a=23+a; in this case if we try to scan for
6921     whole keys in table then index scan on x_y will return 0
6922     because data is same so in the case of update we take
6923     key as a parameter in normal insert key should be -1
6924     @returns 0 if no duplicate else returns error
6925   */
check_duplicate_long_entries_update(const uchar * new_rec)6926 int handler::check_duplicate_long_entries_update(const uchar *new_rec)
6927 {
6928   Field *field;
6929   uint key_parts;
6930   KEY *keyinfo;
6931   KEY_PART_INFO *keypart;
6932   /*
6933      Here we are comparing whether new record and old record are same
6934      with respect to fields in hash_str
6935    */
6936   uint reclength= (uint) (table->record[1] - table->record[0]);
6937 
6938   for (uint i= 0; i < table->s->keys; i++)
6939   {
6940     keyinfo= table->key_info + i;
6941     if (keyinfo->algorithm == HA_KEY_ALG_LONG_HASH)
6942     {
6943       key_parts= fields_in_hash_keyinfo(keyinfo);
6944       keypart= keyinfo->key_part - key_parts;
6945       for (uint j= 0; j < key_parts; j++, keypart++)
6946       {
6947         int error;
6948         field= keypart->field;
6949         /* Compare fields if they are different then check for duplicates */
6950         if (field->cmp_binary_offset(reclength))
6951         {
6952           if((error= check_duplicate_long_entry_key(new_rec, i)))
6953             return error;
6954           /*
6955             break because check_duplicate_long_entries_key will
6956             take care of remaining fields
6957            */
6958           break;
6959         }
6960       }
6961     }
6962   }
6963   return 0;
6964 }
6965 
6966 
/**
  Check whether the row in new_data overlaps an existing row on any key
  of the table that has without_overlaps set.

  The check is skipped (0 is returned) when this handler is not
  table->file, when the share has no period unique keys, or when the
  table is versioned and its end field is not at the maximum value.

  @param old_data  Row being replaced in case of an update,
                   NULL in case of an insert
  @param new_data  Row that is about to be written; must not be NULL

  @return 0                      No overlap found
  @return HA_ERR_FOUND_DUPP_KEY  An overlapping row exists; lookup_errkey
                                 holds the number of the violated key
  @return other                  Error reported by the lookup handler
*/
int handler::ha_check_overlaps(const uchar *old_data, const uchar* new_data)
{
  DBUG_ASSERT(new_data);
  if (this != table->file)
    return 0;
  if (!table_share->period.unique_keys)
    return 0;
  if (table->versioned() && !table->vers_end_field()->is_max())
    return 0;

  const bool is_update= old_data != NULL;
  /*
    Rows read by the lookup handler are stored in lookup_buffer, after the
    first max_unique_length + null_fields bytes which are used for the key
  */
  uchar *record_buffer= lookup_buffer + table_share->max_unique_length
                                      + table_share->null_fields;

  // Needed to compare record refs later
  if (is_update)
    position(old_data);

  DBUG_ASSERT(!keyread_enabled());

  int error= 0;
  /* (uint)-1 means that no key has been found to be violated */
  lookup_errkey= (uint)-1;

  for (uint key_nr= 0; key_nr < table_share->keys && !error; key_nr++)
  {
    const KEY &key_info= table->key_info[key_nr];
    const uint key_parts= key_info.user_defined_key_parts;
    if (!key_info.without_overlaps)
      continue;

    if (is_update)
    {
      /* Skip the key if the update does not write any of its key parts */
      bool key_used= false;
      for (uint k= 0; k < key_parts && !key_used; k++)
        key_used= bitmap_is_set(table->write_set,
                                key_info.key_part[k].fieldnr - 1);
      if (!key_used)
        continue;
    }

    error= lookup_handler->ha_index_init(key_nr, 0);
    if (error)
      return error;

    /* NOTE(review): a keyread start failure is only caught in debug builds */
    error= lookup_handler->ha_start_keyread(key_nr);
    DBUG_ASSERT(!error);

    /*
      period_field_length is the length of the last key part;
      key_base_length is the key length without the last two key parts
      (assuming both have the same length - TODO confirm)
    */
    const uint period_field_length= key_info.key_part[key_parts - 1].length;
    const uint key_base_length= key_info.key_length - 2 * period_field_length;

    key_copy(lookup_buffer, new_data, &key_info, 0);

    /* Copy period_start to period_end.
       the value in period_start field is not significant, but anyway let's leave
       it defined to avoid uninitialized memory access
     */
    memcpy(lookup_buffer + key_base_length,
           lookup_buffer + key_base_length + period_field_length,
           period_field_length);

    /*
      Find row with period_end > (period_start of new_data).
      The key part map covers all key parts except the last one.
    */
    error = lookup_handler->ha_index_read_map(record_buffer, lookup_buffer,
                                       key_part_map((1 << (key_parts - 1)) - 1),
                                       HA_READ_AFTER_KEY);

    if (!error && is_update)
    {
      /* In case of update it could happen that the nearest neighbour is
         a record we are updating. It means, that there are no overlaps
         from this side.
      */
      DBUG_ASSERT(lookup_handler != this);
      DBUG_ASSERT(ref_length == lookup_handler->ref_length);

      /* Same row reference as the row being updated: look at the next row */
      lookup_handler->position(record_buffer);
      if (memcmp(ref, lookup_handler->ref, ref_length) == 0)
        error= lookup_handler->ha_index_next(record_buffer);
    }

    if (!error && table->check_period_overlaps(key_info, new_data, record_buffer))
      error= HA_ERR_FOUND_DUPP_KEY;

    /* Finding no neighbouring row is not an error: there is no overlap */
    if (error == HA_ERR_KEY_NOT_FOUND || error == HA_ERR_END_OF_FILE)
      error= 0;

    if (error == HA_ERR_FOUND_DUPP_KEY)
      lookup_errkey= key_nr;

    /* Always close keyread and the index scan, whatever the result was */
    int end_error= lookup_handler->ha_end_keyread();
    DBUG_ASSERT(!end_error);

    /* An error from closing the index is reported only if nothing failed before */
    end_error= lookup_handler->ha_index_end();
    if (!error && end_error)
      error= end_error;
  }

  return error;
}
7065 
7066 
7067 /**
7068   Check if galera disables binary logging for this table
7069 
7070   @return 0  Binary logging disabled
7071   @return 1  Binary logging can be enabled
7072 */
7073 
7074 
wsrep_check_if_binlog_row(TABLE * table)7075 static inline bool wsrep_check_if_binlog_row(TABLE *table)
7076 {
7077 #ifdef WITH_WSREP
7078   THD *const thd= table->in_use;
7079 
7080   /* only InnoDB tables will be replicated through binlog emulation */
7081   if ((WSREP_EMULATE_BINLOG(thd) &&
7082        !(table->file->partition_ht()->flags & HTON_WSREP_REPLICATION)) ||
7083       thd->wsrep_ignore_table == true)
7084     return 0;
7085 #endif
7086   return 1;
7087 }
7088 
7089 
7090 /**
7091    Prepare handler for row logging
7092 
7093    @return 0 if handler will not participate in row logging
7094    @return 1 handler will participate in row logging
7095 
7096    This function is always safe to call on an opened table.
7097 */
7098 
prepare_for_row_logging()7099 bool handler::prepare_for_row_logging()
7100 {
7101   DBUG_ENTER("handler::prepare_for_row_logging");
7102 
7103   /* Check if we should have row logging */
7104   if (wsrep_check_if_binlog_row(table) &&
7105       check_table_binlog_row_based())
7106   {
7107     /*
7108       Row logging enabled. Intialize all variables and write
7109       annotated and table maps
7110     */
7111     row_logging= row_logging_init= 1;
7112 
7113     /*
7114       We need to have a transactional behavior for SQLCOM_CREATE_TABLE
7115       (e.g. CREATE TABLE... SELECT * FROM TABLE) in order to keep a
7116       compatible behavior with the STMT based replication even when
7117       the table is not transactional. In other words, if the operation
7118       fails while executing the insert phase nothing is written to the
7119       binlog.
7120     */
7121     row_logging_has_trans=
7122       ((sql_command_flags[table->in_use->lex->sql_command] &
7123         (CF_SCHEMA_CHANGE | CF_ADMIN_COMMAND)) ||
7124        table->file->has_transactions_and_rollback());
7125   }
7126   else
7127   {
7128     /* Check row_logging has not been properly cleared from previous command */
7129     DBUG_ASSERT(row_logging == 0);
7130   }
7131   DBUG_RETURN(row_logging);
7132 }
7133 
7134 
7135 /*
7136   Do all initialization needed for insert
7137 */
7138 
prepare_for_insert(bool do_create)7139 int handler::prepare_for_insert(bool do_create)
7140 {
7141   /* Preparation for unique of blob's */
7142   if (table->s->long_unique_table || table->s->period.unique_keys)
7143   {
7144     if (do_create && create_lookup_handler())
7145       return 1;
7146     alloc_lookup_buffer();
7147   }
7148   return 0;
7149 }
7150 
7151 
ha_write_row(const uchar * buf)7152 int handler::ha_write_row(const uchar *buf)
7153 {
7154   int error;
7155   DBUG_ASSERT(table_share->tmp_table != NO_TMP_TABLE ||
7156               m_lock_type == F_WRLCK);
7157   DBUG_ENTER("handler::ha_write_row");
7158   DEBUG_SYNC_C("ha_write_row_start");
7159 
7160   if ((error= ha_check_overlaps(NULL, buf)))
7161     DBUG_RETURN(error);
7162 
7163   if (table->s->long_unique_table && this == table->file)
7164   {
7165     DBUG_ASSERT(inited == NONE || lookup_handler != this);
7166     if ((error= check_duplicate_long_entries(buf)))
7167       DBUG_RETURN(error);
7168   }
7169 
7170   MYSQL_INSERT_ROW_START(table_share->db.str, table_share->table_name.str);
7171   mark_trx_read_write();
7172   increment_statistics(&SSV::ha_write_count);
7173 
7174   TABLE_IO_WAIT(tracker, PSI_TABLE_WRITE_ROW, MAX_KEY, error,
7175                       { error= write_row(buf); })
7176 
7177   MYSQL_INSERT_ROW_DONE(error);
7178   if (likely(!error))
7179   {
7180     rows_changed++;
7181     if (row_logging)
7182     {
7183       Log_func *log_func= Write_rows_log_event::binlog_row_logging_function;
7184       error= binlog_log_row(table, 0, buf, log_func);
7185     }
7186 #ifdef WITH_WSREP
7187     if (WSREP_NNULL(ha_thd()) && table_share->tmp_table == NO_TMP_TABLE &&
7188         ht->flags & HTON_WSREP_REPLICATION &&
7189         !error && (error= wsrep_after_row(ha_thd())))
7190     {
7191       DBUG_RETURN(error);
7192     }
7193 #endif /* WITH_WSREP */
7194   }
7195 
7196   DEBUG_SYNC_C("ha_write_row_end");
7197   DBUG_RETURN(error);
7198 }
7199 
7200 
ha_update_row(const uchar * old_data,const uchar * new_data)7201 int handler::ha_update_row(const uchar *old_data, const uchar *new_data)
7202 {
7203   int error;
7204   DBUG_ASSERT(table_share->tmp_table != NO_TMP_TABLE ||
7205               m_lock_type == F_WRLCK);
7206   /*
7207     Some storage engines require that the new record is in record[0]
7208     (and the old record is in record[1]).
7209    */
7210   DBUG_ASSERT(new_data == table->record[0]);
7211   DBUG_ASSERT(old_data == table->record[1]);
7212 
7213   uint saved_status= table->status;
7214   error= ha_check_overlaps(old_data, new_data);
7215 
7216   if (!error && table->s->long_unique_table && this == table->file)
7217     error= check_duplicate_long_entries_update(new_data);
7218   table->status= saved_status;
7219 
7220   if (error)
7221     return error;
7222 
7223   MYSQL_UPDATE_ROW_START(table_share->db.str, table_share->table_name.str);
7224   mark_trx_read_write();
7225   increment_statistics(&SSV::ha_update_count);
7226 
7227   TABLE_IO_WAIT(tracker, PSI_TABLE_UPDATE_ROW, active_index, 0,
7228                       { error= update_row(old_data, new_data);})
7229 
7230   MYSQL_UPDATE_ROW_DONE(error);
7231   if (likely(!error))
7232   {
7233     rows_changed++;
7234     if (row_logging)
7235     {
7236       Log_func *log_func= Update_rows_log_event::binlog_row_logging_function;
7237       error= binlog_log_row(table, old_data, new_data, log_func);
7238     }
7239 #ifdef WITH_WSREP
7240     THD *thd= ha_thd();
7241     if (WSREP_NNULL(thd))
7242     {
7243       /* for streaming replication, the following wsrep_after_row()
7244       may replicate a fragment, so we have to declare potential PA
7245       unsafe before that */
7246       if (table->s->primary_key == MAX_KEY && wsrep_thd_is_local(thd))
7247       {
7248         WSREP_DEBUG("marking trx as PA unsafe pk %d", table->s->primary_key);
7249         if (thd->wsrep_cs().mark_transaction_pa_unsafe())
7250           WSREP_DEBUG("session does not have active transaction,"
7251                       " can not mark as PA unsafe");
7252       }
7253 
7254       if (!error && table_share->tmp_table == NO_TMP_TABLE &&
7255           ht->flags & HTON_WSREP_REPLICATION)
7256         error= wsrep_after_row(thd);
7257     }
7258 #endif /* WITH_WSREP */
7259   }
7260   return error;
7261 }
7262 
7263 /*
7264   Update first row. Only used by sequence tables
7265 */
7266 
update_first_row(const uchar * new_data)7267 int handler::update_first_row(const uchar *new_data)
7268 {
7269   int error;
7270   if (likely(!(error= ha_rnd_init(1))))
7271   {
7272     int end_error;
7273     if (likely(!(error= ha_rnd_next(table->record[1]))))
7274     {
7275       /*
7276         We have to do the memcmp as otherwise we may get error 169 from InnoDB
7277       */
7278       if (memcmp(new_data, table->record[1], table->s->reclength))
7279         error= update_row(table->record[1], new_data);
7280     }
7281     end_error= ha_rnd_end();
7282     if (likely(!error))
7283       error= end_error;
7284     /* Logging would be wrong if update_row works but ha_rnd_end fails */
7285     DBUG_ASSERT(!end_error || error != 0);
7286   }
7287   return error;
7288 }
7289 
7290 
ha_delete_row(const uchar * buf)7291 int handler::ha_delete_row(const uchar *buf)
7292 {
7293   int error;
7294   DBUG_ASSERT(table_share->tmp_table != NO_TMP_TABLE ||
7295               m_lock_type == F_WRLCK);
7296   /*
7297     Normally table->record[0] is used, but sometimes table->record[1] is used.
7298   */
7299   DBUG_ASSERT(buf == table->record[0] ||
7300               buf == table->record[1]);
7301 
7302   MYSQL_DELETE_ROW_START(table_share->db.str, table_share->table_name.str);
7303   mark_trx_read_write();
7304   increment_statistics(&SSV::ha_delete_count);
7305 
7306   TABLE_IO_WAIT(tracker, PSI_TABLE_DELETE_ROW, active_index, error,
7307     { error= delete_row(buf);})
7308   MYSQL_DELETE_ROW_DONE(error);
7309   if (likely(!error))
7310   {
7311     rows_changed++;
7312     if (row_logging)
7313     {
7314       Log_func *log_func= Delete_rows_log_event::binlog_row_logging_function;
7315       error= binlog_log_row(table, buf, 0, log_func);
7316     }
7317 #ifdef WITH_WSREP
7318     THD *thd= ha_thd();
7319     if (WSREP_NNULL(thd))
7320     {
7321       /* for streaming replication, the following wsrep_after_row()
7322       may replicate a fragment, so we have to declare potential PA
7323       unsafe before that */
7324       if (table->s->primary_key == MAX_KEY && wsrep_thd_is_local(thd))
7325       {
7326         WSREP_DEBUG("marking trx as PA unsafe pk %d", table->s->primary_key);
7327         if (thd->wsrep_cs().mark_transaction_pa_unsafe())
7328           WSREP_DEBUG("session does not have active transaction,"
7329                       " can not mark as PA unsafe");
7330       }
7331 
7332       if (!error && table_share->tmp_table == NO_TMP_TABLE &&
7333           ht->flags & HTON_WSREP_REPLICATION)
7334         error= wsrep_after_row(thd);
7335     }
7336 #endif /* WITH_WSREP */
7337   }
7338   return error;
7339 }
7340 
7341 
7342 /**
7343   Execute a direct update request.  A direct update request updates all
7344   qualified rows in a single operation, rather than one row at a time.
7345   In a Spider cluster the direct update operation is pushed down to the
7346   child levels of the cluster.
7347 
  Note that this can't be used in case of statement logging
7349 
7350   @param  update_rows   Number of updated rows.
7351 
7352   @retval 0             Success.
7353   @retval != 0          Failure.
7354 */
7355 
ha_direct_update_rows(ha_rows * update_rows,ha_rows * found_rows)7356 int handler::ha_direct_update_rows(ha_rows *update_rows, ha_rows *found_rows)
7357 {
7358   int error;
7359   MYSQL_UPDATE_ROW_START(table_share->db.str, table_share->table_name.str);
7360   mark_trx_read_write();
7361 
7362   error= direct_update_rows(update_rows, found_rows);
7363   MYSQL_UPDATE_ROW_DONE(error);
7364   return error;
7365 }
7366 
7367 
7368 /**
7369   Execute a direct delete request.  A direct delete request deletes all
7370   qualified rows in a single operation, rather than one row at a time.
7371   In a Spider cluster the direct delete operation is pushed down to the
7372   child levels of the cluster.
7373 
7374   @param  delete_rows   Number of deleted rows.
7375 
7376   @retval 0             Success.
7377   @retval != 0          Failure.
7378 */
7379 
ha_direct_delete_rows(ha_rows * delete_rows)7380 int handler::ha_direct_delete_rows(ha_rows *delete_rows)
7381 {
7382   int error;
7383   /* Ensure we are not using binlog row */
7384   DBUG_ASSERT(!table->in_use->is_current_stmt_binlog_format_row());
7385 
7386   MYSQL_DELETE_ROW_START(table_share->db.str, table_share->table_name.str);
7387   mark_trx_read_write();
7388 
7389   error = direct_delete_rows(delete_rows);
7390   MYSQL_DELETE_ROW_DONE(error);
7391   return error;
7392 }
7393 
7394 
7395 /** @brief
7396   use_hidden_primary_key() is called in case of an update/delete when
7397   (table_flags() and HA_PRIMARY_KEY_REQUIRED_FOR_DELETE) is defined
7398   but we don't have a primary key
7399 */
use_hidden_primary_key()7400 void handler::use_hidden_primary_key()
7401 {
7402   /* fallback to use all columns in the table to identify row */
7403   table->column_bitmaps_set(&table->s->all_set, table->write_set);
7404 }
7405 
7406 
7407 /**
7408   Get an initialized ha_share.
7409 
7410   @return Initialized ha_share
7411     @retval NULL    ha_share is not yet initialized.
    @retval != NULL previously initialized ha_share.
7413 
7414   @note
7415   If not a temp table, then LOCK_ha_data must be held.
7416 */
7417 
get_ha_share_ptr()7418 Handler_share *handler::get_ha_share_ptr()
7419 {
7420   DBUG_ENTER("handler::get_ha_share_ptr");
7421   DBUG_ASSERT(ha_share);
7422   DBUG_ASSERT(table_share);
7423 
7424 #ifndef DBUG_OFF
7425   if (table_share->tmp_table == NO_TMP_TABLE)
7426     mysql_mutex_assert_owner(&table_share->LOCK_ha_data);
7427 #endif
7428 
7429   DBUG_RETURN(*ha_share);
7430 }
7431 
7432 
7433 /**
7434   Set ha_share to be used by all instances of the same table/partition.
7435 
7436   @param ha_share    Handler_share to be shared.
7437 
7438   @note
7439   If not a temp table, then LOCK_ha_data must be held.
7440 */
7441 
set_ha_share_ptr(Handler_share * arg_ha_share)7442 void handler::set_ha_share_ptr(Handler_share *arg_ha_share)
7443 {
7444   DBUG_ENTER("handler::set_ha_share_ptr");
7445   DBUG_ASSERT(ha_share);
7446 #ifndef DBUG_OFF
7447   if (table_share->tmp_table == NO_TMP_TABLE)
7448     mysql_mutex_assert_owner(&table_share->LOCK_ha_data);
7449 #endif
7450 
7451   *ha_share= arg_ha_share;
7452   DBUG_VOID_RETURN;
7453 }
7454 
7455 
7456 /**
7457   Take a lock for protecting shared handler data.
7458 */
7459 
lock_shared_ha_data()7460 void handler::lock_shared_ha_data()
7461 {
7462   DBUG_ASSERT(table_share);
7463   if (table_share->tmp_table == NO_TMP_TABLE)
7464     mysql_mutex_lock(&table_share->LOCK_ha_data);
7465 }
7466 
7467 
7468 /**
7469   Release lock for protecting ha_share.
7470 */
7471 
unlock_shared_ha_data()7472 void handler::unlock_shared_ha_data()
7473 {
7474   DBUG_ASSERT(table_share);
7475   if (table_share->tmp_table == NO_TMP_TABLE)
7476     mysql_mutex_unlock(&table_share->LOCK_ha_data);
7477 }
7478 
7479 /** @brief
  Dummy function which accepts information about log files which are not
  needed by handlers
7482 */
signal_log_not_needed(struct handlerton,char * log_file)7483 void signal_log_not_needed(struct handlerton, char *log_file)
7484 {
7485   DBUG_ENTER("signal_log_not_needed");
7486   DBUG_PRINT("enter", ("logfile '%s'", log_file));
7487   DBUG_VOID_RETURN;
7488 }
7489 
set_lock_type(enum thr_lock_type lock)7490 void handler::set_lock_type(enum thr_lock_type lock)
7491 {
7492   table->reginfo.lock_type= lock;
7493 }
7494 
compare_key_parts(const Field & old_field,const Column_definition & new_field,const KEY_PART_INFO & old_part,const KEY_PART_INFO & new_part) const7495 Compare_keys handler::compare_key_parts(const Field &old_field,
7496                                         const Column_definition &new_field,
7497                                         const KEY_PART_INFO &old_part,
7498                                         const KEY_PART_INFO &new_part) const
7499 {
7500   if (!old_field.is_equal(new_field))
7501     return Compare_keys::NotEqual;
7502 
7503   if (old_part.length != new_part.length)
7504     return Compare_keys::NotEqual;
7505 
7506   return Compare_keys::Equal;
7507 }
7508 
7509 #ifdef WITH_WSREP
7510 /**
7511   @details
7512   This function makes the storage engine to force the victim transaction
7513   to abort. Currently, only innodb has this functionality, but any SE
7514   implementing the wsrep API should provide this service to support
7515   multi-master operation.
7516 
7517   @note Aborting the transaction does NOT end it, it still has to
7518   be rolled back with hton->rollback().
7519 
7520   @note It is safe to abort from one thread (bf_thd) the transaction,
7521   running in another thread (victim_thd), because InnoDB's lock_sys and
  trx_mutex guarantee the necessary protection. However, it's not safe
7523   to access victim_thd->transaction, because it's not protected from
7524   concurrent accesses. And it's an overkill to take LOCK_plugin and
7525   iterate the whole installed_htons[] array every time.
7526 
7527   @param bf_thd       brute force THD asking for the abort
7528   @param victim_thd   victim THD to be aborted
7529 
7530   @return
7531     always 0
7532 */
7533 
ha_abort_transaction(THD * bf_thd,THD * victim_thd,my_bool signal)7534 int ha_abort_transaction(THD *bf_thd, THD *victim_thd, my_bool signal)
7535 {
7536   DBUG_ENTER("ha_abort_transaction");
7537   if (!WSREP(bf_thd) &&
7538       !(bf_thd->variables.wsrep_OSU_method == WSREP_OSU_RSU &&
7539         wsrep_thd_is_toi(bf_thd))) {
7540     DBUG_RETURN(0);
7541   }
7542 
7543   handlerton *hton= installed_htons[DB_TYPE_INNODB];
7544   if (hton && hton->abort_transaction)
7545   {
7546     hton->abort_transaction(hton, bf_thd, victim_thd, signal);
7547   }
7548   else
7549   {
7550     WSREP_WARN("Cannot abort InnoDB transaction");
7551   }
7552 
7553   DBUG_RETURN(0);
7554 }
7555 #endif /* WITH_WSREP */
7556 
7557 
7558 #ifdef TRANS_LOG_MGM_EXAMPLE_CODE
7559 /*
7560   Example of transaction log management functions based on assumption that logs
7561   placed into a directory
7562 */
7563 #include <my_dir.h>
7564 #include <my_sys.h>
example_of_iterator_using_for_logs_cleanup(handlerton * hton)7565 int example_of_iterator_using_for_logs_cleanup(handlerton *hton)
7566 {
7567   void *buffer;
7568   int res= 1;
7569   struct handler_iterator iterator;
7570   struct handler_log_file_data data;
7571 
7572   if (!hton->create_iterator)
7573     return 1; /* iterator creator is not supported */
7574 
7575   if ((*hton->create_iterator)(hton, HA_TRANSACTLOG_ITERATOR, &iterator) !=
7576       HA_ITERATOR_OK)
7577   {
7578     /* error during creation of log iterator or iterator is not supported */
7579     return 1;
7580   }
7581   while((*iterator.next)(&iterator, (void*)&data) == 0)
7582   {
7583     printf("%s\n", data.filename.str);
7584     if (data.status == HA_LOG_STATUS_FREE &&
7585         mysql_file_delete(INSTRUMENT_ME,
7586                           data.filename.str, MYF(MY_WME)))
7587       goto err;
7588   }
7589   res= 0;
7590 err:
7591   (*iterator.destroy)(&iterator);
7592   return res;
7593 }
7594 
7595 
7596 /*
7597   Here we should get info from handler where it save logs but here is
7598   just example, so we use constant.
7599   IMHO FN_ROOTDIR ("/") is safe enough for example, because nobody has
  rights on it except root and it consists of directories only, at least for
7601   *nix (sorry, can't find windows-safe solution here, but it is only example).
7602 */
7603 #define fl_dir FN_ROOTDIR
7604 
7605 
7606 /** @brief
  Dummy function to return the log status. It should be replaced by a
  function which really detects the log status and checks that the file is
  a log of this handler.
7610 */
fl_get_log_status(char * log)7611 enum log_status fl_get_log_status(char *log)
7612 {
7613   MY_STAT stat_buff;
7614   if (mysql_file_stat(INSTRUMENT_ME, log, &stat_buff, MYF(0)))
7615     return HA_LOG_STATUS_INUSE;
7616   return HA_LOG_STATUS_NOSUCHLOG;
7617 }
7618 
7619 
/*
  Data of the example log iterator. It is stored in
  handler_iterator::buffer and filled by fl_log_iterator_buffer_init().
*/
struct fl_buff
{
  LEX_STRING *names;          /* name of each collected file */
  enum log_status *statuses;  /* status of each file, same index as names */
  uint32 entries;             /* number of elements filled in both arrays */
  uint32 current;             /* index of the next element to return */
};
7627 
7628 
fl_log_iterator_next(struct handler_iterator * iterator,void * iterator_object)7629 int fl_log_iterator_next(struct handler_iterator *iterator,
7630                           void *iterator_object)
7631 {
7632   struct fl_buff *buff= (struct fl_buff *)iterator->buffer;
7633   struct handler_log_file_data *data=
7634     (struct handler_log_file_data *) iterator_object;
7635   if (buff->current >= buff->entries)
7636     return 1;
7637   data->filename= buff->names[buff->current];
7638   data->status= buff->statuses[buff->current];
7639   buff->current++;
7640   return 0;
7641 }
7642 
7643 
fl_log_iterator_destroy(struct handler_iterator * iterator)7644 void fl_log_iterator_destroy(struct handler_iterator *iterator)
7645 {
7646   my_free(iterator->buffer);
7647 }
7648 
7649 
7650 /** @brief
7651   returns buffer, to be assigned in handler_iterator struct
7652 */
7653 enum handler_create_iterator_result
fl_log_iterator_buffer_init(struct handler_iterator * iterator)7654 fl_log_iterator_buffer_init(struct handler_iterator *iterator)
7655 {
7656   MY_DIR *dirp;
7657   struct fl_buff *buff;
7658   char *name_ptr;
7659   uchar *ptr;
7660   FILEINFO *file;
7661   uint32 i;
7662 
7663   /* to be able to make my_free without crash in case of error */
7664   iterator->buffer= 0;
7665 
7666   if (!(dirp = my_dir(fl_dir, MYF(MY_THREAD_SPECIFIC))))
7667   {
7668     return HA_ITERATOR_ERROR;
7669   }
7670   if ((ptr= (uchar*)my_malloc(ALIGN_SIZE(sizeof(fl_buff)) +
7671                              ((ALIGN_SIZE(sizeof(LEX_STRING)) +
7672                                sizeof(enum log_status) +
7673                                + FN_REFLEN + 1) *
7674                               (uint) dirp->number_off_files),
7675                              MYF(MY_THREAD_SPECIFIC))) == 0)
7676   {
7677     return HA_ITERATOR_ERROR;
7678   }
7679   buff= (struct fl_buff *)ptr;
7680   buff->entries= buff->current= 0;
7681   ptr= ptr + (ALIGN_SIZE(sizeof(fl_buff)));
7682   buff->names= (LEX_STRING*) (ptr);
7683   ptr= ptr + ((ALIGN_SIZE(sizeof(LEX_STRING)) *
7684                (uint) dirp->number_off_files));
7685   buff->statuses= (enum log_status *)(ptr);
7686   name_ptr= (char *)(ptr + (sizeof(enum log_status) *
7687                             (uint) dirp->number_off_files));
7688   for (i=0 ; i < (uint) dirp->number_off_files  ; i++)
7689   {
7690     enum log_status st;
7691     file= dirp->dir_entry + i;
7692     if ((file->name[0] == '.' &&
7693          ((file->name[1] == '.' && file->name[2] == '\0') ||
7694             file->name[1] == '\0')))
7695       continue;
7696     if ((st= fl_get_log_status(file->name)) == HA_LOG_STATUS_NOSUCHLOG)
7697       continue;
7698     name_ptr= strxnmov(buff->names[buff->entries].str= name_ptr,
7699                        FN_REFLEN, fl_dir, file->name, NullS);
7700     buff->names[buff->entries].length= (name_ptr -
7701                                         buff->names[buff->entries].str);
7702     buff->statuses[buff->entries]= st;
7703     buff->entries++;
7704   }
7705 
7706   iterator->buffer= buff;
7707   iterator->next= &fl_log_iterator_next;
7708   iterator->destroy= &fl_log_iterator_destroy;
7709   my_dirend(dirp);
7710   return HA_ITERATOR_OK;
7711 }
7712 
7713 
7714 /* An example of a iterator creator */
7715 enum handler_create_iterator_result
fl_create_iterator(enum handler_iterator_type type,struct handler_iterator * iterator)7716 fl_create_iterator(enum handler_iterator_type type,
7717                    struct handler_iterator *iterator)
7718 {
7719   switch(type) {
7720   case HA_TRANSACTLOG_ITERATOR:
7721     return fl_log_iterator_buffer_init(iterator);
7722   default:
7723     return HA_ITERATOR_UNSUPPORTED;
7724   }
7725 }
7726 #endif /*TRANS_LOG_MGM_EXAMPLE_CODE*/
7727 
7728 
check_conflicting_charset_declarations(CHARSET_INFO * cs)7729 bool HA_CREATE_INFO::check_conflicting_charset_declarations(CHARSET_INFO *cs)
7730 {
7731   if ((used_fields & HA_CREATE_USED_DEFAULT_CHARSET) &&
7732       /* DEFAULT vs explicit, or explicit vs DEFAULT */
7733       (((default_table_charset == NULL) != (cs == NULL)) ||
7734       /* Two different explicit character sets */
7735        (default_table_charset && cs &&
7736         !my_charset_same(default_table_charset, cs))))
7737   {
7738     my_error(ER_CONFLICTING_DECLARATIONS, MYF(0),
7739              "CHARACTER SET ", default_table_charset ?
7740                                default_table_charset->csname : "DEFAULT",
7741              "CHARACTER SET ", cs ? cs->csname : "DEFAULT");
7742     return true;
7743   }
7744   return false;
7745 }
7746 
7747 /* Remove all indexes for a given table from global index statistics */
7748 
7749 static
del_global_index_stats_for_table(THD * thd,uchar * cache_key,size_t cache_key_length)7750 int del_global_index_stats_for_table(THD *thd, uchar* cache_key, size_t cache_key_length)
7751 {
7752   int res = 0;
7753   DBUG_ENTER("del_global_index_stats_for_table");
7754 
7755   mysql_mutex_lock(&LOCK_global_index_stats);
7756 
7757   for (uint i= 0; i < global_index_stats.records;)
7758   {
7759     INDEX_STATS *index_stats =
7760       (INDEX_STATS*) my_hash_element(&global_index_stats, i);
7761 
7762     /* We search correct db\0table_name\0 string */
7763     if (index_stats &&
7764 	index_stats->index_name_length >= cache_key_length &&
7765 	!memcmp(index_stats->index, cache_key, cache_key_length))
7766     {
7767       res= my_hash_delete(&global_index_stats, (uchar*)index_stats);
7768       /*
7769           In our HASH implementation on deletion one elements
7770           is moved into a place where a deleted element was,
7771           and the last element is moved into the empty space.
7772           Thus we need to re-examine the current element, but
7773           we don't have to restart the search from the beginning.
7774       */
7775     }
7776     else
7777       i++;
7778   }
7779 
7780   mysql_mutex_unlock(&LOCK_global_index_stats);
7781   DBUG_RETURN(res);
7782 }
7783 
7784 /* Remove a table from global table statistics */
7785 
del_global_table_stat(THD * thd,const LEX_CSTRING * db,const LEX_CSTRING * table)7786 int del_global_table_stat(THD *thd, const LEX_CSTRING *db, const LEX_CSTRING *table)
7787 {
7788   TABLE_STATS *table_stats;
7789   int res = 0;
7790   uchar *cache_key;
7791   size_t cache_key_length;
7792   DBUG_ENTER("del_global_table_stat");
7793 
7794   cache_key_length= db->length + 1 + table->length + 1;
7795 
7796   if(!(cache_key= (uchar *)my_malloc(PSI_INSTRUMENT_ME, cache_key_length,
7797                                      MYF(MY_WME | MY_ZEROFILL))))
7798   {
7799     /* Out of memory error already given */
7800     res = 1;
7801     goto end;
7802   }
7803 
7804   memcpy(cache_key, db->str, db->length);
7805   memcpy(cache_key + db->length + 1, table->str, table->length);
7806 
7807   res= del_global_index_stats_for_table(thd, cache_key, cache_key_length);
7808 
7809   mysql_mutex_lock(&LOCK_global_table_stats);
7810 
7811   if((table_stats= (TABLE_STATS*) my_hash_search(&global_table_stats,
7812                                                 cache_key,
7813                                                 cache_key_length)))
7814     res= my_hash_delete(&global_table_stats, (uchar*)table_stats);
7815 
7816   my_free(cache_key);
7817   mysql_mutex_unlock(&LOCK_global_table_stats);
7818 
7819 end:
7820   DBUG_RETURN(res);
7821 }
7822 
7823 /* Remove a index from global index statistics */
7824 
del_global_index_stat(THD * thd,TABLE * table,KEY * key_info)7825 int del_global_index_stat(THD *thd, TABLE* table, KEY* key_info)
7826 {
7827   INDEX_STATS *index_stats;
7828   size_t key_length= table->s->table_cache_key.length + key_info->name.length + 1;
7829   int res = 0;
7830   DBUG_ENTER("del_global_index_stat");
7831   mysql_mutex_lock(&LOCK_global_index_stats);
7832 
7833   if((index_stats= (INDEX_STATS*) my_hash_search(&global_index_stats,
7834                                                 key_info->cache_name,
7835                                                 key_length)))
7836     res= my_hash_delete(&global_index_stats, (uchar*)index_stats);
7837 
7838   mysql_mutex_unlock(&LOCK_global_index_stats);
7839   DBUG_RETURN(res);
7840 }
7841 
7842 /*****************************************************************************
7843   VERSIONING functions
7844 ******************************************************************************/
7845 
is_start(const char * name) const7846 bool Vers_parse_info::is_start(const char *name) const
7847 {
7848   DBUG_ASSERT(name);
7849   return as_row.start && as_row.start.streq(name);
7850 }
is_end(const char * name) const7851 bool Vers_parse_info::is_end(const char *name) const
7852 {
7853   DBUG_ASSERT(name);
7854   return as_row.end && as_row.end.streq(name);
7855 }
is_start(const Create_field & f) const7856 bool Vers_parse_info::is_start(const Create_field &f) const
7857 {
7858   return f.flags & VERS_ROW_START;
7859 }
is_end(const Create_field & f) const7860 bool Vers_parse_info::is_end(const Create_field &f) const
7861 {
7862   return f.flags & VERS_ROW_END;
7863 }
7864 
vers_init_sys_field(THD * thd,const char * field_name,int flags,bool integer)7865 static Create_field *vers_init_sys_field(THD *thd, const char *field_name, int flags, bool integer)
7866 {
7867   Create_field *f= new (thd->mem_root) Create_field();
7868   if (!f)
7869     return NULL;
7870 
7871   f->field_name.str= field_name;
7872   f->field_name.length= strlen(field_name);
7873   f->charset= system_charset_info;
7874   f->flags= flags | NOT_NULL_FLAG;
7875   if (integer)
7876   {
7877     DBUG_ASSERT(0); // Not implemented yet
7878     f->set_handler(&type_handler_vers_trx_id);
7879     f->length= MY_INT64_NUM_DECIMAL_DIGITS - 1;
7880     f->flags|= UNSIGNED_FLAG;
7881   }
7882   else
7883   {
7884     f->set_handler(&type_handler_timestamp2);
7885     f->length= MAX_DATETIME_PRECISION;
7886   }
7887   f->invisible= DBUG_EVALUATE_IF("sysvers_show", VISIBLE, INVISIBLE_SYSTEM);
7888 
7889   if (f->check(thd))
7890     return NULL;
7891 
7892   return f;
7893 }
7894 
vers_create_sys_field(THD * thd,const char * field_name,Alter_info * alter_info,int flags)7895 static bool vers_create_sys_field(THD *thd, const char *field_name,
7896                                   Alter_info *alter_info, int flags)
7897 {
7898   Create_field *f= vers_init_sys_field(thd, field_name, flags, false);
7899   if (!f)
7900     return true;
7901 
7902   alter_info->flags|= ALTER_PARSER_ADD_COLUMN;
7903   alter_info->create_list.push_back(f);
7904 
7905   return false;
7906 }
7907 
/*
  Names that fix_implicit() gives to the row start and row end fields
  which it creates itself
*/
const Lex_ident Vers_parse_info::default_start= "row_start";
const Lex_ident Vers_parse_info::default_end= "row_end";
7910 
fix_implicit(THD * thd,Alter_info * alter_info)7911 bool Vers_parse_info::fix_implicit(THD *thd, Alter_info *alter_info)
7912 {
7913   // If user specified some of these he must specify the others too. Do nothing.
7914   if (*this)
7915     return false;
7916 
7917   alter_info->flags|= ALTER_PARSER_ADD_COLUMN;
7918 
7919   period= start_end_t(default_start, default_end);
7920   as_row= period;
7921 
7922   if (vers_create_sys_field(thd, default_start, alter_info, VERS_ROW_START) ||
7923       vers_create_sys_field(thd, default_end, alter_info, VERS_ROW_END))
7924   {
7925     return true;
7926   }
7927   return false;
7928 }
7929 
7930 
vers_fix_system_fields(THD * thd,Alter_info * alter_info,const TABLE_LIST & create_table)7931 bool Table_scope_and_contents_source_st::vers_fix_system_fields(
7932   THD *thd, Alter_info *alter_info, const TABLE_LIST &create_table)
7933 {
7934   DBUG_ASSERT(!(alter_info->flags & ALTER_DROP_SYSTEM_VERSIONING));
7935 
7936   DBUG_EXECUTE_IF("sysvers_force", if (!tmp_table()) {
7937                   alter_info->flags|= ALTER_ADD_SYSTEM_VERSIONING;
7938                   options|= HA_VERSIONED_TABLE; });
7939 
7940   if (!vers_info.need_check(alter_info))
7941     return false;
7942 
7943   const bool add_versioning= alter_info->flags & ALTER_ADD_SYSTEM_VERSIONING;
7944 
7945   if (!vers_info.versioned_fields && vers_info.unversioned_fields && !add_versioning)
7946   {
7947     // All is correct but this table is not versioned.
7948     options&= ~HA_VERSIONED_TABLE;
7949     return false;
7950   }
7951 
7952   if (!add_versioning && vers_info && !vers_info.versioned_fields)
7953   {
7954     my_error(ER_MISSING, MYF(0), create_table.table_name.str,
7955              "WITH SYSTEM VERSIONING");
7956     return true;
7957   }
7958 
7959   List_iterator<Create_field> it(alter_info->create_list);
7960   while (Create_field *f= it++)
7961   {
7962     if (f->vers_sys_field())
7963       continue;
7964     if ((f->versioning == Column_definition::VERSIONING_NOT_SET && !add_versioning) ||
7965         f->versioning == Column_definition::WITHOUT_VERSIONING)
7966     {
7967       f->flags|= VERS_UPDATE_UNVERSIONED_FLAG;
7968     }
7969   } // while (Create_field *f= it++)
7970 
7971   if (vers_info.fix_implicit(thd, alter_info))
7972     return true;
7973 
7974   return false;
7975 }
7976 
7977 
vers_check_system_fields(THD * thd,Alter_info * alter_info,const Lex_table_name & table_name,const Lex_table_name & db,int select_count)7978 bool Table_scope_and_contents_source_st::vers_check_system_fields(
7979         THD *thd, Alter_info *alter_info, const Lex_table_name &table_name,
7980         const Lex_table_name &db, int select_count)
7981 {
7982   if (!(options & HA_VERSIONED_TABLE))
7983     return false;
7984 
7985   uint versioned_fields= 0;
7986 
7987   if (!(alter_info->flags & ALTER_DROP_SYSTEM_VERSIONING))
7988   {
7989     uint fieldnr= 0;
7990     List_iterator<Create_field> field_it(alter_info->create_list);
7991     while (Create_field *f= field_it++)
7992     {
7993       /*
7994          The field from the CREATE part can be duplicated in the SELECT part of
7995          CREATE...SELECT. In that case double counts should be avoided.
7996          select_create::create_table_from_items just pushes the fields back into
7997          the create_list, without additional manipulations, so the fields from
7998          SELECT go last there.
7999        */
8000       bool is_dup= false;
8001       if (fieldnr >= alter_info->create_list.elements - select_count)
8002       {
8003         List_iterator<Create_field> dup_it(alter_info->create_list);
8004         for (Create_field *dup= dup_it++; !is_dup && dup != f; dup= dup_it++)
8005           is_dup= Lex_ident(dup->field_name).streq(f->field_name);
8006       }
8007 
8008       if (!(f->flags & VERS_UPDATE_UNVERSIONED_FLAG) && !is_dup)
8009         versioned_fields++;
8010       fieldnr++;
8011     }
8012     if (versioned_fields == VERSIONING_FIELDS)
8013     {
8014       my_error(ER_VERS_TABLE_MUST_HAVE_COLUMNS, MYF(0), table_name.str);
8015       return true;
8016     }
8017   }
8018 
8019   if (!(alter_info->flags & ALTER_ADD_SYSTEM_VERSIONING) && !versioned_fields)
8020     return false;
8021 
8022   return vers_info.check_sys_fields(table_name, db, alter_info);
8023 }
8024 
8025 
/**
  Validate the versioning-related parts of an ALTER TABLE against the
  existing table and complete this Vers_parse_info accordingly.

  The checks run in a fixed order; the first failing one reports its error
  through my_error() and ends the function.

  @param thd          connection, passed on to fix_implicit()
  @param alter_info   statement description (flags and column list)
  @param create_info  receives HA_VERSIONED_TABLE when the existing table is
                      versioned
  @param table        the table being altered
  @return  false on success or when nothing needs checking, true on error
*/
bool Vers_parse_info::fix_alter_info(THD *thd, Alter_info *alter_info,
                                     HA_CREATE_INFO *create_info, TABLE *table)
{
  TABLE_SHARE *share= table->s;
  const char *table_name= share->table_name.str;

  // Neither the statement nor the table has anything to do with versioning.
  if (!need_check(alter_info) && !share->versioned)
    return false;

  // Reject temporary tables (share->tmp_table); the "sysvers_force" debug
  // keyword is consulted through DBUG_EVALUATE_IF.
  if (DBUG_EVALUATE_IF("sysvers_force", 0, share->tmp_table))
  {
    my_error(ER_VERS_NOT_SUPPORTED, MYF(0), "CREATE TEMPORARY TABLE");
    return true;
  }

  // ADD SYSTEM VERSIONING on a table that is already versioned.
  if (alter_info->flags & ALTER_ADD_SYSTEM_VERSIONING &&
      table->versioned())
  {
    my_error(ER_VERS_ALREADY_VERSIONED, MYF(0), table_name);
    return true;
  }

  // DROP SYSTEM VERSIONING: only the preconditions are checked here.
  if (alter_info->flags & ALTER_DROP_SYSTEM_VERSIONING)
  {
    if (!share->versioned)
    {
      my_error(ER_VERS_NOT_VERSIONED, MYF(0), table_name);
      return true;
    }
#ifdef WITH_PARTITION_STORAGE_ENGINE
    // Not allowed while the table is partitioned by VERSIONING_PARTITION.
    if (table->part_info &&
        table->part_info->part_type == VERSIONING_PARTITION)
    {
      my_error(ER_DROP_VERSIONING_SYSTEM_TIME_PARTITION, MYF(0), table_name);
      return true;
    }
#endif

    return false;
  }

  // Without ADD SYSTEM VERSIONING the statement must not define any new
  // system field; which error is raised depends on whether the table is
  // versioned.
  if (!(alter_info->flags & ALTER_ADD_SYSTEM_VERSIONING))
  {
    List_iterator_fast<Create_field> it(alter_info->create_list);
    while (Create_field *f= it++)
    {
      if (f->flags & VERS_SYSTEM_FIELD)
      {
        if (!table->versioned())
        {
          my_error(ER_VERS_NOT_VERSIONED, MYF(0), table->s->table_name.str);
          return true;
        }
        my_error(ER_VERS_DUPLICATE_ROW_START_END, MYF(0),
                 f->flags & VERS_ROW_START ? "START" : "END", f->field_name.str);
        return true;
      }
    }
  }

  // DROP PERIOD or per-column versioning attributes need a versioned table.
  if ((alter_info->flags & ALTER_DROP_PERIOD ||
       versioned_fields || unversioned_fields) && !share->versioned)
  {
    my_error(ER_VERS_NOT_VERSIONED, MYF(0), table_name);
    return true;
  }

  // Existing table is versioned: inherit its system field names.
  if (share->versioned)
  {
    if (alter_info->flags & ALTER_ADD_PERIOD)
    {
      my_error(ER_VERS_ALREADY_VERSIONED, MYF(0), table_name);
      return true;
    }

    // copy info from existing table
    create_info->options|= HA_VERSIONED_TABLE;

    DBUG_ASSERT(share->vers_start_field());
    DBUG_ASSERT(share->vers_end_field());
    Lex_ident start(share->vers_start_field()->field_name);
    Lex_ident end(share->vers_end_field()->field_name);
    DBUG_ASSERT(start.str);
    DBUG_ASSERT(end.str);

    as_row= start_end_t(start, end);
    period= as_row;

    if (alter_info->create_list.elements)
    {
      List_iterator_fast<Create_field> it(alter_info->create_list);
      while (Create_field *f= it++)
      {
        // Columns declared WITHOUT SYSTEM VERSIONING are flagged as such.
        if (f->versioning == Column_definition::WITHOUT_VERSIONING)
          f->flags|= VERS_UPDATE_UNVERSIONED_FLAG;

        // f->change names the column being changed; the existing system
        // fields must not be targeted.
        if (f->change.str && (start.streq(f->change) || end.streq(f->change)))
        {
          my_error(ER_VERS_ALTER_SYSTEM_FIELD, MYF(0), f->change.str);
          return true;
        }
      }
    }

    return false;
  }

  // Table is not versioned yet: add implicit system fields if required.
  if (fix_implicit(thd, alter_info))
    return true;

  if (alter_info->flags & ALTER_ADD_SYSTEM_VERSIONING)
  {
    if (check_sys_fields(table_name, share->db, alter_info))
      return true;
  }

  return false;
}
8144 
8145 bool
fix_create_like(Alter_info & alter_info,HA_CREATE_INFO & create_info,TABLE_LIST & src_table,TABLE_LIST & table)8146 Vers_parse_info::fix_create_like(Alter_info &alter_info, HA_CREATE_INFO &create_info,
8147                                  TABLE_LIST &src_table, TABLE_LIST &table)
8148 {
8149   List_iterator<Create_field> it(alter_info.create_list);
8150   List_iterator<Key> key_it(alter_info.key_list);
8151   List_iterator<Key_part_spec> kp_it;
8152   Create_field *f, *f_start=NULL, *f_end= NULL;
8153 
8154   DBUG_ASSERT(alter_info.create_list.elements > 2);
8155 
8156   if (create_info.tmp_table())
8157   {
8158     int remove= 2;
8159     while (remove && (f= it++))
8160     {
8161       if (f->flags & VERS_SYSTEM_FIELD)
8162       {
8163         it.remove();
8164         remove--;
8165       }
8166       key_it.rewind();
8167       while (Key *key= key_it++)
8168       {
8169         kp_it.init(key->columns);
8170         while (Key_part_spec *kp= kp_it++)
8171         {
8172           if (0 == lex_string_cmp(system_charset_info, &kp->field_name,
8173                                   &f->field_name))
8174           {
8175             kp_it.remove();
8176           }
8177         }
8178         if (0 == key->columns.elements)
8179         {
8180           key_it.remove();
8181         }
8182       }
8183     }
8184     DBUG_ASSERT(remove == 0);
8185     push_warning_printf(current_thd, Sql_condition::WARN_LEVEL_WARN,
8186                         ER_UNKNOWN_ERROR,
8187                         "System versioning is stripped from temporary `%s.%s`",
8188                         table.db.str, table.table_name.str);
8189     return false;
8190   }
8191 
8192   while ((f= it++))
8193   {
8194     if (f->flags & VERS_ROW_START)
8195     {
8196       f_start= f;
8197       if (f_end)
8198         break;
8199     }
8200     else if (f->flags & VERS_ROW_END)
8201     {
8202       f_end= f;
8203       if (f_start)
8204         break;
8205     }
8206   }
8207 
8208   if (!f_start || !f_end)
8209   {
8210     my_error(ER_MISSING, MYF(0), src_table.table_name.str,
8211              f_start ? "AS ROW END" : "AS ROW START");
8212     return true;
8213   }
8214 
8215   as_row= start_end_t(f_start->field_name, f_end->field_name);
8216   period= as_row;
8217 
8218   create_info.options|= HA_VERSIONED_TABLE;
8219   return false;
8220 }
8221 
need_check(const Alter_info * alter_info) const8222 bool Vers_parse_info::need_check(const Alter_info *alter_info) const
8223 {
8224   return versioned_fields || unversioned_fields ||
8225          alter_info->flags & ALTER_ADD_PERIOD ||
8226          alter_info->flags & ALTER_DROP_PERIOD ||
8227          alter_info->flags & ALTER_ADD_SYSTEM_VERSIONING ||
8228          alter_info->flags & ALTER_DROP_SYSTEM_VERSIONING || *this;
8229 }
8230 
check_conditions(const Lex_table_name & table_name,const Lex_table_name & db) const8231 bool Vers_parse_info::check_conditions(const Lex_table_name &table_name,
8232                                        const Lex_table_name &db) const
8233 {
8234   if (!as_row.start || !as_row.end)
8235   {
8236     my_error(ER_MISSING, MYF(0), table_name.str,
8237                 as_row.start ? "AS ROW END" : "AS ROW START");
8238     return true;
8239   }
8240 
8241   if (!period.start || !period.end)
8242   {
8243     my_error(ER_MISSING, MYF(0), table_name.str, "PERIOD FOR SYSTEM_TIME");
8244     return true;
8245   }
8246 
8247   if (!as_row.start.streq(period.start) ||
8248       !as_row.end.streq(period.end))
8249   {
8250     my_error(ER_VERS_PERIOD_COLUMNS, MYF(0), as_row.start.str, as_row.end.str);
8251     return true;
8252   }
8253 
8254   if (db.streq(MYSQL_SCHEMA_NAME))
8255   {
8256     my_error(ER_VERS_DB_NOT_SUPPORTED, MYF(0), MYSQL_SCHEMA_NAME.str);
8257     return true;
8258   }
8259   return false;
8260 }
8261 
is_versioning_timestamp(const Column_definition * f)8262 static bool is_versioning_timestamp(const Column_definition *f)
8263 {
8264   return f->type_handler() == &type_handler_timestamp2 &&
8265          f->length == MAX_DATETIME_FULL_WIDTH;
8266 }
8267 
is_some_bigint(const Column_definition * f)8268 static bool is_some_bigint(const Column_definition *f)
8269 {
8270   return f->type_handler() == &type_handler_slonglong ||
8271          f->type_handler() == &type_handler_ulonglong ||
8272          f->type_handler() == &type_handler_vers_trx_id;
8273 }
8274 
is_versioning_bigint(const Column_definition * f)8275 static bool is_versioning_bigint(const Column_definition *f)
8276 {
8277   return is_some_bigint(f) && f->flags & UNSIGNED_FLAG &&
8278          f->length == MY_INT64_NUM_DECIMAL_DIGITS - 1;
8279 }
8280 
require_timestamp_error(const char * field,const char * table)8281 static void require_timestamp_error(const char *field, const char *table)
8282 {
8283   my_error(ER_VERS_FIELD_WRONG_TYPE, MYF(0), field, "TIMESTAMP(6)", table);
8284 }
8285 
require_trx_id_error(const char * field,const char * table)8286 static void require_trx_id_error(const char *field, const char *table)
8287 {
8288   my_error(ER_VERS_FIELD_WRONG_TYPE, MYF(0), field, "BIGINT(20) UNSIGNED",
8289            table);
8290 }
8291 
8292 
check_sys_fields(const LEX_CSTRING & table_name,const Column_definition * row_start,const Column_definition * row_end) const8293 bool Vers_type_timestamp::check_sys_fields(const LEX_CSTRING &table_name,
8294                                            const Column_definition *row_start,
8295                                            const Column_definition *row_end) const
8296 {
8297   if (!is_versioning_timestamp(row_start))
8298   {
8299     require_timestamp_error(row_start->field_name.str, table_name.str);
8300     return true;
8301   }
8302 
8303   if (row_end->type_handler()->vers() != this ||
8304       !is_versioning_timestamp(row_end))
8305   {
8306     require_timestamp_error(row_end->field_name.str, table_name.str);
8307     return true;
8308   }
8309 
8310   return false;
8311 }
8312 
8313 
check_sys_fields(const LEX_CSTRING & table_name,const Column_definition * row_start,const Column_definition * row_end) const8314 bool Vers_type_trx::check_sys_fields(const LEX_CSTRING &table_name,
8315                                      const Column_definition *row_start,
8316                                      const Column_definition *row_end) const
8317 {
8318   if (!is_versioning_bigint(row_start))
8319   {
8320     require_trx_id_error(row_start->field_name.str, table_name.str);
8321     return true;
8322   }
8323 
8324   if (row_end->type_handler()->vers() != this ||
8325       !is_versioning_bigint(row_end))
8326   {
8327     require_trx_id_error(row_end->field_name.str, table_name.str);
8328     return true;
8329   }
8330 
8331   if (!is_some_bigint(row_start))
8332   {
8333     require_timestamp_error(row_start->field_name.str, table_name.str);
8334     return true;
8335   }
8336 
8337   if (!TR_table::use_transaction_registry)
8338   {
8339     my_error(ER_VERS_TRT_IS_DISABLED, MYF(0));
8340     return true;
8341   }
8342 
8343   return false;
8344 }
8345 
8346 
check_sys_fields(const Lex_table_name & table_name,const Lex_table_name & db,Alter_info * alter_info) const8347 bool Vers_parse_info::check_sys_fields(const Lex_table_name &table_name,
8348                                        const Lex_table_name &db,
8349                                        Alter_info *alter_info) const
8350 {
8351   if (check_conditions(table_name, db))
8352     return true;
8353 
8354   List_iterator<Create_field> it(alter_info->create_list);
8355   const Create_field *row_start= nullptr;
8356   const Create_field *row_end= nullptr;
8357   while (const Create_field *f= it++)
8358   {
8359     if (f->flags & VERS_ROW_START && !row_start)
8360       row_start= f;
8361     if (f->flags & VERS_ROW_END && !row_end)
8362       row_end= f;
8363   }
8364 
8365   if (!row_start || !row_end)
8366   {
8367     my_error(ER_VERS_PERIOD_COLUMNS, MYF(0), as_row.start.str, as_row.end.str);
8368     return true;
8369   }
8370 
8371   const Vers_type_handler *row_start_vers= row_start->type_handler()->vers();
8372 
8373   if (!row_start_vers)
8374   {
8375     require_timestamp_error(row_start->field_name.str, table_name);
8376     return true;
8377   }
8378 
8379   return row_start_vers->check_sys_fields(table_name, row_start, row_end);
8380 }
8381 
check_field(const Create_field * f,const Lex_ident & f_name) const8382 bool Table_period_info::check_field(const Create_field* f,
8383                                     const Lex_ident& f_name) const
8384 {
8385   bool res= false;
8386   if (!f)
8387   {
8388     my_error(ER_BAD_FIELD_ERROR, MYF(0), f_name.str, name.str);
8389     res= true;
8390   }
8391   else if (f->type_handler()->mysql_timestamp_type() != MYSQL_TIMESTAMP_DATE &&
8392            f->type_handler()->mysql_timestamp_type() != MYSQL_TIMESTAMP_DATETIME)
8393   {
8394     my_error(ER_WRONG_FIELD_SPEC, MYF(0), f->field_name.str);
8395     res= true;
8396   }
8397   else if (f->vcol_info || f->flags & VERS_SYSTEM_FIELD)
8398   {
8399     my_error(ER_PERIOD_FIELD_WRONG_ATTRIBUTES, MYF(0),
8400              f->field_name.str, "GENERATED ALWAYS AS");
8401     res= true;
8402   }
8403 
8404   return res;
8405 }
8406 
check_fields(THD * thd,Alter_info * alter_info,const Lex_table_name & table_name,const Lex_table_name & db,int select_count)8407 bool Table_scope_and_contents_source_st::check_fields(
8408   THD *thd, Alter_info *alter_info,
8409   const Lex_table_name &table_name, const Lex_table_name &db, int select_count)
8410 {
8411   return vers_check_system_fields(thd, alter_info,
8412                                   table_name, db, select_count) ||
8413     check_period_fields(thd, alter_info);
8414 }
8415 
check_period_fields(THD * thd,Alter_info * alter_info)8416 bool Table_scope_and_contents_source_st::check_period_fields(
8417                 THD *thd, Alter_info *alter_info)
8418 {
8419   if (!period_info.name)
8420     return false;
8421 
8422   if (tmp_table())
8423   {
8424     my_error(ER_PERIOD_TEMPORARY_NOT_ALLOWED, MYF(0));
8425     return true;
8426   }
8427 
8428   Table_period_info::start_end_t &period= period_info.period;
8429   const Create_field *row_start= NULL;
8430   const Create_field *row_end= NULL;
8431   List_iterator<Create_field> it(alter_info->create_list);
8432   while (const Create_field *f= it++)
8433   {
8434     if (period.start.streq(f->field_name)) row_start= f;
8435     else if (period.end.streq(f->field_name)) row_end= f;
8436 
8437     if (period_info.name.streq(f->field_name))
8438     {
8439       my_error(ER_DUP_FIELDNAME, MYF(0), f->field_name.str);
8440       return true;
8441     }
8442   }
8443 
8444   bool res= period_info.check_field(row_start, period.start.str)
8445             || period_info.check_field(row_end, period.end.str);
8446   if (res)
8447     return true;
8448 
8449   if (row_start->type_handler() != row_end->type_handler()
8450       || row_start->length != row_end->length)
8451   {
8452     my_error(ER_PERIOD_TYPES_MISMATCH, MYF(0), period_info.name.str);
8453     res= true;
8454   }
8455 
8456   return res;
8457 }
8458 
8459 bool
fix_create_fields(THD * thd,Alter_info * alter_info,const TABLE_LIST & create_table)8460 Table_scope_and_contents_source_st::fix_create_fields(THD *thd,
8461                                                       Alter_info *alter_info,
8462                                                       const TABLE_LIST &create_table)
8463 {
8464   return vers_fix_system_fields(thd, alter_info, create_table)
8465          || fix_period_fields(thd, alter_info);
8466 }
8467 
8468 bool
fix_period_fields(THD * thd,Alter_info * alter_info)8469 Table_scope_and_contents_source_st::fix_period_fields(THD *thd,
8470                                                       Alter_info *alter_info)
8471 {
8472   if (!period_info.name)
8473     return false;
8474 
8475   Table_period_info::start_end_t &period= period_info.period;
8476   List_iterator<Create_field> it(alter_info->create_list);
8477   while (Create_field *f= it++)
8478   {
8479     if (period.start.streq(f->field_name) || period.end.streq(f->field_name))
8480     {
8481       f->period= &period_info;
8482       f->flags|= NOT_NULL_FLAG;
8483     }
8484   }
8485   return false;
8486 }
8487