1 /* Copyright (c) 2000, 2016, Oracle and/or its affiliates.
2    Copyright (c) 2009, 2021, MariaDB Corporation.
3 
4    This program is free software; you can redistribute it and/or modify
5    it under the terms of the GNU General Public License as published by
6    the Free Software Foundation; version 2 of the License.
7 
8    This program is distributed in the hope that it will be useful,
9    but WITHOUT ANY WARRANTY; without even the implied warranty of
10    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
11    GNU General Public License for more details.
12 
13    You should have received a copy of the GNU General Public License
14    along with this program; if not, write to the Free Software Foundation,
15    Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335 USA */
16 
17 /** @file handler.cc
18 
19     @brief
20   Handler-calling-functions
21 */
22 
23 #include "mariadb.h"
24 #include <inttypes.h>
25 #include "sql_priv.h"
26 #include "unireg.h"
27 #include "rpl_rli.h"
28 #include "sql_cache.h"                   // query_cache, query_cache_*
29 #include "sql_connect.h"                 // global_table_stats
30 #include "key.h"     // key_copy, key_unpack, key_cmp_if_same, key_cmp
31 #include "sql_table.h"                   // build_table_filename
32 #include "sql_parse.h"                          // check_stack_overrun
33 #include "sql_acl.h"            // SUPER_ACL
34 #include "sql_base.h"           // TDC_element
35 #include "discover.h"           // extension_based_table_discovery, etc
36 #include "log_event.h"          // *_rows_log_event
37 #include "create_options.h"
38 #include <myisampack.h>
39 #include "transaction.h"
40 #include "myisam.h"
41 #include "probes_mysql.h"
42 #include <mysql/psi/mysql_table.h>
43 #include "debug_sync.h"         // DEBUG_SYNC
44 #include "sql_audit.h"
45 #include "ha_sequence.h"
46 #include "rowid_filter.h"
47 
48 #ifdef WITH_PARTITION_STORAGE_ENGINE
49 #include "ha_partition.h"
50 #endif
51 
52 #ifdef WITH_ARIA_STORAGE_ENGINE
53 #include "../storage/maria/ha_maria.h"
54 #endif
55 #include "semisync_master.h"
56 
57 #include "wsrep_mysqld.h"
58 #ifdef WITH_WSREP
59 #include "wsrep_binlog.h"
60 #include "wsrep_xid.h"
61 #include "wsrep_thd.h"
62 #include "wsrep_trans_observer.h" /* wsrep transaction hooks */
63 #endif /* WITH_WSREP */
64 
65 /*
66   While we have legacy_db_type, we have this array to
67   check for dups and to find handlerton from legacy_db_type.
68   Remove when legacy_db_type is finally gone
69 */
70 st_plugin_int *hton2plugin[MAX_HA];
71 
72 static handlerton *installed_htons[128];
73 
74 #define BITMAP_STACKBUF_SIZE (128/8)
75 
76 KEY_CREATE_INFO default_key_create_info=
77 { HA_KEY_ALG_UNDEF, 0, 0, {NullS, 0}, {NullS, 0}, true };
78 
79 /* number of entries in handlertons[] */
80 ulong total_ha= 0;
81 /* number of storage engines (from handlertons[]) that support 2pc */
82 ulong total_ha_2pc= 0;
83 #ifdef DBUG_ASSERT_EXISTS
/*
  Number of non-mandatory 2pc handlertons whose initialization failed.
  Used to estimate the total_ha_2pc value under the supposition that the
  failures had not occurred.
*/
89 ulong failed_ha_2pc= 0;
90 #endif
91 /* size of savepoint storage area (see ha_init) */
92 ulong savepoint_alloc_size= 0;
93 
94 static const LEX_CSTRING sys_table_aliases[]=
95 {
96   { STRING_WITH_LEN("INNOBASE") },  { STRING_WITH_LEN("INNODB") },
97   { STRING_WITH_LEN("HEAP") },      { STRING_WITH_LEN("MEMORY") },
98   { STRING_WITH_LEN("MERGE") },     { STRING_WITH_LEN("MRG_MYISAM") },
99   { STRING_WITH_LEN("Maria") },     { STRING_WITH_LEN("Aria") },
100   {NullS, 0}
101 };
102 
103 const char *ha_row_type[] = {
104   "", "FIXED", "DYNAMIC", "COMPRESSED", "REDUNDANT", "COMPACT", "PAGE"
105 };
106 
107 const char *tx_isolation_names[] =
108 { "READ-UNCOMMITTED", "READ-COMMITTED", "REPEATABLE-READ", "SERIALIZABLE",
109   NullS};
110 TYPELIB tx_isolation_typelib= {array_elements(tx_isolation_names)-1,"",
111 			       tx_isolation_names, NULL};
112 
113 static TYPELIB known_extensions= {0,"known_exts", NULL, NULL};
114 uint known_extensions_id= 0;
115 
116 static int commit_one_phase_2(THD *thd, bool all, THD_TRANS *trans,
117                               bool is_real_trans);
118 
119 
120 static plugin_ref ha_default_plugin(THD *thd)
121 {
122   if (thd->variables.table_plugin)
123     return thd->variables.table_plugin;
124   return my_plugin_lock(thd, global_system_variables.table_plugin);
125 }
126 
127 static plugin_ref ha_default_tmp_plugin(THD *thd)
128 {
129   if (thd->variables.tmp_table_plugin)
130     return thd->variables.tmp_table_plugin;
131   if (global_system_variables.tmp_table_plugin)
132     return my_plugin_lock(thd, global_system_variables.tmp_table_plugin);
133   return ha_default_plugin(thd);
134 }
135 
136 #if defined(WITH_ARIA_STORAGE_ENGINE) && MYSQL_VERSION_ID < 100500
137 void ha_maria_implicit_commit(THD *thd, bool new_trn)
138 {
139   if (ha_maria::has_active_transaction(thd))
140   {
141     int error;
142     MDL_request mdl_request;
143     mdl_request.init(MDL_key::BACKUP, "", "", MDL_BACKUP_COMMIT, MDL_EXPLICIT);
144     error= thd->mdl_context.acquire_lock(&mdl_request,
145                                          thd->variables.lock_wait_timeout);
146     ha_maria::implicit_commit(thd, new_trn);
147     if (!error)
148       thd->mdl_context.release_lock(mdl_request.ticket);
149   }
150 }
151 #endif
152 
153 
154 /** @brief
155   Return the default storage engine handlerton for thread
156 
157   SYNOPSIS
158     ha_default_handlerton(thd)
159     thd         current thread
160 
161   RETURN
162     pointer to handlerton
163 */
164 handlerton *ha_default_handlerton(THD *thd)
165 {
166   plugin_ref plugin= ha_default_plugin(thd);
167   DBUG_ASSERT(plugin);
168   handlerton *hton= plugin_hton(plugin);
169   DBUG_ASSERT(hton);
170   return hton;
171 }
172 
173 
174 handlerton *ha_default_tmp_handlerton(THD *thd)
175 {
176   plugin_ref plugin= ha_default_tmp_plugin(thd);
177   DBUG_ASSERT(plugin);
178   handlerton *hton= plugin_hton(plugin);
179   DBUG_ASSERT(hton);
180   return hton;
181 }
182 
183 
184 /** @brief
185   Return the storage engine handlerton for the supplied name
186 
187   SYNOPSIS
188     ha_resolve_by_name(thd, name)
189     thd         current thread
190     name        name of storage engine
191 
192   RETURN
193     pointer to storage engine plugin handle
194 */
195 plugin_ref ha_resolve_by_name(THD *thd, const LEX_CSTRING *name,
196                               bool tmp_table)
197 {
198   const LEX_CSTRING *table_alias;
199   plugin_ref plugin;
200 
201 redo:
202   /* my_strnncoll is a macro and gcc doesn't do early expansion of macro */
203   if (thd && !my_charset_latin1.coll->strnncoll(&my_charset_latin1,
204                            (const uchar *)name->str, name->length,
205                            (const uchar *)STRING_WITH_LEN("DEFAULT"), 0))
206     return tmp_table ?  ha_default_tmp_plugin(thd) : ha_default_plugin(thd);
207 
208   if ((plugin= my_plugin_lock_by_name(thd, name, MYSQL_STORAGE_ENGINE_PLUGIN)))
209   {
210     handlerton *hton= plugin_hton(plugin);
211     if (hton && !(hton->flags & HTON_NOT_USER_SELECTABLE))
212       return plugin;
213 
214     /*
215       unlocking plugin immediately after locking is relatively low cost.
216     */
217     plugin_unlock(thd, plugin);
218   }
219 
  /*
    Check the historical aliases. sys_table_aliases stores {alias,
    canonical name} pairs, hence the step of two below.
  */
223   for (table_alias= sys_table_aliases; table_alias->str; table_alias+= 2)
224   {
225     if (!my_strnncoll(&my_charset_latin1,
226                       (const uchar *)name->str, name->length,
227                       (const uchar *)table_alias->str, table_alias->length))
228     {
229       name= table_alias + 1;
230       goto redo;
231     }
232   }
233 
234   return NULL;
235 }
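
/*
  Usage sketch (illustrative only, not called from anywhere in this file):
  resolving an engine name, including a historical alias, and releasing
  the plugin reference afterwards.

    LEX_CSTRING name= { STRING_WITH_LEN("HEAP") };
    if (plugin_ref plugin= ha_resolve_by_name(thd, &name, false))
    {
      handlerton *hton= plugin_hton(plugin); // alias resolves to MEMORY
      ...                                    // use hton
      plugin_unlock(thd, plugin);
    }
*/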
236 
237 
238 bool
239 Storage_engine_name::resolve_storage_engine_with_error(THD *thd,
240                                                        handlerton **ha,
241                                                        bool tmp_table)
242 {
243   if (plugin_ref plugin= ha_resolve_by_name(thd, &m_storage_engine_name,
244                                             tmp_table))
245   {
246     *ha= plugin_hton(plugin);
247     return false;
248   }
249 
250   *ha= NULL;
251   if (thd->variables.sql_mode & MODE_NO_ENGINE_SUBSTITUTION)
252   {
253     my_error(ER_UNKNOWN_STORAGE_ENGINE, MYF(0), m_storage_engine_name.str);
254     return true;
255   }
256   push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN,
257                       ER_UNKNOWN_STORAGE_ENGINE,
258                       ER_THD(thd, ER_UNKNOWN_STORAGE_ENGINE),
259                       m_storage_engine_name.str);
260   return false;
261 }
262 
263 
264 plugin_ref ha_lock_engine(THD *thd, const handlerton *hton)
265 {
266   if (hton)
267   {
268     st_plugin_int *plugin= hton2plugin[hton->slot];
269     return my_plugin_lock(thd, plugin_int_to_ref(plugin));
270   }
271   return NULL;
272 }
273 
274 
275 handlerton *ha_resolve_by_legacy_type(THD *thd, enum legacy_db_type db_type)
276 {
277   plugin_ref plugin;
278   switch (db_type) {
279   case DB_TYPE_DEFAULT:
280     return ha_default_handlerton(thd);
281   default:
282     if (db_type > DB_TYPE_UNKNOWN && db_type < DB_TYPE_DEFAULT &&
283         (plugin= ha_lock_engine(thd, installed_htons[db_type])))
284       return plugin_hton(plugin);
285     /* fall through */
286   case DB_TYPE_UNKNOWN:
287     return NULL;
288   }
289 }
290 
291 
292 /**
  Use another database handler if the requested one is not compiled in.
294 */
295 handlerton *ha_checktype(THD *thd, handlerton *hton, bool no_substitute)
296 {
297   if (ha_storage_engine_is_enabled(hton))
298     return hton;
299 
300   if (no_substitute)
301     return NULL;
302 #ifdef WITH_WSREP
303   (void)wsrep_after_rollback(thd, false);
304 #endif /* WITH_WSREP */
305 
306   return ha_default_handlerton(thd);
307 } /* ha_checktype */
308 
309 
310 handler *get_new_handler(TABLE_SHARE *share, MEM_ROOT *alloc,
311                          handlerton *db_type)
312 {
313   handler *file;
314   DBUG_ENTER("get_new_handler");
315   DBUG_PRINT("enter", ("alloc: %p", alloc));
316 
317   if (db_type && db_type->state == SHOW_OPTION_YES && db_type->create)
318   {
319     if ((file= db_type->create(db_type, share, alloc)))
320       file->init();
321     DBUG_RETURN(file);
322   }
323   /*
    Try the default table type.
    Here the call to current_thd() is ok, as we call this function a lot
    of times but enter this branch very seldom.
327   */
328   file= get_new_handler(share, alloc, ha_default_handlerton(current_thd));
329   DBUG_RETURN(file);
330 }
331 
332 
333 #ifdef WITH_PARTITION_STORAGE_ENGINE
334 handler *get_ha_partition(partition_info *part_info)
335 {
336   ha_partition *partition;
337   DBUG_ENTER("get_ha_partition");
338   if ((partition= new ha_partition(partition_hton, part_info)))
339   {
340     if (partition->initialize_partition(current_thd->mem_root))
341     {
342       delete partition;
343       partition= 0;
344     }
345     else
346       partition->init();
347   }
348   else
349   {
350     my_error(ER_OUTOFMEMORY, MYF(ME_FATAL),
351              static_cast<int>(sizeof(ha_partition)));
352   }
353   DBUG_RETURN(((handler*) partition));
354 }
355 #endif
356 
357 static const char **handler_errmsgs;
358 
359 C_MODE_START
360 static const char **get_handler_errmsgs(int nr)
361 {
362   return handler_errmsgs;
363 }
364 C_MODE_END
365 
366 
367 /**
368   Register handler error messages for use with my_error().
369 
370   @retval
371     0           OK
372   @retval
373     !=0         Error
374 */
375 
376 int ha_init_errors(void)
377 {
378 #define SETMSG(nr, msg) handler_errmsgs[(nr) - HA_ERR_FIRST]= (msg)
379 
380   /* Allocate a pointer array for the error message strings. */
381   /* Zerofill it to avoid uninitialized gaps. */
382   if (! (handler_errmsgs= (const char**) my_malloc(HA_ERR_ERRORS * sizeof(char*),
383                                                    MYF(MY_WME | MY_ZEROFILL))))
384     return 1;
385 
386   /* Set the dedicated error messages. */
387   SETMSG(HA_ERR_KEY_NOT_FOUND,          ER_DEFAULT(ER_KEY_NOT_FOUND));
388   SETMSG(HA_ERR_FOUND_DUPP_KEY,         ER_DEFAULT(ER_DUP_KEY));
389   SETMSG(HA_ERR_RECORD_CHANGED,         "Update which is recoverable");
390   SETMSG(HA_ERR_WRONG_INDEX,            "Wrong index given to function");
391   SETMSG(HA_ERR_CRASHED,                ER_DEFAULT(ER_NOT_KEYFILE));
392   SETMSG(HA_ERR_WRONG_IN_RECORD,        ER_DEFAULT(ER_CRASHED_ON_USAGE));
393   SETMSG(HA_ERR_OUT_OF_MEM,             "Table handler out of memory");
394   SETMSG(HA_ERR_NOT_A_TABLE,            "Incorrect file format '%.64s'");
395   SETMSG(HA_ERR_WRONG_COMMAND,          "Command not supported");
396   SETMSG(HA_ERR_OLD_FILE,               ER_DEFAULT(ER_OLD_KEYFILE));
397   SETMSG(HA_ERR_NO_ACTIVE_RECORD,       "No record read in update");
398   SETMSG(HA_ERR_RECORD_DELETED,         "Intern record deleted");
399   SETMSG(HA_ERR_RECORD_FILE_FULL,       ER_DEFAULT(ER_RECORD_FILE_FULL));
400   SETMSG(HA_ERR_INDEX_FILE_FULL,        "No more room in index file '%.64s'");
401   SETMSG(HA_ERR_END_OF_FILE,            "End in next/prev/first/last");
402   SETMSG(HA_ERR_UNSUPPORTED,            ER_DEFAULT(ER_ILLEGAL_HA));
403   SETMSG(HA_ERR_TO_BIG_ROW,             "Too big row");
404   SETMSG(HA_WRONG_CREATE_OPTION,        "Wrong create option");
405   SETMSG(HA_ERR_FOUND_DUPP_UNIQUE,      ER_DEFAULT(ER_DUP_UNIQUE));
406   SETMSG(HA_ERR_UNKNOWN_CHARSET,        "Can't open charset");
407   SETMSG(HA_ERR_WRONG_MRG_TABLE_DEF,    ER_DEFAULT(ER_WRONG_MRG_TABLE));
408   SETMSG(HA_ERR_CRASHED_ON_REPAIR,      ER_DEFAULT(ER_CRASHED_ON_REPAIR));
409   SETMSG(HA_ERR_CRASHED_ON_USAGE,       ER_DEFAULT(ER_CRASHED_ON_USAGE));
410   SETMSG(HA_ERR_LOCK_WAIT_TIMEOUT,      ER_DEFAULT(ER_LOCK_WAIT_TIMEOUT));
411   SETMSG(HA_ERR_LOCK_TABLE_FULL,        ER_DEFAULT(ER_LOCK_TABLE_FULL));
412   SETMSG(HA_ERR_READ_ONLY_TRANSACTION,  ER_DEFAULT(ER_READ_ONLY_TRANSACTION));
413   SETMSG(HA_ERR_LOCK_DEADLOCK,          ER_DEFAULT(ER_LOCK_DEADLOCK));
414   SETMSG(HA_ERR_CANNOT_ADD_FOREIGN,     ER_DEFAULT(ER_CANNOT_ADD_FOREIGN));
415   SETMSG(HA_ERR_NO_REFERENCED_ROW,      ER_DEFAULT(ER_NO_REFERENCED_ROW_2));
416   SETMSG(HA_ERR_ROW_IS_REFERENCED,      ER_DEFAULT(ER_ROW_IS_REFERENCED_2));
417   SETMSG(HA_ERR_NO_SAVEPOINT,           "No savepoint with that name");
418   SETMSG(HA_ERR_NON_UNIQUE_BLOCK_SIZE,  "Non unique key block size");
419   SETMSG(HA_ERR_NO_SUCH_TABLE,          "No such table: '%.64s'");
420   SETMSG(HA_ERR_TABLE_EXIST,            ER_DEFAULT(ER_TABLE_EXISTS_ERROR));
421   SETMSG(HA_ERR_NO_CONNECTION,          "Could not connect to storage engine");
422   SETMSG(HA_ERR_TABLE_DEF_CHANGED,      ER_DEFAULT(ER_TABLE_DEF_CHANGED));
423   SETMSG(HA_ERR_FOREIGN_DUPLICATE_KEY,  "FK constraint would lead to duplicate key");
424   SETMSG(HA_ERR_TABLE_NEEDS_UPGRADE,    ER_DEFAULT(ER_TABLE_NEEDS_UPGRADE));
425   SETMSG(HA_ERR_TABLE_READONLY,         ER_DEFAULT(ER_OPEN_AS_READONLY));
426   SETMSG(HA_ERR_AUTOINC_READ_FAILED,    ER_DEFAULT(ER_AUTOINC_READ_FAILED));
427   SETMSG(HA_ERR_AUTOINC_ERANGE,         ER_DEFAULT(ER_WARN_DATA_OUT_OF_RANGE));
428   SETMSG(HA_ERR_TOO_MANY_CONCURRENT_TRXS, ER_DEFAULT(ER_TOO_MANY_CONCURRENT_TRXS));
429   SETMSG(HA_ERR_INDEX_COL_TOO_LONG,	ER_DEFAULT(ER_INDEX_COLUMN_TOO_LONG));
430   SETMSG(HA_ERR_INDEX_CORRUPT,		ER_DEFAULT(ER_INDEX_CORRUPT));
431   SETMSG(HA_FTS_INVALID_DOCID,		"Invalid InnoDB FTS Doc ID");
432   SETMSG(HA_ERR_TABLE_IN_FK_CHECK,	ER_DEFAULT(ER_TABLE_IN_FK_CHECK));
433   SETMSG(HA_ERR_DISK_FULL,              ER_DEFAULT(ER_DISK_FULL));
434   SETMSG(HA_ERR_FTS_TOO_MANY_WORDS_IN_PHRASE,  "Too many words in a FTS phrase or proximity search");
435   SETMSG(HA_ERR_FK_DEPTH_EXCEEDED,      "Foreign key cascade delete/update exceeds");
436   SETMSG(HA_ERR_TABLESPACE_MISSING,     ER_DEFAULT(ER_TABLESPACE_MISSING));
437 
438   /* Register the error messages for use with my_error(). */
439   return my_error_register(get_handler_errmsgs, HA_ERR_FIRST, HA_ERR_LAST);
440 }
441 
442 
443 /**
444   Unregister handler error messages.
445 
446   @retval
447     0           OK
448   @retval
449     !=0         Error
450 */
451 static int ha_finish_errors(void)
452 {
  /* Unregister the error messages and free the message array. */
454   my_error_unregister(HA_ERR_FIRST, HA_ERR_LAST);
455   my_free(handler_errmsgs);
456   handler_errmsgs= 0;
457   return 0;
458 }
459 
460 static Atomic_counter<int32> need_full_discover_for_existence(0);
461 static Atomic_counter<int32> engines_with_discover_file_names(0);
462 static Atomic_counter<int32> engines_with_discover(0);
463 
464 static int full_discover_for_existence(handlerton *, const char *, const char *)
465 { return 0; }
466 
467 static int ext_based_existence(handlerton *, const char *, const char *)
468 { return 0; }
469 
470 static int hton_ext_based_table_discovery(handlerton *hton, LEX_CSTRING *db,
471                              MY_DIR *dir, handlerton::discovered_list *result)
472 {
473   /*
474     tablefile_extensions[0] is the metadata file, see
475     the comment above tablefile_extensions declaration
476   */
477   return extension_based_table_discovery(dir, hton->tablefile_extensions[0],
478                                          result);
479 }
480 
481 static void update_discovery_counters(handlerton *hton, int val)
482 {
483   if (hton->discover_table_existence == full_discover_for_existence)
484     need_full_discover_for_existence+= val;
485 
486   if (hton->discover_table_names && hton->tablefile_extensions[0])
487     engines_with_discover_file_names+= val;
488 
489   if (hton->discover_table)
490     engines_with_discover+= val;
491 }
492 
493 int ha_finalize_handlerton(st_plugin_int *plugin)
494 {
495   handlerton *hton= (handlerton *)plugin->data;
496   DBUG_ENTER("ha_finalize_handlerton");
497 
498   /* hton can be NULL here, if ha_initialize_handlerton() failed. */
499   if (!hton)
500     goto end;
501 
502   switch (hton->state) {
503   case SHOW_OPTION_NO:
504   case SHOW_OPTION_DISABLED:
505     break;
506   case SHOW_OPTION_YES:
507     if (installed_htons[hton->db_type] == hton)
508       installed_htons[hton->db_type]= NULL;
509     break;
510   };
511 
512   if (hton->panic)
513     hton->panic(hton, HA_PANIC_CLOSE);
514 
515   if (plugin->plugin->deinit)
516   {
517     /*
518       Today we have no defined/special behavior for uninstalling
519       engine plugins.
520     */
521     DBUG_PRINT("info", ("Deinitializing plugin: '%s'", plugin->name.str));
522     if (plugin->plugin->deinit(NULL))
523     {
524       DBUG_PRINT("warning", ("Plugin '%s' deinit function returned error.",
525                              plugin->name.str));
526     }
527   }
528 
529   free_sysvar_table_options(hton);
530   update_discovery_counters(hton, -1);
531 
532   /*
533     In case a plugin is uninstalled and re-installed later, it should
534     reuse an array slot. Otherwise the number of uninstall/install
535     cycles would be limited.
536   */
537   if (hton->slot != HA_SLOT_UNDEF)
538   {
    /* Make sure we are not unplugging another plugin */
540     DBUG_ASSERT(hton2plugin[hton->slot] == plugin);
541     DBUG_ASSERT(hton->slot < MAX_HA);
542     hton2plugin[hton->slot]= NULL;
543   }
544 
545   my_free(hton);
546 
547  end:
548   DBUG_RETURN(0);
549 }
550 
551 
552 int ha_initialize_handlerton(st_plugin_int *plugin)
553 {
554   handlerton *hton;
555   static const char *no_exts[]= { 0 };
556   DBUG_ENTER("ha_initialize_handlerton");
557   DBUG_PRINT("plugin", ("initialize plugin: '%s'", plugin->name.str));
558 
559   hton= (handlerton *)my_malloc(sizeof(handlerton),
560                                 MYF(MY_WME | MY_ZEROFILL));
561   if (hton == NULL)
562   {
563     sql_print_error("Unable to allocate memory for plugin '%s' handlerton.",
564                     plugin->name.str);
565     goto err_no_hton_memory;
566   }
567 
568   hton->tablefile_extensions= no_exts;
569   hton->discover_table_names= hton_ext_based_table_discovery;
570 
571   hton->slot= HA_SLOT_UNDEF;
572   /* Historical Requirement */
573   plugin->data= hton; // shortcut for the future
574   if (plugin->plugin->init && plugin->plugin->init(hton))
575   {
576     sql_print_error("Plugin '%s' init function returned error.",
577 		    plugin->name.str);
578     goto err;
579   }
580 
581   // hton_ext_based_table_discovery() works only when discovery
  // is supported and the engine is file-based.
583   if (hton->discover_table_names == hton_ext_based_table_discovery &&
584       (!hton->discover_table || !hton->tablefile_extensions[0]))
585     hton->discover_table_names= NULL;
586 
587   // default discover_table_existence implementation
588   if (!hton->discover_table_existence && hton->discover_table)
589   {
590     if (hton->tablefile_extensions[0])
591       hton->discover_table_existence= ext_based_existence;
592     else
593       hton->discover_table_existence= full_discover_for_existence;
594   }
595 
596   switch (hton->state) {
597   case SHOW_OPTION_NO:
598     break;
599   case SHOW_OPTION_YES:
600     {
601       uint tmp;
602       ulong fslot;
603 
604       DBUG_EXECUTE_IF("unstable_db_type", {
605                         static int i= (int) DB_TYPE_FIRST_DYNAMIC;
606                         hton->db_type= (enum legacy_db_type)++i;
607                       });
608 
609       /* now check the db_type for conflict */
610       if (hton->db_type <= DB_TYPE_UNKNOWN ||
611           hton->db_type >= DB_TYPE_DEFAULT ||
612           installed_htons[hton->db_type])
613       {
614         int idx= (int) DB_TYPE_FIRST_DYNAMIC;
615 
616         while (idx < (int) DB_TYPE_DEFAULT && installed_htons[idx])
617           idx++;
618 
619         if (idx == (int) DB_TYPE_DEFAULT)
620         {
621           sql_print_warning("Too many storage engines!");
622 	  goto err_deinit;
623         }
624         if (hton->db_type != DB_TYPE_UNKNOWN)
625           sql_print_warning("Storage engine '%s' has conflicting typecode. "
626                             "Assigning value %d.", plugin->plugin->name, idx);
627         hton->db_type= (enum legacy_db_type) idx;
628       }
629 
630       /*
631         In case a plugin is uninstalled and re-installed later, it should
632         reuse an array slot. Otherwise the number of uninstall/install
633         cycles would be limited. So look for a free slot.
634       */
635       DBUG_PRINT("plugin", ("total_ha: %lu", total_ha));
636       for (fslot= 0; fslot < total_ha; fslot++)
637       {
638         if (!hton2plugin[fslot])
639           break;
640       }
641       if (fslot < total_ha)
642         hton->slot= fslot;
643       else
644       {
645         if (total_ha >= MAX_HA)
646         {
647           sql_print_error("Too many plugins loaded. Limit is %lu. "
648                           "Failed on '%s'", (ulong) MAX_HA, plugin->name.str);
649           goto err_deinit;
650         }
651         hton->slot= total_ha++;
652       }
653       installed_htons[hton->db_type]= hton;
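      /*
        Lay out this engine's savepoint data sequentially in the common
        savepoint storage area: replace the size it requested with its
        offset, and grow the total allocation size accordingly.
      */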
654       tmp= hton->savepoint_offset;
655       hton->savepoint_offset= savepoint_alloc_size;
656       savepoint_alloc_size+= tmp;
657       hton2plugin[hton->slot]=plugin;
658       if (hton->prepare)
659       {
660         total_ha_2pc++;
661         if (tc_log && tc_log != get_tc_log_implementation())
662         {
663           total_ha_2pc--;
664           hton->prepare= 0;
665           push_warning_printf(current_thd, Sql_condition::WARN_LEVEL_WARN,
666                               ER_UNKNOWN_ERROR,
667                               "Cannot enable tc-log at run-time. "
668                               "XA features of %s are disabled",
669                               plugin->name.str);
670         }
671       }
672       break;
673     }
674     /* fall through */
675   default:
676     hton->state= SHOW_OPTION_DISABLED;
677     break;
678   }
679 
680   /*
681     This is entirely for legacy. We will create a new "disk based" hton and a
    "memory" hton which will be configurable long term. We should be able to
683     remove partition.
684   */
685   switch (hton->db_type) {
686   case DB_TYPE_HEAP:
687     heap_hton= hton;
688     break;
689   case DB_TYPE_MYISAM:
690     myisam_hton= hton;
691     break;
692   case DB_TYPE_PARTITION_DB:
693     partition_hton= hton;
694     break;
695   case DB_TYPE_SEQUENCE:
696     sql_sequence_hton= hton;
697     break;
698   default:
699     break;
700   };
701 
702   resolve_sysvar_table_options(hton);
703   update_discovery_counters(hton, 1);
704 
705   DBUG_RETURN(0);
706 
707 err_deinit:
708   /*
709     Let plugin do its inner deinitialization as plugin->init()
710     was successfully called before.
711   */
712   if (plugin->plugin->deinit)
713     (void) plugin->plugin->deinit(NULL);
714 
715 err:
716 #ifdef DBUG_ASSERT_EXISTS
717   if (hton->prepare && hton->state == SHOW_OPTION_YES)
718     failed_ha_2pc++;
719 #endif
720   my_free(hton);
721 err_no_hton_memory:
722   plugin->data= NULL;
723   DBUG_RETURN(1);
724 }
725 
726 int ha_init()
727 {
728   int error= 0;
729   DBUG_ENTER("ha_init");
730 
731   DBUG_ASSERT(total_ha < MAX_HA);
732   /*
733     Check if there is a transaction-capable storage engine besides the
734     binary log (which is considered a transaction-capable storage engine in
735     counting total_ha)
736   */
737   opt_using_transactions= total_ha > (ulong) opt_bin_log;
738   savepoint_alloc_size+= sizeof(SAVEPOINT);
739   DBUG_RETURN(error);
740 }
741 
742 int ha_end()
743 {
744   int error= 0;
745   DBUG_ENTER("ha_end");
746 
747   /*
    This should eventually be based on the graceful shutdown flag.
    So if the flag is equal to HA_PANIC_CLOSE, then deallocate
    the errors.
751   */
752   if (unlikely(ha_finish_errors()))
753     error= 1;
754 
755   DBUG_RETURN(error);
756 }
757 
758 static my_bool dropdb_handlerton(THD *unused1, plugin_ref plugin,
759                                  void *path)
760 {
761   handlerton *hton= plugin_hton(plugin);
762   if (hton->state == SHOW_OPTION_YES && hton->drop_database)
763     hton->drop_database(hton, (char *)path);
764   return FALSE;
765 }
766 
767 
768 void ha_drop_database(char* path)
769 {
770   plugin_foreach(NULL, dropdb_handlerton, MYSQL_STORAGE_ENGINE_PLUGIN, path);
771 }
772 
773 
774 static my_bool checkpoint_state_handlerton(THD *unused1, plugin_ref plugin,
775                                            void *disable)
776 {
777   handlerton *hton= plugin_hton(plugin);
778   if (hton->state == SHOW_OPTION_YES && hton->checkpoint_state)
779     hton->checkpoint_state(hton, (int) *(bool*) disable);
780   return FALSE;
781 }
782 
783 
784 void ha_checkpoint_state(bool disable)
785 {
786   plugin_foreach(NULL, checkpoint_state_handlerton, MYSQL_STORAGE_ENGINE_PLUGIN, &disable);
787 }
788 
789 
790 struct st_commit_checkpoint_request {
791   void *cookie;
792   void (*pre_hook)(void *);
793 };
794 
795 static my_bool commit_checkpoint_request_handlerton(THD *unused1, plugin_ref plugin,
796                                            void *data)
797 {
798   st_commit_checkpoint_request *st= (st_commit_checkpoint_request *)data;
799   handlerton *hton= plugin_hton(plugin);
800   if (hton->state == SHOW_OPTION_YES && hton->commit_checkpoint_request)
801   {
802     void *cookie= st->cookie;
803     if (st->pre_hook)
804       (*st->pre_hook)(cookie);
805     (*hton->commit_checkpoint_request)(hton, cookie);
806   }
807   return FALSE;
808 }
809 
810 
811 /*
812   Invoke commit_checkpoint_request() in all storage engines that implement it.
813 
814   If pre_hook is non-NULL, the hook will be called prior to each invocation.
815 */
816 void
817 ha_commit_checkpoint_request(void *cookie, void (*pre_hook)(void *))
818 {
819   st_commit_checkpoint_request st;
820   st.cookie= cookie;
821   st.pre_hook= pre_hook;
822   plugin_foreach(NULL, commit_checkpoint_request_handlerton,
823                  MYSQL_STORAGE_ENGINE_PLUGIN, &st);
824 }
825 
826 
827 
828 static my_bool closecon_handlerton(THD *thd, plugin_ref plugin,
829                                    void *unused)
830 {
831   handlerton *hton= plugin_hton(plugin);
832   /*
833     there's no need to rollback here as all transactions must
834     be rolled back already
835   */
836   if (hton->state == SHOW_OPTION_YES && thd_get_ha_data(thd, hton))
837   {
838     if (hton->close_connection)
839       hton->close_connection(hton, thd);
840     /* make sure ha_data is reset and ha_data_lock is released */
841     thd_set_ha_data(thd, hton, NULL);
842   }
843   return FALSE;
844 }
845 
846 /**
847   @note
848     don't bother to rollback here, it's done already
849 */
850 void ha_close_connection(THD* thd)
851 {
852   plugin_foreach_with_mask(thd, closecon_handlerton,
853                            MYSQL_STORAGE_ENGINE_PLUGIN,
854                            PLUGIN_IS_DELETED|PLUGIN_IS_READY, 0);
855 }
856 
857 static my_bool kill_handlerton(THD *thd, plugin_ref plugin,
858                                void *level)
859 {
860   handlerton *hton= plugin_hton(plugin);
861 
862   mysql_mutex_assert_owner(&thd->LOCK_thd_kill);
863   if (hton->state == SHOW_OPTION_YES && hton->kill_query &&
864       thd_get_ha_data(thd, hton))
865     hton->kill_query(hton, thd, *(enum thd_kill_levels *) level);
866   return FALSE;
867 }
868 
869 void ha_kill_query(THD* thd, enum thd_kill_levels level)
870 {
871   DBUG_ENTER("ha_kill_query");
872   plugin_foreach(thd, kill_handlerton, MYSQL_STORAGE_ENGINE_PLUGIN, &level);
873   DBUG_VOID_RETURN;
874 }
875 
876 
877 /*****************************************************************************
878   Backup functions
879 ******************************************************************************/
880 
881 static my_bool plugin_prepare_for_backup(THD *unused1, plugin_ref plugin,
882                                          void *not_used)
883 {
884   handlerton *hton= plugin_hton(plugin);
885   if (hton->state == SHOW_OPTION_YES && hton->prepare_for_backup)
886     hton->prepare_for_backup();
887   return FALSE;
888 }
889 
890 void ha_prepare_for_backup()
891 {
892   plugin_foreach_with_mask(0, plugin_prepare_for_backup,
893                            MYSQL_STORAGE_ENGINE_PLUGIN,
894                            PLUGIN_IS_DELETED|PLUGIN_IS_READY, 0);
895 }
896 
897 static my_bool plugin_end_backup(THD *unused1, plugin_ref plugin,
898                                  void *not_used)
899 {
900   handlerton *hton= plugin_hton(plugin);
901   if (hton->state == SHOW_OPTION_YES && hton->end_backup)
902     hton->end_backup();
903   return FALSE;
904 }
905 
906 void ha_end_backup()
907 {
908   plugin_foreach_with_mask(0, plugin_end_backup,
909                            MYSQL_STORAGE_ENGINE_PLUGIN,
910                            PLUGIN_IS_DELETED|PLUGIN_IS_READY, 0);
911 }
912 
913 
914 /* ========================================================================
915  ======================= TRANSACTIONS ===================================*/
916 
917 /**
918   Transaction handling in the server
919   ==================================
920 
921   In each client connection, MySQL maintains two transactional
922   states:
923   - a statement transaction,
924   - a standard, also called normal transaction.
925 
926   Historical note
927   ---------------
928   "Statement transaction" is a non-standard term that comes
929   from the times when MySQL supported BerkeleyDB storage engine.
930 
  First of all, it should be said that BerkeleyDB's auto-commit
  mode auto-commits operations that are atomic to the storage
  engine itself, such as a write of a record, but are too
  high-granular to be atomic from the application's (MySQL's)
  perspective. One SQL statement could involve many BerkeleyDB
  auto-committed operations, and thus BerkeleyDB auto-commit was of
  little use to MySQL.
938 
939   Secondly, instead of SQL standard savepoints, BerkeleyDB
940   provided the concept of "nested transactions". In a nutshell,
941   transactions could be arbitrarily nested, but when the parent
942   transaction was committed or aborted, all its child (nested)
  transactions were committed or aborted as well.
  Commit of a nested transaction, in turn, made its changes
  visible, but not durable: it destroyed the nested transaction,
  and all its changes became available to the parent and to the
  currently active nested transactions of this parent.
948 
949   So the mechanism of nested transactions was employed to
950   provide "all or nothing" guarantee of SQL statements
951   required by the standard.
952   A nested transaction would be created at start of each SQL
953   statement, and destroyed (committed or aborted) at statement
954   end. Such nested transaction was internally referred to as
955   a "statement transaction" and gave birth to the term.
956 
957   (Historical note ends)
958 
959   Since then a statement transaction is started for each statement
960   that accesses transactional tables or uses the binary log.  If
961   the statement succeeds, the statement transaction is committed.
962   If the statement fails, the transaction is rolled back. Commits
963   of statement transactions are not durable -- each such
964   transaction is nested in the normal transaction, and if the
965   normal transaction is rolled back, the effects of all enclosed
966   statement transactions are undone as well.  Technically,
967   a statement transaction can be viewed as a savepoint which is
968   maintained automatically in order to make effects of one
969   statement atomic.
970 
971   The normal transaction is started by the user and is ended
972   usually upon a user request as well. The normal transaction
973   encloses transactions of all statements issued between
974   its beginning and its end.
975   In autocommit mode, the normal transaction is equivalent
976   to the statement transaction.
977 
978   Since MySQL supports PSEA (pluggable storage engine
979   architecture), more than one transactional engine can be
980   active at a time. Hence transactions, from the server
981   point of view, are always distributed. In particular,
982   transactional state is maintained independently for each
983   engine. In order to commit a transaction the two phase
984   commit protocol is employed.
985 
986   Not all statements are executed in context of a transaction.
987   Administrative and status information statements do not modify
988   engine data, and thus do not start a statement transaction and
989   also have no effect on the normal transaction. Examples of such
990   statements are SHOW STATUS and RESET SLAVE.
991 
992   Similarly DDL statements are not transactional,
993   and therefore a transaction is [almost] never started for a DDL
994   statement. The difference between a DDL statement and a purely
995   administrative statement though is that a DDL statement always
996   commits the current transaction before proceeding, if there is
997   any.
998 
999   At last, SQL statements that work with non-transactional
1000   engines also have no effect on the transaction state of the
1001   connection. Even though they are written to the binary log,
1002   and the binary log is, overall, transactional, the writes
1003   are done in "write-through" mode, directly to the binlog
  file, followed by an OS cache sync, in other words,
1005   bypassing the binlog undo log (translog).
1006   They do not commit the current normal transaction.
1007   A failure of a statement that uses non-transactional tables
1008   would cause a rollback of the statement transaction, but
  if no transactional tables are used at all,
1010   no statement transaction is started.
1011 
1012   Data layout
1013   -----------
1014 
1015   The server stores its transaction-related data in
1016   thd->transaction. This structure has two members of type
1017   THD_TRANS. These members correspond to the statement and
1018   normal transactions respectively:
1019 
1020   - thd->transaction.stmt contains a list of engines
1021   that are participating in the given statement
1022   - thd->transaction.all contains a list of engines that
1023   have participated in any of the statement transactions started
1024   within the context of the normal transaction.
1025   Each element of the list contains a pointer to the storage
1026   engine, engine-specific transactional data, and engine-specific
1027   transaction flags.
1028 
1029   In autocommit mode thd->transaction.all is empty.
1030   Instead, data of thd->transaction.stmt is
1031   used to commit/rollback the normal transaction.
1032 
1033   The list of registered engines has a few important properties:
1034   - no engine is registered in the list twice
  - engines are present in the list in reverse temporal order --
1036   new participants are always added to the beginning of the list.
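
  For example, if the first statement of a multi-statement transaction
  updates a table in engine A and the next statement updates a table in
  engine B, then while the second statement runs thd->transaction.stmt
  lists only engine B, whereas thd->transaction.all lists both A and B.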
1037 
1038   Transaction life cycle
1039   ----------------------
1040 
1041   When a new connection is established, thd->transaction
1042   members are initialized to an empty state.
1043   If a statement uses any tables, all affected engines
1044   are registered in the statement engine list. In
1045   non-autocommit mode, the same engines are registered in
1046   the normal transaction list.
1047   At the end of the statement, the server issues a commit
1048   or a roll back for all engines in the statement list.
1049   At this point transaction flags of an engine, if any, are
1050   propagated from the statement list to the list of the normal
1051   transaction.
1052   When commit/rollback is finished, the statement list is
1053   cleared. It will be filled in again by the next statement,
1054   and emptied again at the next statement's end.
1055 
1056   The normal transaction is committed in a similar way
1057   (by going over all engines in thd->transaction.all list)
1058   but at different times:
  - when a COMMIT SQL statement is issued by the user
1060   - implicitly, by the server, at the beginning of a DDL statement
1061   or SET AUTOCOMMIT={0|1} statement.
1062 
1063   The normal transaction can be rolled back as well:
  - if the user has requested so, by issuing a ROLLBACK SQL
  statement
  - if one of the storage engines requested a rollback
  by setting thd->transaction_rollback_request. This may
  happen, e.g., when the transaction in the engine was
  chosen as a victim of the internal deadlock resolution algorithm
1070   and rolled back internally. When such a situation happens, there
1071   is little the server can do and the only option is to rollback
1072   transactions in all other participating engines.  In this case
1073   the rollback is accompanied by an error sent to the user.
1074 
1075   As follows from the use cases above, the normal transaction
1076   is never committed when there is an outstanding statement
1077   transaction. In most cases there is no conflict, since
1078   commits of the normal transaction are issued by a stand-alone
1079   administrative or DDL statement, thus no outstanding statement
1080   transaction of the previous statement exists. Besides,
  all statements that manipulate the normal transaction
1082   are prohibited in stored functions and triggers, therefore
1083   no conflicting situation can occur in a sub-statement either.
1084   The remaining rare cases when the server explicitly has
1085   to commit the statement transaction prior to committing the normal
1086   one cover error-handling scenarios (see for example
1087   SQLCOM_LOCK_TABLES).
1088 
1089   When committing a statement or a normal transaction, the server
1090   either uses the two-phase commit protocol, or issues a commit
1091   in each engine independently. The two-phase commit protocol
1092   is used only if:
1093   - all participating engines support two-phase commit (provide
1094     handlerton::prepare PSEA API call) and
1095   - transactions in at least two engines modify data (i.e. are
1096   not read-only).
1097 
1098   Note that the two phase commit is used for
1099   statement transactions, even though they are not durable anyway.
1100   This is done to ensure logical consistency of data in a multiple-
1101   engine transaction.
1102   For example, imagine that some day MySQL supports unique
1103   constraint checks deferred till the end of statement. In such
1104   case a commit in one of the engines may yield ER_DUP_KEY,
1105   and MySQL should be able to gracefully abort statement
1106   transactions of other participants.
1107 
1108   After the normal transaction has been committed,
1109   thd->transaction.all list is cleared.
1110 
1111   When a connection is closed, the current normal transaction, if
1112   any, is rolled back.
1113 
1114   Roles and responsibilities
1115   --------------------------
1116 
1117   The server has no way to know that an engine participates in
1118   the statement and a transaction has been started
1119   in it unless the engine says so. Thus, in order to be
1120   a part of a transaction, the engine must "register" itself.
1121   This is done by invoking trans_register_ha() server call.
1122   Normally the engine registers itself whenever handler::external_lock()
1123   is called. trans_register_ha() can be invoked many times: if
1124   an engine is already registered, the call does nothing.
1125   In case autocommit is not set, the engine must register itself
1126   twice -- both in the statement list and in the normal transaction
1127   list.
1128   In which list to register is a parameter of trans_register_ha().
1129 
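  A sketch, not taken from any particular engine, of how this
  registration typically looks inside handler::external_lock()
  (ha_foo and its details are purely illustrative):

    int ha_foo::external_lock(THD *thd, int lock_type)
    {
      if (lock_type != F_UNLCK)
      {
        trans_register_ha(thd, FALSE, ht);        // statement list
        if (thd_test_options(thd, OPTION_NOT_AUTOCOMMIT | OPTION_BEGIN))
          trans_register_ha(thd, TRUE, ht);       // normal transaction list
      }
      return 0;
    }
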
1130   Note, that although the registration interface in itself is
1131   fairly clear, the current usage practice often leads to undesired
1132   effects. E.g. since a call to trans_register_ha() in most engines
1133   is embedded into implementation of handler::external_lock(), some
1134   DDL statements start a transaction (at least from the server
1135   point of view) even though they are not expected to. E.g.
1136   CREATE TABLE does not start a transaction, since
1137   handler::external_lock() is never called during CREATE TABLE. But
1138   CREATE TABLE ... SELECT does, since handler::external_lock() is
1139   called for the table that is being selected from. This has no
1140   practical effects currently, but must be kept in mind
1141   nevertheless.
1142 
1143   Once an engine is registered, the server will do the rest
1144   of the work.
1145 
1146   During statement execution, whenever any of data-modifying
1147   PSEA API methods is used, e.g. handler::write_row() or
1148   handler::update_row(), the read-write flag is raised in the
1149   statement transaction for the involved engine.
  Currently all PSEA calls are "traced", and data cannot be
  changed in any way other than by issuing a PSEA call. Important:
1152   unless this invariant is preserved the server will not know that
1153   a transaction in a given engine is read-write and will not
1154   involve the two-phase commit protocol!
1155 
  At the end of a statement, the server call trans_commit_stmt() is
  invoked. This call in turn invokes handlerton::prepare()
  for every involved engine. Prepare is followed by a call
  to handlerton::commit_one_phase(). If a one-phase commit
  suffices, handlerton::prepare() is not invoked and
  the server only calls handlerton::commit_one_phase().
1162   At statement commit, the statement-related read-write
1163   engine flag is propagated to the corresponding flag in the
1164   normal transaction.  When the commit is complete, the list
1165   of registered engines is cleared.
1166 
1167   Rollback is handled in a similar fashion.
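
  A rough, simplified sketch of the statement-commit call sequence
  described above (omitting binlog and commit-ordering details):

    trans_commit_stmt(thd)
      -> ha_commit_trans(thd, FALSE)
           -> ha_check_and_coalesce_trx_read_only()
           -> handlerton::prepare()            for each engine, 2PC case only
           -> ha_commit_one_phase(thd, FALSE)
                -> handlerton::commit()        for each registered engine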
1168 
1169   Additional notes on DDL and the normal transaction.
1170   ---------------------------------------------------
1171 
1172   DDLs and operations with non-transactional engines
1173   do not "register" in thd->transaction lists, and thus do not
1174   modify the transaction state. Besides, each DDL in
1175   MySQL is prefixed with an implicit normal transaction commit
1176   (a call to trans_commit_implicit()), and thus leaves nothing
1177   to modify.
1178   However, as it has been pointed out with CREATE TABLE .. SELECT,
1179   some DDL statements can start a *new* transaction.
1180 
1181   Behaviour of the server in this case is currently badly
1182   defined.
1183   DDL statements use a form of "semantic" logging
1184   to maintain atomicity: if CREATE TABLE .. SELECT failed,
1185   the newly created table is deleted.
1186   In addition, some DDL statements issue interim transaction
1187   commits: e.g. ALTER TABLE issues a commit after data is copied
1188   from the original table to the internal temporary table. Other
  statements, e.g. CREATE TABLE ... SELECT, do not always commit
  after themselves.
  And finally there is a group of DDL statements such as
  RENAME/DROP TABLE that don't start a new transaction
  and don't commit.
1194 
1195   This diversity makes it hard to say what will happen if
1196   by chance a stored function is invoked during a DDL --
1197   whether any modifications it makes will be committed or not
  is not clear. Fortunately, the SQL grammar of only a few DDLs
  allows invocation of a stored function.
1200 
1201   A consistent behaviour is perhaps to always commit the normal
1202   transaction after all DDLs, just like the statement transaction
1203   is always committed at the end of all statements.
1204 */
1205 
1206 /**
1207   Register a storage engine for a transaction.
1208 
1209   Every storage engine MUST call this function when it starts
1210   a transaction or a statement (that is it must be called both for the
1211   "beginning of transaction" and "beginning of statement").
1212   Only storage engines registered for the transaction/statement
1213   will know when to commit/rollback it.
1214 
1215   @note
    trans_register_ha() is idempotent - a storage engine may register many
    times per transaction.
1218 
1219 */
1220 void trans_register_ha(THD *thd, bool all, handlerton *ht_arg)
1221 {
1222   THD_TRANS *trans;
1223   Ha_trx_info *ha_info;
1224   DBUG_ENTER("trans_register_ha");
1225   DBUG_PRINT("enter",("%s", all ? "all" : "stmt"));
1226 
1227   if (all)
1228   {
1229     trans= &thd->transaction.all;
1230     thd->server_status|= SERVER_STATUS_IN_TRANS;
1231     if (thd->tx_read_only)
1232       thd->server_status|= SERVER_STATUS_IN_TRANS_READONLY;
1233     DBUG_PRINT("info", ("setting SERVER_STATUS_IN_TRANS"));
1234   }
1235   else
1236     trans= &thd->transaction.stmt;
1237 
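  /* ha_info[0] tracks the statement transaction, ha_info[1] the normal one */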
1238   ha_info= thd->ha_data[ht_arg->slot].ha_info + (all ? 1 : 0);
1239 
1240   if (ha_info->is_started())
1241     DBUG_VOID_RETURN; /* already registered, return */
1242 
1243   ha_info->register_ha(trans, ht_arg);
1244 
1245   trans->no_2pc|=(ht_arg->prepare==0);
1246 
1247   /* Set implicit xid even if there's explicit XA, it will be ignored anyway. */
1248   if (thd->transaction.implicit_xid.is_null())
1249     thd->transaction.implicit_xid.set(thd->query_id);
1250 
1251   DBUG_VOID_RETURN;
1252 }
1253 
1254 
1255 static int prepare_or_error(handlerton *ht, THD *thd, bool all)
1256 {
1257 #ifdef WITH_WSREP
1258   const bool run_wsrep_hooks= wsrep_run_commit_hook(thd, all);
1259   if (run_wsrep_hooks && ht->flags & HTON_WSREP_REPLICATION &&
1260       wsrep_before_prepare(thd, all))
1261   {
1262     return(1);
1263   }
1264 #endif /* WITH_WSREP */
1265 
1266   int err= ht->prepare(ht, thd, all);
1267   status_var_increment(thd->status_var.ha_prepare_count);
1268   if (err)
1269   {
1270       my_error(ER_ERROR_DURING_COMMIT, MYF(0), err);
1271   }
1272 #ifdef WITH_WSREP
1273   if (run_wsrep_hooks && !err && ht->flags & HTON_WSREP_REPLICATION &&
1274       wsrep_after_prepare(thd, all))
1275   {
1276     err= 1;
1277   }
1278 #endif /* WITH_WSREP */
1279 
1280   return err;
1281 }
1282 
1283 
1284 /**
1285   @retval
1286     0   ok
1287   @retval
1288     1   error, transaction was rolled back
1289 */
1290 int ha_prepare(THD *thd)
1291 {
1292   int error=0, all=1;
1293   THD_TRANS *trans=all ? &thd->transaction.all : &thd->transaction.stmt;
1294   Ha_trx_info *ha_info= trans->ha_list;
1295   DBUG_ENTER("ha_prepare");
1296 
1297   if (ha_info)
1298   {
1299     for (; ha_info; ha_info= ha_info->next())
1300     {
1301       handlerton *ht= ha_info->ht();
1302       if (ht->prepare)
1303       {
1304         if (unlikely(prepare_or_error(ht, thd, all)))
1305         {
1306           ha_rollback_trans(thd, all);
1307           error=1;
1308           break;
1309         }
1310       }
1311       else
1312       {
1313         push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN,
1314                             ER_GET_ERRNO, ER_THD(thd, ER_GET_ERRNO),
1315                             HA_ERR_WRONG_COMMAND,
1316                             ha_resolve_storage_engine_name(ht));
1317 
1318       }
1319     }
1320   }
1321 
1322   DBUG_RETURN(error);
1323 }
1324 
1325 /**
1326   Check if we can skip the two-phase commit.
1327 
1328   A helper function to evaluate if two-phase commit is mandatory.
1329   As a side effect, propagates the read-only/read-write flags
1330   of the statement transaction to its enclosing normal transaction.
1331 
1332   If we have at least two engines with read-write changes we must
1333   run a two-phase commit. Otherwise we can run several independent
1334   commits as the only transactional engine has read-write changes
1335   and others are read-only.
1336 
1337   @retval   0   All engines are read-only.
1338   @retval   1   We have the only engine with read-write changes.
1339   @retval   >1  More than one engine have read-write changes.
1340                 Note: return value might NOT be the exact number of
1341                 engines with read-write changes.
1342 */
1343 
1344 static
1345 uint
1346 ha_check_and_coalesce_trx_read_only(THD *thd, Ha_trx_info *ha_list,
1347                                     bool all)
1348 {
1349   /* The number of storage engines that have actual changes. */
1350   unsigned rw_ha_count= 0;
1351   Ha_trx_info *ha_info;
1352 
1353   for (ha_info= ha_list; ha_info; ha_info= ha_info->next())
1354   {
1355     if (ha_info->is_trx_read_write())
1356       ++rw_ha_count;
1357 
1358     if (! all)
1359     {
1360       Ha_trx_info *ha_info_all= &thd->ha_data[ha_info->ht()->slot].ha_info[1];
1361       DBUG_ASSERT(ha_info != ha_info_all);
1362       /*
1363         Merge read-only/read-write information about statement
1364         transaction to its enclosing normal transaction. Do this
1365         only if in a real transaction -- that is, if we know
1366         that ha_info_all is registered in thd->transaction.all.
1367         Since otherwise we only clutter the normal transaction flags.
1368       */
1369       if (ha_info_all->is_started()) /* FALSE if autocommit. */
1370         ha_info_all->coalesce_trx_with(ha_info);
1371     }
1372     else if (rw_ha_count > 1)
1373     {
1374       /*
1375         It is a normal transaction, so we don't need to merge read/write
1376         information up, and the need for two-phase commit has been
1377         already established. Break the loop prematurely.
1378       */
1379       break;
1380     }
1381   }
1382   return rw_ha_count;
1383 }
1384 
1385 
1386 /**
1387   @retval
1388     0   ok
1389   @retval
1390     1   transaction was rolled back
1391   @retval
1392     2   error during commit, data may be inconsistent
1393 
1394   @todo
1395     Since we don't support nested statement transactions in 5.0,
1396     we can't commit or rollback stmt transactions while we are inside
1397     stored functions or triggers. So we simply do nothing now.
1398     TODO: This should be fixed in later ( >= 5.1) releases.
1399 */
1400 int ha_commit_trans(THD *thd, bool all)
1401 {
1402   int error= 0, cookie;
1403   /*
1404     'all' means that this is either an explicit commit issued by
1405     user, or an implicit commit issued by a DDL.
1406   */
1407   THD_TRANS *trans= all ? &thd->transaction.all : &thd->transaction.stmt;
1408   /*
    "real" is a nickname for a transaction for which a commit will
1410     make persistent changes. E.g. a 'stmt' transaction inside an 'all'
1411     transaction is not 'real': even though it's possible to commit it,
1412     the changes are not durable as they might be rolled back if the
1413     enclosing 'all' transaction is rolled back.
1414   */
1415   bool is_real_trans= ((all || thd->transaction.all.ha_list == 0) &&
1416                        !(thd->variables.option_bits & OPTION_GTID_BEGIN));
1417   Ha_trx_info *ha_info= trans->ha_list;
1418   bool need_prepare_ordered, need_commit_ordered;
1419   my_xid xid;
1420 #ifdef WITH_WSREP
1421   const bool run_wsrep_hooks= wsrep_run_commit_hook(thd, all);
1422 #endif /* WITH_WSREP */
1423   DBUG_ENTER("ha_commit_trans");
1424   DBUG_PRINT("info",("thd: %p  option_bits: %lu  all: %d",
1425                      thd, (ulong) thd->variables.option_bits, all));
1426 
1427   /* Just a random warning to test warnings pushed during autocommit. */
1428   DBUG_EXECUTE_IF("warn_during_ha_commit_trans",
1429     push_warning(thd, Sql_condition::WARN_LEVEL_WARN,
1430                  ER_WARNING_NOT_COMPLETE_ROLLBACK,
1431                  ER_THD(thd, ER_WARNING_NOT_COMPLETE_ROLLBACK)););
1432 
1433   DBUG_PRINT("info",
1434              ("all: %d  thd->in_sub_stmt: %d  ha_info: %p  is_real_trans: %d",
1435               all, thd->in_sub_stmt, ha_info, is_real_trans));
1436   /*
1437     We must not commit the normal transaction if a statement
1438     transaction is pending. Otherwise statement transaction
1439     flags will not get propagated to its normal transaction's
1440     counterpart.
1441   */
1442   DBUG_ASSERT(thd->transaction.stmt.ha_list == NULL ||
1443               trans == &thd->transaction.stmt);
1444 
1445   if (thd->in_sub_stmt)
1446   {
1447     DBUG_ASSERT(0);
1448     /*
1449       Since we don't support nested statement transactions in 5.0,
1450       we can't commit or rollback stmt transactions while we are inside
1451       stored functions or triggers. So we simply do nothing now.
1452       TODO: This should be fixed in later ( >= 5.1) releases.
1453     */
1454     if (!all)
1455       DBUG_RETURN(0);
1456     /*
1457       We assume that all statements which commit or rollback main transaction
1458       are prohibited inside of stored functions or triggers. So they should
      bail out with an error even before the ha_commit_trans() call. To be
      100% safe, let us throw an error in non-debug builds.
1461     */
1462     my_error(ER_COMMIT_NOT_ALLOWED_IN_SF_OR_TRG, MYF(0));
1463     DBUG_RETURN(2);
1464   }
1465 
1466   if (!ha_info)
1467   {
1468     /*
1469       Free resources and perform other cleanup even for 'empty' transactions.
1470     */
1471     if (is_real_trans)
1472       thd->transaction.cleanup();
1473 #ifdef WITH_WSREP
1474     if (wsrep_is_active(thd) && is_real_trans && !error)
1475     {
1476       wsrep_commit_empty(thd, all);
1477     }
1478 #endif /* WITH_WSREP */
1479 
1480     ha_maria_implicit_commit(thd, TRUE);
1481     DBUG_RETURN(error);
1482   }
1483 
1484   DBUG_EXECUTE_IF("crash_commit_before", DBUG_SUICIDE(););
1485 
1486   /* Close all cursors that can not survive COMMIT */
1487   if (is_real_trans)                          /* not a statement commit */
1488     thd->stmt_map.close_transient_cursors();
1489 
1490   uint rw_ha_count= ha_check_and_coalesce_trx_read_only(thd, ha_info, all);
  /* rw_trans is TRUE when we are in a transaction changing data */
1492   bool rw_trans= is_real_trans &&
1493                  (rw_ha_count > (thd->is_current_stmt_binlog_disabled()?0U:1U));
1494   MDL_request mdl_backup;
1495   DBUG_PRINT("info", ("is_real_trans: %d  rw_trans:  %d  rw_ha_count: %d",
1496                       is_real_trans, rw_trans, rw_ha_count));
1497 
1498   if (rw_trans)
1499   {
1500     /*
1501       Acquire a metadata lock which will ensure that COMMIT is blocked
1502       by an active FLUSH TABLES WITH READ LOCK (and vice versa:
1503       COMMIT in progress blocks FTWRL).
1504 
1505       We allow the owner of FTWRL to COMMIT; we assume that it knows
1506       what it does.
1507     */
1508     mdl_backup.init(MDL_key::BACKUP, "", "", MDL_BACKUP_COMMIT, MDL_EXPLICIT);
1509 
1510     if (!WSREP(thd))
1511     {
1512       if (thd->mdl_context.acquire_lock(&mdl_backup,
1513                                         thd->variables.lock_wait_timeout))
1514       {
1515         ha_rollback_trans(thd, all);
1516         DBUG_RETURN(1);
1517       }
1518       thd->backup_commit_lock= &mdl_backup;
1519     }
1520     DEBUG_SYNC(thd, "ha_commit_trans_after_acquire_commit_lock");
1521 
1522     /* Use shortcut as we already have the MDL_BACKUP_COMMIT lock */
1523     ha_maria::implicit_commit(thd, TRUE);
1524   }
1525   else
1526     ha_maria_implicit_commit(thd, TRUE);
1527 
1528   if (rw_trans &&
1529       opt_readonly &&
1530       !(thd->security_ctx->master_access & SUPER_ACL) &&
1531       !thd->slave_thread)
1532   {
1533     my_error(ER_OPTION_PREVENTS_STATEMENT, MYF(0), "--read-only");
1534     goto err;
1535   }
1536 
1537 #if 1 // FIXME: This should be done in ha_prepare().
1538   if (rw_trans || (thd->lex->sql_command == SQLCOM_ALTER_TABLE &&
1539                    thd->lex->alter_info.flags & ALTER_ADD_SYSTEM_VERSIONING &&
1540                    is_real_trans))
1541   {
1542     ulonglong trx_start_id= 0, trx_end_id= 0;
1543     for (Ha_trx_info *ha_info= trans->ha_list; ha_info; ha_info= ha_info->next())
1544     {
1545       if (ha_info->ht()->prepare_commit_versioned)
1546       {
1547         trx_end_id= ha_info->ht()->prepare_commit_versioned(thd, &trx_start_id);
1548         if (trx_end_id)
1549           break; // FIXME: use a common ID for cross-engine transactions
1550       }
1551     }
1552 
1553     if (trx_end_id)
1554     {
1555       if (!TR_table::use_transaction_registry)
1556       {
1557         my_error(ER_VERS_TRT_IS_DISABLED, MYF(0));
1558         goto err;
1559       }
1560       DBUG_ASSERT(trx_start_id);
1561 #ifdef WITH_WSREP
1562       bool saved_wsrep_on= thd->variables.wsrep_on;
1563       thd->variables.wsrep_on= false;
1564 #endif
1565       TR_table trt(thd, true);
1566       if (trt.update(trx_start_id, trx_end_id))
1567       {
1568 #ifdef WITH_WSREP
1569         thd->variables.wsrep_on= saved_wsrep_on;
1570 #endif
1571         (void) trans_rollback_stmt(thd);
1572         goto err;
1573       }
      // Here, the call will not commit inside InnoDB. It only works around
      // closing thd->transaction.stmt, which was opened by TR_table::open().
1576       if (all)
1577         commit_one_phase_2(thd, false, &thd->transaction.stmt, false);
1578 #ifdef WITH_WSREP
1579       thd->variables.wsrep_on= saved_wsrep_on;
1580 #endif
1581     }
1582   }
1583 #endif
1584 
1585   if (trans->no_2pc || (rw_ha_count <= 1))
1586   {
1587 #ifdef WITH_WSREP
1588     /*
1589       This commit will not go through log_and_order() where wsrep commit
1590       ordering is normally done. Commit ordering must be done here.
1591     */
1592     if (run_wsrep_hooks)
1593       error= wsrep_before_commit(thd, all);
1594     if (error)
1595     {
1596       ha_rollback_trans(thd, FALSE);
1597       goto wsrep_err;
1598     }
1599 #endif /* WITH_WSREP */
1600     error= ha_commit_one_phase(thd, all);
1601 #ifdef WITH_WSREP
1602     if (run_wsrep_hooks)
1603       error= error || wsrep_after_commit(thd, all);
1604 #endif /* WITH_WSREP */
1605     goto done;
1606   }
1607 
1608   need_prepare_ordered= FALSE;
1609   need_commit_ordered= FALSE;
1610   DBUG_ASSERT(thd->transaction.implicit_xid.get_my_xid() ==
1611               thd->transaction.implicit_xid.quick_get_my_xid());
1612   xid= thd->transaction.xid_state.is_explicit_XA() ? 0 :
1613        thd->transaction.implicit_xid.quick_get_my_xid();
1614 
1615   for (Ha_trx_info *hi= ha_info; hi; hi= hi->next())
1616   {
1617     handlerton *ht= hi->ht();
1618     /*
1619       Do not call two-phase commit if this particular
1620       transaction is read-only. This allows for simpler
1621       implementation in engines that are always read-only.
1622     */
1623     if (! hi->is_trx_read_write())
1624       continue;
1625     /*
1626       Sic: we know that prepare() is not NULL since otherwise
1627       trans->no_2pc would have been set.
1628     */
1629     if (unlikely(prepare_or_error(ht, thd, all)))
1630       goto err;
1631 
1632     need_prepare_ordered|= (ht->prepare_ordered != NULL);
1633     need_commit_ordered|= (ht->commit_ordered != NULL);
1634   }
1635   DEBUG_SYNC(thd, "ha_commit_trans_after_prepare");
1636   DBUG_EXECUTE_IF("crash_commit_after_prepare", DBUG_SUICIDE(););
1637 
1638 #ifdef WITH_WSREP
1639   if (run_wsrep_hooks && !error)
1640   {
1641     wsrep::seqno const s= wsrep_xid_seqno(thd->wsrep_xid);
1642     if (!s.is_undefined())
1643     {
1644       // xid was rewritten by wsrep
1645       xid= s.get();
1646     }
1647   }
1648 #endif /* WITH_WSREP */
1649 
1650   if (!is_real_trans)
1651   {
1652     error= commit_one_phase_2(thd, all, trans, is_real_trans);
1653     goto done;
1654   }
1655 #ifdef WITH_WSREP
1656   if (run_wsrep_hooks && (error = wsrep_before_commit(thd, all)))
1657     goto wsrep_err;
1658 #endif /* WITH_WSREP */
1659   DEBUG_SYNC(thd, "ha_commit_trans_before_log_and_order");
1660   cookie= tc_log->log_and_order(thd, xid, all, need_prepare_ordered,
1661                                 need_commit_ordered);
1662   if (!cookie)
1663   {
1664     WSREP_DEBUG("log_and_order has failed %llu %d", thd->thread_id, cookie);
1665     goto err;
1666   }
1667   DEBUG_SYNC(thd, "ha_commit_trans_after_log_and_order");
1668   DBUG_EXECUTE_IF("crash_commit_after_log", DBUG_SUICIDE(););
1669 
1670   error= commit_one_phase_2(thd, all, trans, is_real_trans) ? 2 : 0;
1671 #ifdef WITH_WSREP
1672   if (run_wsrep_hooks && (error || (error = wsrep_after_commit(thd, all))))
1673   {
1674     mysql_mutex_lock(&thd->LOCK_thd_data);
1675     if (wsrep_must_abort(thd))
1676     {
1677       mysql_mutex_unlock(&thd->LOCK_thd_data);
1678       (void)tc_log->unlog(cookie, xid);
1679       goto wsrep_err;
1680     }
1681     mysql_mutex_unlock(&thd->LOCK_thd_data);
1682   }
1683 #endif /* WITH_WSREP */
1684   DBUG_EXECUTE_IF("crash_commit_before_unlog", DBUG_SUICIDE(););
1685   if (tc_log->unlog(cookie, xid))
1686   {
1687     error= 2;                                /* Error during commit */
1688     goto end;
1689   }
1690 
1691 done:
1692   DBUG_EXECUTE_IF("crash_commit_after", DBUG_SUICIDE(););
1693 
1694   mysql_mutex_assert_not_owner(&LOCK_prepare_ordered);
1695   mysql_mutex_assert_not_owner(mysql_bin_log.get_log_lock());
1696   mysql_mutex_assert_not_owner(&LOCK_after_binlog_sync);
1697   mysql_mutex_assert_not_owner(&LOCK_commit_ordered);
1698 #ifdef HAVE_REPLICATION
1699   repl_semisync_master.wait_after_commit(thd, all);
1700   DEBUG_SYNC(thd, "after_group_after_commit");
1701 #endif
1702   goto end;
1703 
  /* Come here if an error occurred and we need to roll back. */
1705 #ifdef WITH_WSREP
1706 wsrep_err:
1707   mysql_mutex_lock(&thd->LOCK_thd_data);
1708   if (run_wsrep_hooks && wsrep_must_abort(thd))
1709   {
1710     WSREP_DEBUG("BF abort has happened after prepare & certify");
1711     mysql_mutex_unlock(&thd->LOCK_thd_data);
1712     ha_rollback_trans(thd, TRUE);
1713   }
1714   else
1715     mysql_mutex_unlock(&thd->LOCK_thd_data);
1716 
1717 #endif /* WITH_WSREP */
1718 err:
1719   error= 1;                                  /* Transaction was rolled back */
1720   /*
1721     In parallel replication, rollback is delayed, as there is extra replication
1722     book-keeping to be done before rolling back and allowing a conflicting
1723     transaction to continue (MDEV-7458).
1724   */
1725   if (!(thd->rgi_slave && thd->rgi_slave->is_parallel_exec))
1726     ha_rollback_trans(thd, all);
1727   else
1728   {
1729     WSREP_DEBUG("rollback skipped %p %d",thd->rgi_slave,
1730                 thd->rgi_slave->is_parallel_exec);
1731   }
1732 end:
1733   if (mdl_backup.ticket)
1734   {
1735     /*
1736       We do not always immediately release transactional locks
1737       after ha_commit_trans() (see uses of ha_enable_transaction()),
1738       thus we release the commit blocker lock as soon as it's
1739       not needed.
1740     */
1741     thd->mdl_context.release_lock(mdl_backup.ticket);
1742   }
1743   thd->backup_commit_lock= 0;
1744 #ifdef WITH_WSREP
1745   if (wsrep_is_active(thd) && is_real_trans && !error &&
1746       (rw_ha_count == 0 || all) &&
1747       wsrep_not_committed(thd))
1748   {
1749     wsrep_commit_empty(thd, all);
1750   }
1751 #endif /* WITH_WSREP */
1752 
1753   DBUG_RETURN(error);
1754 }
1755 
1756 /**
1757   @note
1758   This function does not care about global read lock or backup locks,
1759   the caller should.
1760 
1761   @param[in]  all  Is set in case of explicit commit
1762                    (COMMIT statement), or implicit commit
1763                    issued by DDL. Is not set when called
1764                    at the end of statement, even if
1765                    autocommit=1.
1766 */
1767 
1768 int ha_commit_one_phase(THD *thd, bool all)
1769 {
1770   THD_TRANS *trans=all ? &thd->transaction.all : &thd->transaction.stmt;
1771   /*
1772     "real" is a nick name for a transaction for which a commit will
1773     make persistent changes. E.g. a 'stmt' transaction inside a 'all'
1774     transaction is not 'real': even though it's possible to commit it,
1775     the changes are not durable as they might be rolled back if the
1776     enclosing 'all' transaction is rolled back.
1777     We establish the value of 'is_real_trans' by checking
1778     if it's an explicit COMMIT/BEGIN statement, or implicit
1779     commit issued by DDL (all == TRUE), or if we're running
1780     in autocommit mode (it's only in the autocommit mode
1781     ha_commit_one_phase() can be called with an empty
1782     transaction.all.ha_list, see why in trans_register_ha()).
1783   */
1784   bool is_real_trans= ((all || thd->transaction.all.ha_list == 0) &&
1785                        !(thd->variables.option_bits & OPTION_GTID_BEGIN));
1786   int res;
1787   DBUG_ENTER("ha_commit_one_phase");
1788   if (is_real_trans)
1789   {
1790     DEBUG_SYNC(thd, "ha_commit_one_phase");
1791     if ((res= thd->wait_for_prior_commit()))
1792       DBUG_RETURN(res);
1793   }
1794   res= commit_one_phase_2(thd, all, trans, is_real_trans);
1795   DBUG_RETURN(res);
1796 }
1797 
1798 
1799 static int
1800 commit_one_phase_2(THD *thd, bool all, THD_TRANS *trans, bool is_real_trans)
1801 {
1802   int error= 0;
1803   uint count= 0;
1804   Ha_trx_info *ha_info= trans->ha_list, *ha_info_next;
1805   DBUG_ENTER("commit_one_phase_2");
1806   if (is_real_trans)
1807     DEBUG_SYNC(thd, "commit_one_phase_2");
1808 
1809   if (ha_info)
1810   {
1811     for (; ha_info; ha_info= ha_info_next)
1812     {
1813       int err;
1814       handlerton *ht= ha_info->ht();
1815       if ((err= ht->commit(ht, thd, all)))
1816       {
1817         my_error(ER_ERROR_DURING_COMMIT, MYF(0), err);
1818         error=1;
1819       }
1820       /* Should this be done only if is_real_trans is set ? */
1821       status_var_increment(thd->status_var.ha_commit_count);
1822       if (is_real_trans && ht != binlog_hton && ha_info->is_trx_read_write())
1823         ++count;
1824       ha_info_next= ha_info->next();
1825       ha_info->reset(); /* keep it conveniently zero-filled */
1826     }
1827     trans->ha_list= 0;
1828     trans->no_2pc=0;
1829     if (all)
1830     {
1831 #ifdef HAVE_QUERY_CACHE
1832       if (thd->transaction.changed_tables)
1833         query_cache.invalidate(thd, thd->transaction.changed_tables);
1834 #endif
1835     }
1836   }
1837 
1838   /* Free resources and perform other cleanup even for 'empty' transactions. */
1839   if (is_real_trans)
1840   {
1841     thd->has_waiter= false;
1842     thd->transaction.cleanup();
1843     if (count >= 2)
1844       statistic_increment(transactions_multi_engine, LOCK_status);
1845   }
1846 
1847   DBUG_RETURN(error);
1848 }
1849 
1850 
1851 int ha_rollback_trans(THD *thd, bool all)
1852 {
1853   int error=0;
1854   THD_TRANS *trans=all ? &thd->transaction.all : &thd->transaction.stmt;
1855   Ha_trx_info *ha_info= trans->ha_list, *ha_info_next;
1856   /*
1857     "real" is a nick name for a transaction for which a commit will
1858     make persistent changes. E.g. a 'stmt' transaction inside a 'all'
1859     transaction is not 'real': even though it's possible to commit it,
1860     the changes are not durable as they might be rolled back if the
1861     enclosing 'all' transaction is rolled back.
1862     We establish the value of 'is_real_trans' by checking
1863     if it's an explicit COMMIT or BEGIN statement, or implicit
1864     commit issued by DDL (in these cases all == TRUE),
1865     or if we're running in autocommit mode (it's only in the autocommit mode
1866     ha_commit_one_phase() is called with an empty
1867     transaction.all.ha_list, see why in trans_register_ha()).
1868   */
1869   bool is_real_trans=all || thd->transaction.all.ha_list == 0;
1870   DBUG_ENTER("ha_rollback_trans");
1871 
1872   /*
1873     We must not rollback the normal transaction if a statement
1874     transaction is pending.
1875   */
1876   DBUG_ASSERT(thd->transaction.stmt.ha_list == NULL ||
1877               trans == &thd->transaction.stmt);
1878 
1879 #ifdef HAVE_REPLICATION
1880   if (is_real_trans)
1881   {
1882     /*
1883       In parallel replication, if we need to rollback during commit, we must
1884       first inform following transactions that we are going to abort our commit
1885       attempt. Otherwise those following transactions can run too early, and
1886       possibly cause replication to fail. See comments in retry_event_group().
1887 
1888       There were several bugs with this in the past that were very hard to
1889       track down (MDEV-7458, MDEV-8302). So we add here an assertion for
1890       rollback without signalling following transactions. And in release
1891       builds, we explicitly do the signalling before rolling back.
1892     */
1893     DBUG_ASSERT(!(thd->rgi_slave && thd->rgi_slave->did_mark_start_commit));
1894     if (thd->rgi_slave && thd->rgi_slave->did_mark_start_commit)
1895       thd->rgi_slave->unmark_start_commit();
1896   }
1897 #endif
1898 
1899   if (thd->in_sub_stmt)
1900   {
1901     DBUG_ASSERT(0);
1902     /*
      If we are inside a stored function or trigger we should not commit or
      roll back the current statement transaction. See the comment in
      ha_commit_trans() for more information.
1906     */
1907     if (!all)
1908       DBUG_RETURN(0);
1909     my_error(ER_COMMIT_NOT_ALLOWED_IN_SF_OR_TRG, MYF(0));
1910     DBUG_RETURN(1);
1911   }
1912 
1913 #ifdef WITH_WSREP
1914   (void) wsrep_before_rollback(thd, all);
1915 #endif /* WITH_WSREP */
1916   if (ha_info)
1917   {
1918     /* Close all cursors that can not survive ROLLBACK */
1919     if (is_real_trans)                          /* not a statement commit */
1920       thd->stmt_map.close_transient_cursors();
1921 
1922     for (; ha_info; ha_info= ha_info_next)
1923     {
1924       int err;
1925       handlerton *ht= ha_info->ht();
1926       if ((err= ht->rollback(ht, thd, all)))
1927       { // cannot happen
1928         my_error(ER_ERROR_DURING_ROLLBACK, MYF(0), err);
1929         error=1;
1930 #ifdef WITH_WSREP
1931         WSREP_WARN("handlerton rollback failed, thd %lld %lld conf %d SQL %s",
1932                    thd->thread_id, thd->query_id, thd->wsrep_trx().state(),
1933                    thd->query());
1934 #endif /* WITH_WSREP */
1935       }
1936       status_var_increment(thd->status_var.ha_rollback_count);
1937       ha_info_next= ha_info->next();
1938       ha_info->reset(); /* keep it conveniently zero-filled */
1939     }
1940     trans->ha_list= 0;
1941     trans->no_2pc=0;
1942   }
1943 
1944 #ifdef WITH_WSREP
1945   if (thd->is_error())
1946   {
1947     WSREP_DEBUG("ha_rollback_trans(%lld, %s) rolled back: %s: %s; is_real %d",
1948                 thd->thread_id, all?"TRUE":"FALSE", wsrep_thd_query(thd),
1949                 thd->get_stmt_da()->message(), is_real_trans);
1950   }
1951   (void) wsrep_after_rollback(thd, all);
1952 #endif /* WITH_WSREP */
  /* Always clean up, even if ha_info == 0: there may be savepoints. */
1954   if (is_real_trans)
1955   {
1956     /*
      Due to the possibility of an MDL deadlock, a rollback request can arrive
      even if the transaction hasn't been started in any transactional storage
      engine.
1959     */
1960     if (thd->transaction_rollback_request &&
1961         thd->transaction.xid_state.is_explicit_XA())
1962       thd->transaction.xid_state.set_error(thd->get_stmt_da()->sql_errno());
1963 
1964     thd->has_waiter= false;
1965     thd->transaction.cleanup();
1966   }
1967   if (all)
1968     thd->transaction_rollback_request= FALSE;
1969 
1970   /*
    If a non-transactional table was updated, warn the user. Don't warn if
    this is a slave thread: when a slave thread executes a ROLLBACK, the
    statement has been read from the binary log, so it is expected and normal
    to produce error ER_WARNING_NOT_COMPLETE_ROLLBACK. If we sent the warning
    to the slave SQL thread, it would not stop the thread but would just be
    printed in the error log; we don't want users to wonder why they have this
    message in the error log, so we don't send it.

    We don't have to test for thd->killed == KILL_SYSTEM_THREAD as
    it doesn't matter whether a warning is pushed to a system thread or not:
    no one will see it...
1982   */
1983   if (is_real_trans && thd->transaction.all.modified_non_trans_table &&
1984       !thd->slave_thread && thd->killed < KILL_CONNECTION)
1985     push_warning(thd, Sql_condition::WARN_LEVEL_WARN,
1986                  ER_WARNING_NOT_COMPLETE_ROLLBACK,
1987                  ER_THD(thd, ER_WARNING_NOT_COMPLETE_ROLLBACK));
1988 #ifdef HAVE_REPLICATION
1989   repl_semisync_master.wait_after_rollback(thd, all);
1990 #endif
1991   DBUG_RETURN(error);
1992 }
1993 
1994 
1995 struct xahton_st {
1996   XID *xid;
1997   int result;
1998 };
1999 
2000 static my_bool xacommit_handlerton(THD *unused1, plugin_ref plugin,
2001                                    void *arg)
2002 {
2003   handlerton *hton= plugin_hton(plugin);
2004   if (hton->state == SHOW_OPTION_YES && hton->recover)
2005   {
2006     hton->commit_by_xid(hton, ((struct xahton_st *)arg)->xid);
2007     ((struct xahton_st *)arg)->result= 0;
2008   }
2009   return FALSE;
2010 }
2011 
2012 static my_bool xarollback_handlerton(THD *unused1, plugin_ref plugin,
2013                                      void *arg)
2014 {
2015   handlerton *hton= plugin_hton(plugin);
2016   if (hton->state == SHOW_OPTION_YES && hton->recover)
2017   {
2018     hton->rollback_by_xid(hton, ((struct xahton_st *)arg)->xid);
2019     ((struct xahton_st *)arg)->result= 0;
2020   }
2021   return FALSE;
2022 }
2023 
2024 
2025 int ha_commit_or_rollback_by_xid(XID *xid, bool commit)
2026 {
2027   struct xahton_st xaop;
2028   xaop.xid= xid;
2029   xaop.result= 1;
2030 
2031   plugin_foreach(NULL, commit ? xacommit_handlerton : xarollback_handlerton,
2032                  MYSQL_STORAGE_ENGINE_PLUGIN, &xaop);
2033 
2034   return xaop.result;
2035 }
2036 
2037 
2038 #ifndef DBUG_OFF
2039 /** Converts XID to string.
2040 
2041 @param[out] buf output buffer
2042 @param[in] xid XID to convert
2043 
2044 @return pointer to converted string
2045 
2046 @note This does not need to be multi-byte safe or anything */
2047 char *xid_to_str(char *buf, const XID &xid)
2048 {
2049   int i;
2050   char *s=buf;
2051   *s++='\'';
2052   for (i= 0; i < xid.gtrid_length + xid.bqual_length; i++)
2053   {
2054     uchar c= (uchar) xid.data[i];
2055     /* is_next_dig is set if next character is a number */
2056     bool is_next_dig= FALSE;
2057     if (i < XIDDATASIZE)
2058     {
2059       char ch= xid.data[i + 1];
2060       is_next_dig= (ch >= '0' && ch <='9');
2061     }
2062     if (i == xid.gtrid_length)
2063     {
2064       *s++='\'';
2065       if (xid.bqual_length)
2066       {
2067         *s++='.';
2068         *s++='\'';
2069       }
2070     }
2071     if (c < 32 || c > 126)
2072     {
2073       *s++='\\';
2074       /*
        If the next character is a digit, write the current character with
        3 octal digits to ensure that the following digit is not seen
        as part of the octal escape
2078       */
2079       if (c > 077 || is_next_dig)
2080         *s++=_dig_vec_lower[c >> 6];
2081       if (c > 007 || is_next_dig)
2082         *s++=_dig_vec_lower[(c >> 3) & 7];
2083       *s++=_dig_vec_lower[c & 7];
2084     }
2085     else
2086     {
2087       if (c == '\'' || c == '\\')
2088         *s++='\\';
2089       *s++=c;
2090     }
2091   }
2092   *s++='\'';
2093   *s=0;
2094   return buf;
2095 }
2096 #endif
2097 
2098 static my_xid wsrep_order_and_check_continuity(XID *list, int len)
2099 {
2100 #ifdef WITH_WSREP
2101   wsrep_sort_xid_array(list, len);
2102   wsrep::gtid cur_position= wsrep_get_SE_checkpoint();
2103   long long cur_seqno= cur_position.seqno().get();
2104   for (int i= 0; i < len; ++i)
2105   {
2106     if (!wsrep_is_wsrep_xid(list + i) ||
2107         wsrep_xid_seqno(list + i) != cur_seqno + 1)
2108     {
2109       WSREP_WARN("Discovered discontinuity in recovered wsrep "
2110                  "transaction XIDs. Truncating the recovery list to "
2111                  "%d entries", i);
2112       break;
2113     }
2114     ++cur_seqno;
2115   }
2116   WSREP_INFO("Last wsrep seqno to be recovered %lld", cur_seqno);
2117   return (cur_seqno < 0 ? 0 : cur_seqno);
2118 #else
2119   return 0;
2120 #endif /* WITH_WSREP */
2121 }
2122 /**
  recover() step of XA.

  @note
    There are three modes of operation:
    - automatic recovery after a crash:
      commit_list != 0, tc_heuristic_recover == 0.
      All XIDs from commit_list are committed, others are rolled back.
    - manual (heuristic) recovery:
      commit_list == 0, tc_heuristic_recover != 0.
      The DBA has explicitly specified that all prepared transactions should
      be committed (or rolled back).
    - no recovery (MySQL did not detect a crash):
      commit_list == 0, tc_heuristic_recover == 0.
      There should be no prepared transactions in this case.
2137 */
2138 struct xarecover_st
2139 {
2140   int len, found_foreign_xids, found_my_xids;
2141   XID *list;
2142   HASH *commit_list;
2143   bool dry_run;
2144 };
2145 
2146 static my_bool xarecover_handlerton(THD *unused, plugin_ref plugin,
2147                                     void *arg)
2148 {
2149   handlerton *hton= plugin_hton(plugin);
2150   struct xarecover_st *info= (struct xarecover_st *) arg;
2151   int got;
2152 
2153   if (hton->state == SHOW_OPTION_YES && hton->recover)
2154   {
2155     while ((got= hton->recover(hton, info->list, info->len)) > 0 )
2156     {
2157       sql_print_information("Found %d prepared transaction(s) in %s",
2158                             got, hton_name(hton)->str);
      /* If wsrep_on=ON, XIDs are first ordered and then the range of
         recovered XIDs is checked for continuity. All the XIDs which
         are in a continuous range can be safely committed if the binlog
         is off, since they have already been ordered and certified in
         the cluster.

         A discontinuity in the wsrep XIDs may happen because the GTID
         is assigned to the transaction in wsrep_before_prepare(), but the
         commit order is entered in wsrep_before_commit(). This means that
         transactions may run the prepare step out of order, which may
         result in a gap in the wsrep XIDs. This can be the case for example
         if we have T1 with seqno 1 and T2 with seqno 2 and the server
         crashes after T2 finishes the prepare step but before T1 starts it.
2173       */
2174       my_xid wsrep_limit __attribute__((unused))= 0;
2175 
      /* Note that this may also be called for the binlog handlerton, which
         will not have WSREP(thd) set, even though the global wsrep_on may
         be true.
2179       */
2180       if (WSREP_ON)
2181         wsrep_limit= wsrep_order_and_check_continuity(info->list, got);
2182 
2183       for (int i=0; i < got; i ++)
2184       {
2185         my_xid x= IF_WSREP(wsrep_is_wsrep_xid(&info->list[i]) ?
2186                            wsrep_xid_seqno(&info->list[i]) :
2187                            info->list[i].get_my_xid(),
2188                            info->list[i].get_my_xid());
2189         if (!x) // not "mine" - that is generated by external TM
2190         {
2191           DBUG_EXECUTE("info",{
2192             char buf[XIDDATASIZE*4+6];
2193             _db_doprnt_("ignore xid %s", xid_to_str(buf, info->list[i]));
2194             });
2195           xid_cache_insert(info->list + i);
2196           info->found_foreign_xids++;
2197           continue;
2198         }
2199         if (IF_WSREP(!(wsrep_emulate_bin_log &&
2200                        wsrep_is_wsrep_xid(info->list + i) &&
2201                        x <= wsrep_limit) && info->dry_run,
2202                      info->dry_run))
2203         {
2204           info->found_my_xids++;
2205           continue;
2206         }
2207         // recovery mode
2208         if (IF_WSREP((wsrep_emulate_bin_log &&
2209                       wsrep_is_wsrep_xid(info->list + i) &&
2210                       x <= wsrep_limit), false) ||
2211             (info->commit_list ?
2212              my_hash_search(info->commit_list, (uchar *)&x, sizeof(x)) != 0 :
2213              tc_heuristic_recover == TC_HEURISTIC_RECOVER_COMMIT))
2214         {
2215           int rc= hton->commit_by_xid(hton, info->list+i);
2216           if (rc == 0)
2217           {
2218             DBUG_EXECUTE("info",{
2219               char buf[XIDDATASIZE*4+6];
2220               _db_doprnt_("commit xid %s", xid_to_str(buf, info->list[i]));
2221               });
2222           }
2223         }
2224         else
2225         {
2226           int rc= hton->rollback_by_xid(hton, info->list+i);
2227           if (rc == 0)
2228           {
2229             DBUG_EXECUTE("info",{
2230               char buf[XIDDATASIZE*4+6];
2231               _db_doprnt_("rollback xid %s", xid_to_str(buf, info->list[i]));
2232               });
2233           }
2234         }
2235       }
2236       if (got < info->len)
2237         break;
2238     }
2239   }
2240   return FALSE;
2241 }
2242 
2243 int ha_recover(HASH *commit_list)
2244 {
2245   struct xarecover_st info;
2246   DBUG_ENTER("ha_recover");
2247   info.found_foreign_xids= info.found_my_xids= 0;
2248   info.commit_list= commit_list;
2249   info.dry_run= (info.commit_list==0 && tc_heuristic_recover==0);
2250   info.list= NULL;
2251 
  /* commit_list and tc_heuristic_recover cannot both be set */
2253   DBUG_ASSERT(info.commit_list==0 || tc_heuristic_recover==0);
2254   /* if either is set, total_ha_2pc must be set too */
2255   DBUG_ASSERT(info.dry_run ||
2256               (failed_ha_2pc + total_ha_2pc) > (ulong)opt_bin_log);
2257 
2258   if (total_ha_2pc <= (ulong)opt_bin_log)
2259     DBUG_RETURN(0);
2260 
2261   if (info.commit_list)
2262     sql_print_information("Starting crash recovery...");
2263 
2264   for (info.len= MAX_XID_LIST_SIZE ;
2265        info.list==0 && info.len > MIN_XID_LIST_SIZE; info.len/=2)
2266   {
2267     DBUG_EXECUTE_IF("min_xa_len", info.len = 16;);
2268     info.list=(XID *)my_malloc(info.len*sizeof(XID), MYF(0));
2269   }
2270   if (!info.list)
2271   {
2272     sql_print_error(ER(ER_OUTOFMEMORY),
2273                     static_cast<int>(info.len*sizeof(XID)));
2274     DBUG_RETURN(1);
2275   }
2276 
2277   plugin_foreach(NULL, xarecover_handlerton,
2278                  MYSQL_STORAGE_ENGINE_PLUGIN, &info);
2279 
2280   my_free(info.list);
2281   if (info.found_foreign_xids)
2282     sql_print_warning("Found %d prepared XA transactions",
2283                       info.found_foreign_xids);
2284   if (info.dry_run && info.found_my_xids)
2285   {
2286     sql_print_error("Found %d prepared transactions! It means that mysqld was "
2287                     "not shut down properly last time and critical recovery "
2288                     "information (last binlog or %s file) was manually deleted "
2289                     "after a crash. You have to start mysqld with "
2290                     "--tc-heuristic-recover switch to commit or rollback "
2291                     "pending transactions.",
2292                     info.found_my_xids, opt_tc_log_file);
2293     DBUG_RETURN(1);
2294   }
2295   if (info.commit_list)
2296     sql_print_information("Crash recovery finished.");
2297   DBUG_RETURN(0);
2298 }
2299 
2300 
2301 /*
2302   Called by engine to notify TC that a new commit checkpoint has been reached.
2303   See comments on handlerton method commit_checkpoint_request() for details.
2304 */
2305 void
2306 commit_checkpoint_notify_ha(handlerton *hton, void *cookie)
2307 {
2308   tc_log->commit_checkpoint_notify(cookie);
2309 }
2310 
2311 
2312 /**
2313   Check if all storage engines used in transaction agree that after
2314   rollback to savepoint it is safe to release MDL locks acquired after
2315   savepoint creation.
2316 
2317   @param thd   The client thread that executes the transaction.
2318 
2319   @return true  - It is safe to release MDL locks.
2320           false - If it is not.
2321 */
2322 bool ha_rollback_to_savepoint_can_release_mdl(THD *thd)
2323 {
2324   Ha_trx_info *ha_info;
2325   THD_TRANS *trans= (thd->in_sub_stmt ? &thd->transaction.stmt :
2326                                         &thd->transaction.all);
2327 
2328   DBUG_ENTER("ha_rollback_to_savepoint_can_release_mdl");
2329 
2330   /**
    Check whether it is safe to release metadata locks after rollback to
    savepoint in all the storage engines that are part of the transaction.
2333   */
2334   for (ha_info= trans->ha_list; ha_info; ha_info= ha_info->next())
2335   {
2336     handlerton *ht= ha_info->ht();
2337     DBUG_ASSERT(ht);
2338 
2339     if (ht->savepoint_rollback_can_release_mdl == 0 ||
2340         ht->savepoint_rollback_can_release_mdl(ht, thd) == false)
2341       DBUG_RETURN(false);
2342   }
2343 
2344   DBUG_RETURN(true);
2345 }
2346 
2347 int ha_rollback_to_savepoint(THD *thd, SAVEPOINT *sv)
2348 {
2349   int error=0;
2350   THD_TRANS *trans= (thd->in_sub_stmt ? &thd->transaction.stmt :
2351                                         &thd->transaction.all);
2352   Ha_trx_info *ha_info, *ha_info_next;
2353 
2354   DBUG_ENTER("ha_rollback_to_savepoint");
2355 
2356   trans->no_2pc=0;
2357   /*
2358     rolling back to savepoint in all storage engines that were part of the
2359     transaction when the savepoint was set
2360   */
2361   for (ha_info= sv->ha_list; ha_info; ha_info= ha_info->next())
2362   {
2363     int err;
2364     handlerton *ht= ha_info->ht();
2365     DBUG_ASSERT(ht);
2366     DBUG_ASSERT(ht->savepoint_set != 0);
2367     if ((err= ht->savepoint_rollback(ht, thd,
2368                                      (uchar *)(sv+1)+ht->savepoint_offset)))
2369     { // cannot happen
2370       my_error(ER_ERROR_DURING_ROLLBACK, MYF(0), err);
2371       error=1;
2372     }
2373     status_var_increment(thd->status_var.ha_savepoint_rollback_count);
2374     trans->no_2pc|= ht->prepare == 0;
2375   }
2376   /*
2377     rolling back the transaction in all storage engines that were not part of
2378     the transaction when the savepoint was set
2379   */
2380   for (ha_info= trans->ha_list; ha_info != sv->ha_list;
2381        ha_info= ha_info_next)
2382   {
2383     int err;
2384     handlerton *ht= ha_info->ht();
2385 #ifdef WITH_WSREP
2386     if (WSREP(thd) && ht->flags & HTON_WSREP_REPLICATION)
2387     {
2388       WSREP_DEBUG("ha_rollback_to_savepoint: run before_rollbackha_rollback_trans hook");
2389       (void) wsrep_before_rollback(thd, !thd->in_sub_stmt);
2390 
2391     }
2392 #endif // WITH_WSREP
2393     if ((err= ht->rollback(ht, thd, !thd->in_sub_stmt)))
2394     { // cannot happen
2395       my_error(ER_ERROR_DURING_ROLLBACK, MYF(0), err);
2396       error=1;
2397     }
2398 #ifdef WITH_WSREP
2399     if (WSREP(thd) && ht->flags & HTON_WSREP_REPLICATION)
2400     {
2401       WSREP_DEBUG("ha_rollback_to_savepoint: run after_rollback hook");
2402       (void) wsrep_after_rollback(thd, !thd->in_sub_stmt);
2403     }
2404 #endif // WITH_WSREP
2405     status_var_increment(thd->status_var.ha_rollback_count);
2406     ha_info_next= ha_info->next();
2407     ha_info->reset(); /* keep it conveniently zero-filled */
2408   }
2409   trans->ha_list= sv->ha_list;
2410   DBUG_RETURN(error);
2411 }
2412 
2413 /**
2414   @note
  According to the SQL standard (ISO/IEC 9075-2:2003),
  section "4.33.4 SQL-statements and transaction states",
  SAVEPOINT is *not* a transaction-initiating SQL-statement.
2418 */
2419 int ha_savepoint(THD *thd, SAVEPOINT *sv)
2420 {
2421 #ifdef WITH_WSREP
2422   /*
2423     Register binlog hton for savepoint processing if wsrep binlog
2424     emulation is on.
2425    */
2426   if (WSREP_EMULATE_BINLOG(thd) && wsrep_thd_is_local(thd))
2427   {
2428     wsrep_register_binlog_handler(thd, thd->in_multi_stmt_transaction_mode());
2429   }
2430 #endif /* WITH_WSREP */
2431   int error=0;
2432   THD_TRANS *trans= (thd->in_sub_stmt ? &thd->transaction.stmt :
2433                                         &thd->transaction.all);
2434   Ha_trx_info *ha_info= trans->ha_list;
2435   DBUG_ENTER("ha_savepoint");
2436 
2437   for (; ha_info; ha_info= ha_info->next())
2438   {
2439     int err;
2440     handlerton *ht= ha_info->ht();
2441     DBUG_ASSERT(ht);
2442     if (! ht->savepoint_set)
2443     {
2444       my_error(ER_CHECK_NOT_IMPLEMENTED, MYF(0), "SAVEPOINT");
2445       error=1;
2446       break;
2447     }
2448     if ((err= ht->savepoint_set(ht, thd, (uchar *)(sv+1)+ht->savepoint_offset)))
2449     { // cannot happen
2450       my_error(ER_GET_ERRNO, MYF(0), err, hton_name(ht)->str);
2451       error=1;
2452     }
2453     status_var_increment(thd->status_var.ha_savepoint_count);
2454   }
2455   /*
2456     Remember the list of registered storage engines. All new
2457     engines are prepended to the beginning of the list.
2458   */
2459   sv->ha_list= trans->ha_list;
2460 
2461   DBUG_RETURN(error);
2462 }
2463 
2464 int ha_release_savepoint(THD *thd, SAVEPOINT *sv)
2465 {
2466   int error=0;
2467   Ha_trx_info *ha_info= sv->ha_list;
2468   DBUG_ENTER("ha_release_savepoint");
2469 
2470   for (; ha_info; ha_info= ha_info->next())
2471   {
2472     int err;
2473     handlerton *ht= ha_info->ht();
    /* Savepoint lifetime is enclosed within the transaction lifetime. */
2475     DBUG_ASSERT(ht);
2476     if (!ht->savepoint_release)
2477       continue;
2478     if ((err= ht->savepoint_release(ht, thd,
2479                                     (uchar *)(sv+1) + ht->savepoint_offset)))
2480     { // cannot happen
2481       my_error(ER_GET_ERRNO, MYF(0), err, hton_name(ht)->str);
2482       error=1;
2483     }
2484   }
2485   DBUG_RETURN(error);
2486 }
2487 
2488 
2489 static my_bool snapshot_handlerton(THD *thd, plugin_ref plugin,
2490                                    void *arg)
2491 {
2492   handlerton *hton= plugin_hton(plugin);
2493   if (hton->state == SHOW_OPTION_YES &&
2494       hton->start_consistent_snapshot)
2495   {
2496     if (hton->start_consistent_snapshot(hton, thd))
2497       return TRUE;
2498     *((bool *)arg)= false;
2499   }
2500   return FALSE;
2501 }
2502 
2503 int ha_start_consistent_snapshot(THD *thd)
2504 {
2505   bool err, warn= true;
2506 
2507   /*
2508     Holding the LOCK_commit_ordered mutex ensures that we get the same
2509     snapshot for all engines (including the binary log).  This allows us
2510     among other things to do backups with
2511     START TRANSACTION WITH CONSISTENT SNAPSHOT and
2512     have a consistent binlog position.
2513   */
2514   mysql_mutex_lock(&LOCK_commit_ordered);
2515   err= plugin_foreach(thd, snapshot_handlerton, MYSQL_STORAGE_ENGINE_PLUGIN, &warn);
2516   mysql_mutex_unlock(&LOCK_commit_ordered);
2517 
2518   if (err)
2519   {
2520     ha_rollback_trans(thd, true);
2521     return 1;
2522   }
2523 
2524   /*
    Same idea as when one wants to CREATE TABLE in an engine which does not
    exist:
2527   */
2528   if (warn)
2529     push_warning(thd, Sql_condition::WARN_LEVEL_WARN, ER_UNKNOWN_ERROR,
2530                  "This MariaDB server does not support any "
2531                  "consistent-read capable storage engine");
2532   return 0;
2533 }
2534 
2535 
2536 static my_bool flush_handlerton(THD *thd, plugin_ref plugin,
2537                                 void *arg)
2538 {
2539   handlerton *hton= plugin_hton(plugin);
2540   if (hton->state == SHOW_OPTION_YES && hton->flush_logs &&
2541       hton->flush_logs(hton))
2542     return TRUE;
2543   return FALSE;
2544 }
2545 
2546 
2547 bool ha_flush_logs(handlerton *db_type)
2548 {
2549   if (db_type == NULL)
2550   {
2551     if (plugin_foreach(NULL, flush_handlerton,
2552                           MYSQL_STORAGE_ENGINE_PLUGIN, 0))
2553       return TRUE;
2554   }
2555   else
2556   {
2557     if (db_type->state != SHOW_OPTION_YES ||
2558         (db_type->flush_logs && db_type->flush_logs(db_type)))
2559       return TRUE;
2560   }
2561   return FALSE;
2562 }
2563 
2564 
2565 /**
2566   @brief make canonical filename
2567 
2568   @param[in]  file     table handler
2569   @param[in]  path     original path
2570   @param[out] tmp_path buffer for canonized path
2571 
  @details Lower-case the db name and table name parts of the path for
           non-file-based tables when lower_case_table_names
           is 2 (store as is, compare in lower case).
2575            Filesystem path prefix (mysql_data_home or tmpdir)
2576            is left intact.
2577 
2578   @note tmp_path may be left intact if no conversion was
2579         performed.
2580 
2581   @retval canonized path
2582 
2583   @todo This may be done more efficiently when table path
2584         gets built. Convert this function to something like
2585         ASSERT_CANONICAL_FILENAME.
2586 */
2587 const char *get_canonical_filename(handler *file, const char *path,
2588                                    char *tmp_path)
2589 {
2590   uint i;
2591   if (lower_case_table_names != 2 || (file->ha_table_flags() & HA_FILE_BASED))
2592     return path;
2593 
2594   for (i= 0; i <= mysql_tmpdir_list.max; i++)
2595   {
2596     if (is_prefix(path, mysql_tmpdir_list.list[i]))
2597       return path;
2598   }
2599 
  /* Ensure that the table handler gets the path in lower case */
2601   if (tmp_path != path)
2602     strmov(tmp_path, path);
2603 
2604   /*
    We should only lower-case the database/table part of the path,
    so start the conversion after the data home directory prefix.
2607   */
2608   my_casedn_str(files_charset_info, tmp_path + mysql_data_home_len);
2609   return tmp_path;
2610 }
2611 
2612 
/** Delete a table in the engine.
2614 
2615   @note
2616   ENOENT and HA_ERR_NO_SUCH_TABLE are not considered errors.
2617   The .frm file will be deleted only if we return 0.
2618 */
2619 int ha_delete_table(THD *thd, handlerton *table_type, const char *path,
2620                     const LEX_CSTRING *db, const LEX_CSTRING *alias, bool generate_warning)
2621 {
2622   handler *file;
2623   char tmp_path[FN_REFLEN];
2624   int error;
2625   TABLE dummy_table;
2626   TABLE_SHARE dummy_share;
2627   DBUG_ENTER("ha_delete_table");
2628 
2629   /* table_type is NULL in ALTER TABLE when renaming only .frm files */
2630   if (table_type == NULL || table_type == view_pseudo_hton ||
2631       ! (file=get_new_handler((TABLE_SHARE*)0, thd->mem_root, table_type)))
2632     DBUG_RETURN(0);
2633 
2634   bzero((char*) &dummy_table, sizeof(dummy_table));
2635   bzero((char*) &dummy_share, sizeof(dummy_share));
2636   dummy_table.s= &dummy_share;
2637 
2638   path= get_canonical_filename(file, path, tmp_path);
2639   if (unlikely((error= file->ha_delete_table(path))))
2640   {
2641     /*
      It's not an error if the table doesn't exist in the engine;
      warn the user, but still report the DROP as a success.
2644     */
2645     bool intercept= error == ENOENT || error == HA_ERR_NO_SUCH_TABLE;
2646 
2647     if (!intercept || generate_warning)
2648     {
      /* Fill up structures that print_error may need */
2650       dummy_share.path.str= (char*) path;
2651       dummy_share.path.length= strlen(path);
2652       dummy_share.normalized_path= dummy_share.path;
2653       dummy_share.db= *db;
2654       dummy_share.table_name= *alias;
2655       dummy_table.alias.set(alias->str, alias->length, table_alias_charset);
2656       file->change_table_ptr(&dummy_table, &dummy_share);
2657       file->print_error(error, MYF(intercept ? ME_WARNING : 0));
2658     }
2659     if (intercept)
2660       error= 0;
2661   }
2662   delete file;
2663 
2664   DBUG_RETURN(error);
2665 }
2666 
2667 /****************************************************************************
2668 ** General handler functions
2669 ****************************************************************************/
2670 
2671 
2672 /**
2673    Clone a handler
2674 
2675    @param name     name of new table instance
2676    @param mem_root Where 'this->ref' should be allocated. It can't be
2677                    in this->table->mem_root as otherwise we will not be
2678                    able to reclaim that memory when the clone handler
2679                    object is destroyed.
2680 */
2681 
2682 handler *handler::clone(const char *name, MEM_ROOT *mem_root)
2683 {
2684   handler *new_handler= get_new_handler(table->s, mem_root, ht);
2685 
2686   if (!new_handler)
2687     return NULL;
2688   if (new_handler->set_ha_share_ref(ha_share))
2689     goto err;
2690 
2691   /*
2692     TODO: Implement a more efficient way to have more than one index open for
2693     the same table instance. The ha_open call is not cacheable for clone.
2694 
2695     This is not critical as the engines already have the table open
2696     and should be able to use the original instance of the table.
2697   */
2698   if (new_handler->ha_open(table, name, table->db_stat,
2699                            HA_OPEN_IGNORE_IF_LOCKED, mem_root))
2700     goto err;
2701 
2702   return new_handler;
2703 
2704 err:
2705   delete new_handler;
2706   return NULL;
2707 }
2708 
2709 LEX_CSTRING *handler::engine_name()
2710 {
2711   return hton_name(ht);
2712 }
2713 
2714 
2715 /*
  It is assumed that the value of the parameter 'ranges' can only be 0 or 1.
  If ranges == 1 then the function returns the cost of an index-only scan
  by index 'index' of one range containing 'rows' key entries.
  If ranges == 0 then the function returns only the cost of copying
  those key entries into the engine buffers.
2721 */
2722 
2723 double handler::keyread_time(uint index, uint ranges, ha_rows rows)
2724 {
2725   DBUG_ASSERT(ranges == 0 || ranges == 1);
2726   size_t len= table->key_info[index].key_length + ref_length;
2727   if (index == table->s->primary_key && table->file->primary_key_is_clustered())
2728     len= table->s->stored_rec_length;
2729   double cost= (double)rows*len/(stats.block_size+1)*IDX_BLOCK_COPY_COST;
2730   if (ranges)
2731   {
2732     uint keys_per_block= (uint) (stats.block_size/2.0/len+1);
2733     ulonglong blocks= !rows ? 0 : (rows-1) / keys_per_block + 1;
2734     cost+= blocks;
2735   }
2736   return cost;
2737 }
2738 
2739 void **handler::ha_data(THD *thd) const
2740 {
2741   return thd_ha_data(thd, ht);
2742 }
2743 
2744 THD *handler::ha_thd(void) const
2745 {
2746   DBUG_ASSERT(!table || !table->in_use || table->in_use == current_thd);
2747   return (table && table->in_use) ? table->in_use : current_thd;
2748 }
2749 
2750 void handler::unbind_psi()
2751 {
2752   /*
2753     Notify the instrumentation that this table is not owned
2754     by this thread any more.
2755   */
2756   PSI_CALL_unbind_table(m_psi);
2757 }
2758 
2759 void handler::rebind_psi()
2760 {
2761   /*
2762     Notify the instrumentation that this table is now owned
2763     by this thread.
2764   */
2765   m_psi= PSI_CALL_rebind_table(ha_table_share_psi(), this, m_psi);
2766 }
2767 
2768 
2769 PSI_table_share *handler::ha_table_share_psi() const
2770 {
2771   return table_share->m_psi;
2772 }
2773 
2774 /** @brief
2775   Open database-handler.
2776 
2777   IMPLEMENTATION
    Try O_RDONLY if the table cannot be opened as O_RDWR.
    Don't wait for locks if HA_OPEN_WAIT_IF_LOCKED is not set.
2780 */
2781 int handler::ha_open(TABLE *table_arg, const char *name, int mode,
2782                      uint test_if_locked, MEM_ROOT *mem_root,
2783                      List<String> *partitions_to_open)
2784 {
2785   int error;
2786   DBUG_ENTER("handler::ha_open");
2787   DBUG_PRINT("enter",
2788              ("name: %s  db_type: %d  db_stat: %d  mode: %d  lock_test: %d",
2789               name, ht->db_type, table_arg->db_stat, mode,
2790               test_if_locked));
2791 
2792   table= table_arg;
2793   DBUG_ASSERT(table->s == table_share);
2794   DBUG_ASSERT(m_lock_type == F_UNLCK);
2795   DBUG_PRINT("info", ("old m_lock_type: %d F_UNLCK %d", m_lock_type, F_UNLCK));
2796   DBUG_ASSERT(alloc_root_inited(&table->mem_root));
2797 
2798   set_partitions_to_open(partitions_to_open);
2799 
2800   if (unlikely((error=open(name,mode,test_if_locked))))
2801   {
2802     if ((error == EACCES || error == EROFS) && mode == O_RDWR &&
2803 	(table->db_stat & HA_TRY_READ_ONLY))
2804     {
2805       table->db_stat|=HA_READ_ONLY;
2806       error=open(name,O_RDONLY,test_if_locked);
2807     }
2808   }
2809   if (unlikely(error))
2810   {
2811     my_errno= error;                            /* Safeguard */
2812     DBUG_PRINT("error",("error: %d  errno: %d",error,errno));
2813   }
2814   else
2815   {
2816     DBUG_ASSERT(m_psi == NULL);
2817     DBUG_ASSERT(table_share != NULL);
2818     /*
      Do not call this for partition handlers, since it may take too many
      resources.
      So only use m_psi at the table level, not for individual partitions.
2822     */
2823     if (!(test_if_locked & HA_OPEN_NO_PSI_CALL))
2824     {
2825       m_psi= PSI_CALL_open_table(ha_table_share_psi(), this);
2826     }
2827 
2828     if (table->s->db_options_in_use & HA_OPTION_READ_ONLY_DATA)
2829       table->db_stat|=HA_READ_ONLY;
2830     (void) extra(HA_EXTRA_NO_READCHECK);	// Not needed in SQL
2831 
2832     /* Allocate ref in thd or on the table's mem_root */
2833     if (!(ref= (uchar*) alloc_root(mem_root ? mem_root : &table->mem_root,
2834                                    ALIGN_SIZE(ref_length)*2)))
2835     {
2836       ha_close();
2837       error=HA_ERR_OUT_OF_MEM;
2838     }
2839     else
2840       dup_ref=ref+ALIGN_SIZE(ref_length);
2841     cached_table_flags= table_flags();
2842   }
2843   reset_statistics();
2844   internal_tmp_table= MY_TEST(test_if_locked & HA_OPEN_INTERNAL_TABLE);
2845 
2846   DBUG_RETURN(error);
2847 }
2848 
2849 int handler::ha_close(void)
2850 {
2851   DBUG_ENTER("ha_close");
2852   /*
2853     Increment global statistics for temporary tables.
    In_use is 0 for tables that were closed from the table cache.
2855   */
2856   if (table->in_use)
2857     status_var_add(table->in_use->status_var.rows_tmp_read, rows_tmp_read);
2858   PSI_CALL_close_table(m_psi);
2859   m_psi= NULL; /* instrumentation handle, invalid after close_table() */
2860 
2861   /* Detach from ANALYZE tracker */
2862   tracker= NULL;
2863 
2864   DBUG_ASSERT(m_lock_type == F_UNLCK);
2865   DBUG_ASSERT(inited == NONE);
2866   DBUG_RETURN(close());
2867 }
2868 
2869 
2870 int handler::ha_rnd_next(uchar *buf)
2871 {
2872   int result;
2873   DBUG_ENTER("handler::ha_rnd_next");
2874   DBUG_ASSERT(table_share->tmp_table != NO_TMP_TABLE ||
2875               m_lock_type != F_UNLCK);
2876   DBUG_ASSERT(inited == RND);
2877 
2878   do
2879   {
2880     TABLE_IO_WAIT(tracker, m_psi, PSI_TABLE_FETCH_ROW, MAX_KEY, 0,
2881       { result= rnd_next(buf); })
2882     if (result != HA_ERR_RECORD_DELETED)
2883       break;
2884     status_var_increment(table->in_use->status_var.ha_read_rnd_deleted_count);
2885   } while (!table->in_use->check_killed(1));
2886 
2887   if (result == HA_ERR_RECORD_DELETED)
2888     result= HA_ERR_ABORTED_BY_USER;
2889   else
2890   {
2891     if (!result)
2892     {
2893       update_rows_read();
2894       if (table->vfield && buf == table->record[0])
2895         table->update_virtual_fields(this, VCOL_UPDATE_FOR_READ);
2896     }
2897     increment_statistics(&SSV::ha_read_rnd_next_count);
2898   }
2899 
2900   table->status=result ? STATUS_NOT_FOUND: 0;
2901   DBUG_RETURN(result);
2902 }
2903 
2904 int handler::ha_rnd_pos(uchar *buf, uchar *pos)
2905 {
2906   int result;
2907   DBUG_ENTER("handler::ha_rnd_pos");
2908   DBUG_ASSERT(table_share->tmp_table != NO_TMP_TABLE ||
2909               m_lock_type != F_UNLCK);
2910   DBUG_ASSERT(inited == RND);
2911 
2912   TABLE_IO_WAIT(tracker, m_psi, PSI_TABLE_FETCH_ROW, MAX_KEY, 0,
2913     { result= rnd_pos(buf, pos); })
2914   increment_statistics(&SSV::ha_read_rnd_count);
2915   if (result == HA_ERR_RECORD_DELETED)
2916     result= HA_ERR_KEY_NOT_FOUND;
2917   else if (!result)
2918   {
2919     update_rows_read();
2920     if (table->vfield && buf == table->record[0])
2921       table->update_virtual_fields(this, VCOL_UPDATE_FOR_READ);
2922   }
2923   table->status=result ? STATUS_NOT_FOUND: 0;
2924   DBUG_RETURN(result);
2925 }
2926 
2927 int handler::ha_index_read_map(uchar *buf, const uchar *key,
2928                                       key_part_map keypart_map,
2929                                       enum ha_rkey_function find_flag)
2930 {
2931   int result;
2932   DBUG_ENTER("handler::ha_index_read_map");
2933   DBUG_ASSERT(table_share->tmp_table != NO_TMP_TABLE ||
2934               m_lock_type != F_UNLCK);
2935   DBUG_ASSERT(inited==INDEX);
2936 
2937   TABLE_IO_WAIT(tracker, m_psi, PSI_TABLE_FETCH_ROW, active_index, 0,
2938     { result= index_read_map(buf, key, keypart_map, find_flag); })
2939   increment_statistics(&SSV::ha_read_key_count);
2940   if (!result)
2941   {
2942     update_index_statistics();
2943     if (table->vfield && buf == table->record[0])
2944       table->update_virtual_fields(this, VCOL_UPDATE_FOR_READ);
2945   }
2946   table->status=result ? STATUS_NOT_FOUND: 0;
2947   DBUG_RETURN(result);
2948 }
2949 
2950 /*
  @note: Other index lookup/navigation functions require a prior
  handler->index_init() call. This function is different: it requires
  that the scan is not initialized, and accepts "uint index" as an argument.
2954 */
2955 
2956 int handler::ha_index_read_idx_map(uchar *buf, uint index, const uchar *key,
2957                                           key_part_map keypart_map,
2958                                           enum ha_rkey_function find_flag)
2959 {
2960   int result;
2961   DBUG_ASSERT(inited==NONE);
2962   DBUG_ASSERT(table_share->tmp_table != NO_TMP_TABLE ||
2963               m_lock_type != F_UNLCK);
2964   DBUG_ASSERT(end_range == NULL);
2965   TABLE_IO_WAIT(tracker, m_psi, PSI_TABLE_FETCH_ROW, index, 0,
2966     { result= index_read_idx_map(buf, index, key, keypart_map, find_flag); })
2967   increment_statistics(&SSV::ha_read_key_count);
2968   if (!result)
2969   {
2970     update_rows_read();
2971     index_rows_read[index]++;
2972     if (table->vfield && buf == table->record[0])
2973       table->update_virtual_fields(this, VCOL_UPDATE_FOR_READ);
2974   }
2975   table->status=result ? STATUS_NOT_FOUND: 0;
2976   return result;
2977 }
2978 
2979 int handler::ha_index_next(uchar * buf)
2980 {
2981   int result;
2982   DBUG_ENTER("handler::ha_index_next");
  DBUG_ASSERT(table_share->tmp_table != NO_TMP_TABLE ||
2984               m_lock_type != F_UNLCK);
2985   DBUG_ASSERT(inited==INDEX);
2986 
2987   TABLE_IO_WAIT(tracker, m_psi, PSI_TABLE_FETCH_ROW, active_index, 0,
2988     { result= index_next(buf); })
2989   increment_statistics(&SSV::ha_read_next_count);
2990   if (!result)
2991   {
2992     update_index_statistics();
2993     if (table->vfield && buf == table->record[0])
2994       table->update_virtual_fields(this, VCOL_UPDATE_FOR_READ);
2995   }
2996   table->status=result ? STATUS_NOT_FOUND: 0;
2997   DBUG_RETURN(result);
2998 }
2999 
3000 int handler::ha_index_prev(uchar * buf)
3001 {
3002   int result;
3003   DBUG_ENTER("handler::ha_index_prev");
3004   DBUG_ASSERT(table_share->tmp_table != NO_TMP_TABLE ||
3005               m_lock_type != F_UNLCK);
3006   DBUG_ASSERT(inited==INDEX);
3007 
3008   TABLE_IO_WAIT(tracker, m_psi, PSI_TABLE_FETCH_ROW, active_index, 0,
3009     { result= index_prev(buf); })
3010   increment_statistics(&SSV::ha_read_prev_count);
3011   if (!result)
3012   {
3013     update_index_statistics();
3014     if (table->vfield && buf == table->record[0])
3015       table->update_virtual_fields(this, VCOL_UPDATE_FOR_READ);
3016   }
3017   table->status=result ? STATUS_NOT_FOUND: 0;
3018   DBUG_RETURN(result);
3019 }
3020 
3021 int handler::ha_index_first(uchar * buf)
3022 {
3023   int result;
3024   DBUG_ASSERT(table_share->tmp_table != NO_TMP_TABLE ||
3025               m_lock_type != F_UNLCK);
3026   DBUG_ASSERT(inited==INDEX);
3027 
3028   TABLE_IO_WAIT(tracker, m_psi, PSI_TABLE_FETCH_ROW, active_index, 0,
3029     { result= index_first(buf); })
3030   increment_statistics(&SSV::ha_read_first_count);
3031   if (!result)
3032   {
3033     update_index_statistics();
3034     if (table->vfield && buf == table->record[0])
3035       table->update_virtual_fields(this, VCOL_UPDATE_FOR_READ);
3036   }
3037   table->status=result ? STATUS_NOT_FOUND: 0;
3038   return result;
3039 }
3040 
3041 int handler::ha_index_last(uchar * buf)
3042 {
3043   int result;
3044   DBUG_ASSERT(table_share->tmp_table != NO_TMP_TABLE ||
3045               m_lock_type != F_UNLCK);
3046   DBUG_ASSERT(inited==INDEX);
3047 
3048   TABLE_IO_WAIT(tracker, m_psi, PSI_TABLE_FETCH_ROW, active_index, 0,
3049     { result= index_last(buf); })
3050   increment_statistics(&SSV::ha_read_last_count);
3051   if (!result)
3052   {
3053     update_index_statistics();
3054     if (table->vfield && buf == table->record[0])
3055       table->update_virtual_fields(this, VCOL_UPDATE_FOR_READ);
3056   }
3057   table->status=result ? STATUS_NOT_FOUND: 0;
3058   return result;
3059 }
3060 
3061 int handler::ha_index_next_same(uchar *buf, const uchar *key, uint keylen)
3062 {
3063   int result;
3064   DBUG_ASSERT(table_share->tmp_table != NO_TMP_TABLE ||
3065               m_lock_type != F_UNLCK);
3066   DBUG_ASSERT(inited==INDEX);
3067 
3068   TABLE_IO_WAIT(tracker, m_psi, PSI_TABLE_FETCH_ROW, active_index, 0,
3069     { result= index_next_same(buf, key, keylen); })
3070   increment_statistics(&SSV::ha_read_next_count);
3071   if (!result)
3072   {
3073     update_index_statistics();
3074     if (table->vfield && buf == table->record[0])
3075       table->update_virtual_fields(this, VCOL_UPDATE_FOR_READ);
3076   }
3077   table->status=result ? STATUS_NOT_FOUND: 0;
3078   return result;
3079 }
3080 
3081 
3082 bool handler::ha_was_semi_consistent_read()
3083 {
3084   bool result= was_semi_consistent_read();
3085   if (result)
3086     increment_statistics(&SSV::ha_read_retry_count);
3087   return result;
3088 }
3089 
3090 /* Initialize handler for random reading, with error handling */
3091 
3092 int handler::ha_rnd_init_with_error(bool scan)
3093 {
3094   int error;
3095   if (likely(!(error= ha_rnd_init(scan))))
3096     return 0;
3097   table->file->print_error(error, MYF(0));
3098   return error;
3099 }
3100 
3101 
3102 /**
  Read the first (and only) row from a table. Used for reading tables with
  only one row, either based on table statistics or if the table is a SEQUENCE.

  This is never called for normal InnoDB tables, as these table types
  do not have HA_STATS_RECORDS_IS_EXACT set.
3108 */
3109 int handler::read_first_row(uchar * buf, uint primary_key)
3110 {
3111   int error;
3112   DBUG_ENTER("handler::read_first_row");
3113 
3114   /*
    If there are very few deleted rows in the table, find the first row by
    scanning the table.
    TODO: remove the test for HA_READ_ORDER
3118   */
3119   if (stats.deleted < 10 || primary_key >= MAX_KEY ||
3120       !(index_flags(primary_key, 0, 0) & HA_READ_ORDER))
3121   {
3122     if (likely(!(error= ha_rnd_init(1))))
3123     {
3124       error= ha_rnd_next(buf);
3125       const int end_error= ha_rnd_end();
3126       if (likely(!error))
3127         error= end_error;
3128     }
3129   }
3130   else
3131   {
3132     /* Find the first row through the primary key */
3133     if (likely(!(error= ha_index_init(primary_key, 0))))
3134     {
3135       error= ha_index_first(buf);
3136       const int end_error= ha_index_end();
3137       if (likely(!error))
3138         error= end_error;
3139     }
3140   }
3141   DBUG_RETURN(error);
3142 }
3143 
3144 /**
3145   Generate the next auto-increment number based on increment and offset.
3146   Computes the lowest number:
3147   - strictly greater than "nr"
3148   - of the form: auto_increment_offset + N * auto_increment_increment
3149   If an overflow happens, ULONGLONG_MAX is returned as an
3150   indication of overflow.
3151   In most cases increment= offset= 1, in which case we get:
3152   @verbatim 1,2,3,4,5,... @endverbatim
3153   If increment=10 and offset=5 and the previous number is 1, we get:
3154   @verbatim 5,15,25,35,... @endverbatim
3155 */
3156 inline ulonglong
3157 compute_next_insert_id(ulonglong nr,struct system_variables *variables)
3158 {
3159   const ulonglong save_nr= nr;
3160 
3161   if (variables->auto_increment_increment == 1)
3162     nr= nr + 1; // optimization of the formula below
3163   else
3164   {
3165     /*
3166        Calculating the number of complete auto_increment_increment extents:
3167     */
3168     nr= (nr + variables->auto_increment_increment -
3169          variables->auto_increment_offset) /
3170         (ulonglong) variables->auto_increment_increment;
3171     /*
3172        Adding an offset to the auto_increment_increment extent boundary:
3173     */
3174     nr= nr * (ulonglong) variables->auto_increment_increment +
3175         variables->auto_increment_offset;
3176   }
3177 
3178   if (unlikely(nr <= save_nr))
3179     return ULONGLONG_MAX;
3180 
3181   return nr;
3182 }
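     /*
       Worked example (editor's illustration, not part of the original code):
       with auto_increment_increment= 10 and auto_increment_offset= 5 and
       nr= 7, the formula above gives

         nr= (7 + 10 - 5) / 10 = 1      (number of complete extents)
         nr= 1 * 10 + 5        = 15     (next value on an extent boundary)

       and 15 is indeed the smallest value of the form 5 + N*10 that is
       strictly greater than 7. If the arithmetic wraps around
       (nr <= save_nr), ULONGLONG_MAX is returned instead.
     */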
3183 
3184 
3185 void handler::adjust_next_insert_id_after_explicit_value(ulonglong nr)
3186 {
3187   /*
3188     If we have set THD::next_insert_id previously and plan to insert an
3189     explicitly-specified value larger than this, we need to increase
3190     THD::next_insert_id to be greater than the explicit value.
3191   */
3192   if ((next_insert_id > 0) && (nr >= next_insert_id))
3193     set_next_insert_id(compute_next_insert_id(nr, &table->in_use->variables));
3194 }
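     /*
       Example (editor's illustration): with auto_increment_increment= 1 and
       auto_increment_offset= 1, if this statement has already reserved values
       up to next_insert_id= 10 and the user explicitly inserts 3763, the next
       generated value becomes 3764. An explicit value below next_insert_id
       (say 5) leaves next_insert_id untouched.
     */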
3195 
3196 
3197 /** @brief
3198   Computes the largest number X:
3199   - smaller than or equal to "nr"
3200   - of the form: auto_increment_offset + N * auto_increment_increment
3201   where N>=0.
3202 
3203   SYNOPSIS
3204     prev_insert_id
3205       nr            Number to "round down"
3206       variables     variables struct containing auto_increment_increment and
3207                     auto_increment_offset
3208 
3209   RETURN
3210     The number X if it exists, "nr" otherwise.
3211 */
3212 inline ulonglong
3213 prev_insert_id(ulonglong nr, struct system_variables *variables)
3214 {
3215   if (unlikely(nr < variables->auto_increment_offset))
3216   {
3217     /*
3218       There's nothing good we can do here. That is a pathological case, where
3219       the offset is larger than the column's max possible value, i.e. not even
3220       the first sequence value may be inserted. The user will receive a warning.
3221     */
3222     DBUG_PRINT("info",("auto_increment: nr: %lu cannot honour "
3223                        "auto_increment_offset: %lu",
3224                        (ulong) nr, variables->auto_increment_offset));
3225     return nr;
3226   }
3227   if (variables->auto_increment_increment == 1)
3228     return nr; // optimization of the formula below
3229   /*
3230      Calculating the number of complete auto_increment_increment extents:
3231   */
3232   nr= (nr - variables->auto_increment_offset) /
3233       (ulonglong) variables->auto_increment_increment;
3234   /*
3235      Adding an offset to the auto_increment_increment extent boundary:
3236   */
3237   return (nr * (ulonglong) variables->auto_increment_increment +
3238           variables->auto_increment_offset);
3239 }
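     /*
       Worked example (editor's illustration): with auto_increment_increment= 10
       and auto_increment_offset= 5 and nr= 23:

         nr= (23 - 5) / 10 = 1
         result= 1 * 10 + 5 = 15

       and 15 is the largest value of the form 5 + N*10 that is <= 23. If nr
       is already below the offset (e.g. nr= 3), nr itself is returned.
     */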
3240 
3241 
3242 /**
3243   Update the auto_increment field if necessary.
3244 
3245   Updates columns with type NEXT_NUMBER if:
3246 
3247   - the column value is set to NULL (in which case
3248     auto_increment_field_not_null is 0)
3249   - the column is set to 0 and (sql_mode & MODE_NO_AUTO_VALUE_ON_ZERO) is not
3250     set. In the future we will only set NEXT_NUMBER fields if one sets them
3251     to NULL (or they are not included in the insert list).
3252 
3253     In those cases, we check if the currently reserved interval still has
3254     values we have not used. If yes, we pick the smallest one and use it.
3255     Otherwise:
3256 
3257   - If a list of intervals has been provided to the statement via SET
3258     INSERT_ID or via an Intvar_log_event (in a replication slave), we pick the
3259     first unused interval from this list, consider it as reserved.
3260 
3261   - Otherwise we set the column for the first row to the value
3262     next_insert_id(get_auto_increment(column)) which is usually
3263     max-used-column-value + 1.
3264     We call get_auto_increment() for the first row in a multi-row
3265     statement. get_auto_increment() will tell us the interval of values it
3266     reserved for us.
3267 
3268   - In both cases, for the following rows we use those reserved values without
3269     calling the handler again (we just progress in the interval, computing
3270     each new value from the previous one). Until we have exhausted them, then
3271     we either take the next provided interval or call get_auto_increment()
3272     again to reserve a new interval.
3273 
3274   - In both cases, the reserved intervals are remembered in
3275     thd->auto_inc_intervals_in_cur_stmt_for_binlog if statement-based
3276     binlogging; the last reserved interval is remembered in
3277     auto_inc_interval_for_cur_row. The number of reserved intervals is
3278     remembered in auto_inc_intervals_count. It differs from the number of
3279     elements in thd->auto_inc_intervals_in_cur_stmt_for_binlog because the
3280     latter list is cumulative over all statements forming one binlog event
3281     (when stored functions and triggers are used), and collapses two
3282     contiguous intervals in one (see its append() method).
3283 
3284     The idea is that generated auto_increment values are predictable and
3285     independent of the column values in the table.  This is needed to be
3286     able to replicate into a table that already has rows with a higher
3287     auto-increment value than the one that is inserted.
3288 
3289     After we have already generated an auto-increment number and the user
3290     inserts a column with a higher value than the last used one, we will
3291     start counting from the inserted value.
3292 
3293     This function's "outputs" are: the table's auto_increment field is filled
3294     with a value; thd->next_insert_id is filled with the value to use for the
3295     next row; if a value was autogenerated for the current row it is stored in
3296     thd->insert_id_for_cur_row; if get_auto_increment() was called,
3297     thd->auto_inc_interval_for_cur_row is modified; if that interval is not
3298     present in thd->auto_inc_intervals_in_cur_stmt_for_binlog, it is added to
3299     this list.
3300 
3301   @todo
3302     Replace all references to "next number" or NEXT_NUMBER with
3303     "auto_increment", everywhere (see below: there is
3304     table->auto_increment_field_not_null, and there also exists
3305     table->next_number_field; this is not consistent).
3306 
3307   @retval
3308     0	ok
3309   @retval
3310     HA_ERR_AUTOINC_READ_FAILED  get_auto_increment() was called and
3311     returned ~(ulonglong) 0
3312   @retval
3313     HA_ERR_AUTOINC_ERANGE storing value in field caused strict mode
3314     failure.
3315 */
3316 
3317 #define AUTO_INC_DEFAULT_NB_ROWS 1 // Some prefer 1024 here
3318 #define AUTO_INC_DEFAULT_NB_MAX_BITS 16
3319 #define AUTO_INC_DEFAULT_NB_MAX ((1 << AUTO_INC_DEFAULT_NB_MAX_BITS) - 1)
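     /*
       Illustration (editor's note): with the defaults above
       (AUTO_INC_DEFAULT_NB_ROWS= 1, AUTO_INC_DEFAULT_NB_MAX= 65535), when no
       row estimate is available the sizes of successive reservations made by
       update_auto_increment() grow as

         1, 2, 4, 8, 16, ..., 65535 (capped)

       because each call asks for
       AUTO_INC_DEFAULT_NB_ROWS * (1 << auto_inc_intervals_count) values,
       capped at AUTO_INC_DEFAULT_NB_MAX.
     */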
3320 
3321 int handler::update_auto_increment()
3322 {
3323   ulonglong nr, nb_reserved_values;
3324   bool append= FALSE;
3325   THD *thd= table->in_use;
3326   struct system_variables *variables= &thd->variables;
3327   int result=0, tmp;
3328   DBUG_ENTER("handler::update_auto_increment");
3329 
3330   /*
3331     next_insert_id is a "cursor" into the reserved interval; it may go above
3332     the interval, but not below it.
3333   */
3334   DBUG_ASSERT(next_insert_id >= auto_inc_interval_for_cur_row.minimum());
3335 
3336   if ((nr= table->next_number_field->val_int()) != 0 ||
3337       (table->auto_increment_field_not_null &&
3338        thd->variables.sql_mode & MODE_NO_AUTO_VALUE_ON_ZERO))
3339   {
3340 
3341     /*
3342       There could be an error reported because the value was truncated
3343       when strict mode is enabled.
3344     */
3345     if (thd->is_error())
3346       DBUG_RETURN(HA_ERR_AUTOINC_ERANGE);
3347     /*
3348       Update next_insert_id if we had already generated a value in this
3349       statement (case of INSERT VALUES(null),(3763),(null):
3350       the last NULL needs to insert 3764, not the value of the first NULL plus
3351       1).
3352       Ignore negative values.
3353     */
3354     if ((longlong) nr > 0 || (table->next_number_field->flags & UNSIGNED_FLAG))
3355       adjust_next_insert_id_after_explicit_value(nr);
3356     insert_id_for_cur_row= 0; // didn't generate anything
3357     DBUG_RETURN(0);
3358   }
3359 
3360   if (table->versioned())
3361   {
3362     Field *end= table->vers_end_field();
3363     DBUG_ASSERT(end);
3364     bitmap_set_bit(table->read_set, end->field_index);
3365     if (!end->is_max())
3366     {
3367       if (thd->lex->sql_command == SQLCOM_ALTER_TABLE)
3368       {
3369         if (!table->next_number_field->real_maybe_null())
3370           DBUG_RETURN(HA_ERR_UNSUPPORTED);
3371         table->next_number_field->set_null();
3372       }
3373       DBUG_RETURN(0);
3374     }
3375   }
3376 
3377   // ALTER TABLE ... ADD COLUMN ... AUTO_INCREMENT
3378   if (thd->lex->sql_command == SQLCOM_ALTER_TABLE)
3379     table->next_number_field->set_notnull();
3380 
3381   if ((nr= next_insert_id) >= auto_inc_interval_for_cur_row.maximum())
3382   {
3383     /* next_insert_id is beyond what is reserved, so we reserve more. */
3384     const Discrete_interval *forced=
3385       thd->auto_inc_intervals_forced.get_next();
3386     if (forced != NULL)
3387     {
3388       nr= forced->minimum();
3389       nb_reserved_values= forced->values();
3390     }
3391     else
3392     {
3393       /*
3394         handler::estimation_rows_to_insert was set by
3395         handler::ha_start_bulk_insert(); if 0 it means "unknown".
3396       */
3397       ulonglong nb_desired_values;
3398       /*
3399         If an estimation was given to the engine:
3400         - use it;
3401         - if we have already reserved numbers, it means the estimation was
3402           not accurate, so we reserve 2*AUTO_INC_DEFAULT_NB_ROWS the 2nd
3403           time, twice that the 3rd time, etc.
3404         If no estimation was given, use those increasing defaults from the
3405         start, starting from AUTO_INC_DEFAULT_NB_ROWS.
3406         Don't go beyond a maximum, to avoid reserving "way too much"
3407         (because reservation means potentially losing unused values).
3408         Note that in prelocked mode no estimation is given.
3409       */
3410 
3411       if ((auto_inc_intervals_count == 0) && (estimation_rows_to_insert > 0))
3412         nb_desired_values= estimation_rows_to_insert;
3413       else if ((auto_inc_intervals_count == 0) &&
3414                (thd->lex->many_values.elements > 0))
3415       {
3416         /*
3417           For multi-row inserts, if the bulk inserts cannot be started, the
3418           handler::estimation_rows_to_insert will not be set. But we still
3419           want to reserve the autoinc values.
3420         */
3421         nb_desired_values= thd->lex->many_values.elements;
3422       }
3423       else /* go with the increasing defaults */
3424       {
3425         /* avoid overflow in formula, with this if() */
3426         if (auto_inc_intervals_count <= AUTO_INC_DEFAULT_NB_MAX_BITS)
3427         {
3428           nb_desired_values= AUTO_INC_DEFAULT_NB_ROWS *
3429             (1 << auto_inc_intervals_count);
3430           set_if_smaller(nb_desired_values, AUTO_INC_DEFAULT_NB_MAX);
3431         }
3432         else
3433           nb_desired_values= AUTO_INC_DEFAULT_NB_MAX;
3434       }
3435       get_auto_increment(variables->auto_increment_offset,
3436                          variables->auto_increment_increment,
3437                          nb_desired_values, &nr,
3438                          &nb_reserved_values);
3439       if (nr == ULONGLONG_MAX)
3440         DBUG_RETURN(HA_ERR_AUTOINC_READ_FAILED);  // Mark failure
3441 
3442       /*
3443         The rounding below should not be needed when all engines actually
3444         respect offset and increment in get_auto_increment(). But they don't,
3445         so we still do it. It is unclear whether we should also do it for the
3446         not-first-in-index case. We hope that this rounding didn't push us out
3447         of the interval; even if it did, we cannot do anything about it
3448         (calling the engine again will not help as we inserted no row).
3449       */
3450       nr= compute_next_insert_id(nr-1, variables);
3451     }
3452 
3453     if (table->s->next_number_keypart == 0)
3454     {
3455       /* We must defer the appending until "nr" has been possibly truncated */
3456       append= TRUE;
3457     }
3458     else
3459     {
3460       /*
3461         For such auto_increment there is no notion of interval, just a
3462         singleton. The interval is not even stored in
3463         thd->auto_inc_interval_for_cur_row, so we are sure to call the engine
3464         for next row.
3465       */
3466       DBUG_PRINT("info",("auto_increment: special not-first-in-index"));
3467     }
3468   }
3469 
3470   if (unlikely(nr == ULONGLONG_MAX))
3471       DBUG_RETURN(HA_ERR_AUTOINC_ERANGE);
3472 
3473   DBUG_ASSERT(nr != 0);
3474   DBUG_PRINT("info",("auto_increment: %llu  nb_reserved_values: %llu",
3475                      nr, append ? nb_reserved_values : 0));
3476 
3477   /* Store field without warning (Warning will be printed by insert) */
3478   {
3479     Check_level_instant_set check_level_save(thd, CHECK_FIELD_IGNORE);
3480     tmp= table->next_number_field->store((longlong)nr, TRUE);
3481   }
3482 
3483   if (unlikely(tmp))                            // Out of range value in store
3484   {
3485     /*
3486       First, test if the query was aborted due to strict mode constraints
3487       or new field value greater than maximum integer value:
3488     */
3489     if (thd->killed == KILL_BAD_DATA ||
3490         nr > table->next_number_field->get_max_int_value())
3491     {
3492       /*
3493         It's better to return an error here than getting a confusing
3494         'duplicate key error' later.
3495       */
3496       result= HA_ERR_AUTOINC_ERANGE;
3497     }
3498     else
3499     {
3500       /*
3501         Field refused this value (overflow) and truncated it, use the result
3502         of the truncation (which is going to be inserted); however we try to
3503         decrease it to honour auto_increment_* variables.
3504         That will shift the left bound of the reserved interval, we don't
3505         bother shifting the right bound (anyway any other value from this
3506         interval will cause a duplicate key).
3507       */
3508       nr= prev_insert_id(table->next_number_field->val_int(), variables);
3509       if (unlikely(table->next_number_field->store((longlong)nr, TRUE)))
3510         nr= table->next_number_field->val_int();
3511     }
3512   }
3513   if (append)
3514   {
3515     auto_inc_interval_for_cur_row.replace(nr, nb_reserved_values,
3516                                           variables->auto_increment_increment);
3517     auto_inc_intervals_count++;
3518     /* Row-based replication does not need to store intervals in binlog */
3519     if (((WSREP(thd) && wsrep_emulate_bin_log ) || mysql_bin_log.is_open())
3520         && !thd->is_current_stmt_binlog_format_row())
3521       thd->auto_inc_intervals_in_cur_stmt_for_binlog.
3522         append(auto_inc_interval_for_cur_row.minimum(),
3523                auto_inc_interval_for_cur_row.values(),
3524                variables->auto_increment_increment);
3525   }
3526 
3527   /*
3528     Record this autogenerated value. If the caller then
3529     succeeds in inserting this value, it will call
3530     record_first_successful_insert_id_in_cur_stmt()
3531     which will set first_successful_insert_id_in_cur_stmt if it's not
3532     already set.
3533   */
3534   insert_id_for_cur_row= nr;
3535 
3536   if (result)                                   // overflow
3537     DBUG_RETURN(result);
3538 
3539   /*
3540     Set next insert id to point to next auto-increment value to be able to
3541     handle multi-row statements.
3542   */
3543   set_next_insert_id(compute_next_insert_id(nr, variables));
3544 
3545   DBUG_RETURN(0);
3546 }
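     /*
       End-to-end illustration (editor's note; the concrete numbers are
       hypothetical): for INSERT INTO t (a) VALUES (NULL),(NULL),(NULL) with
       auto_increment_increment= 1 and auto_increment_offset= 1:

       - row 1: next_insert_id is beyond the (empty) reserved interval, so
         get_auto_increment() is called; say it returns first_value= 11; the
         row gets 11 and next_insert_id becomes 12;
       - rows 2 and 3: the values 12 and 13 are taken from the reserved
         interval without calling the engine again;
       - with statement-based binlogging, the reserved interval is appended to
         thd->auto_inc_intervals_in_cur_stmt_for_binlog so that the slave can
         reproduce the same values.
     */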
3547 
3548 
3549 /** @brief
3550   Signal from MySQL that it has changed the column bitmap
3551 
3552   USAGE
3553     This is for handlers that need to set up their own column bitmaps.
3554     Normally the handler should set up its own column bitmaps in
3555     index_init() or rnd_init() and in any column_bitmaps_signal() call after
3556     this.
3557 
3558     The handler is allowed to change the bitmap after an index_init() or
3559     rnd_init() call is made as, after this, MySQL will not use the bitmap
3560     for any program logic checking.
3561 */
3562 void handler::column_bitmaps_signal()
3563 {
3564   DBUG_ENTER("column_bitmaps_signal");
3565   if (table)
3566     DBUG_PRINT("info", ("read_set: %p  write_set: %p",
3567                         table->read_set, table->write_set));
3568   DBUG_VOID_RETURN;
3569 }
3570 
3571 
3572 /** @brief
3573   Reserves an interval of auto_increment values from the handler.
3574 
3575   SYNOPSIS
3576     get_auto_increment()
3577     offset
3578     increment
3579     nb_desired_values   how many values we want
3580     first_value         (OUT) the first value reserved by the handler
3581     nb_reserved_values  (OUT) how many values the handler reserved
3582 
3583   offset and increment mean that we want values to be of the form
3584   offset + N * increment, where N >= 0 is an integer.
3585   If the function sets *first_value to ~(ulonglong)0 it means an error.
3586   If the function sets *nb_reserved_values to ULONGLONG_MAX it means it has
3587   reserved to "positive infinity".
3588 */
3589 void handler::get_auto_increment(ulonglong offset, ulonglong increment,
3590                                  ulonglong nb_desired_values,
3591                                  ulonglong *first_value,
3592                                  ulonglong *nb_reserved_values)
3593 {
3594   ulonglong nr;
3595   int error;
3596   MY_BITMAP *old_read_set;
3597   bool rnd_inited= (inited ==  RND);
3598 
3599   if (rnd_inited && ha_rnd_end())
3600     return;
3601 
3602   old_read_set= table->prepare_for_keyread(table->s->next_number_index);
3603 
3604   if (ha_index_init(table->s->next_number_index, 1))
3605   {
3606     /* This should never happen, assert in debug, and fail in release build */
3607     DBUG_ASSERT(0);
3608     (void) extra(HA_EXTRA_NO_KEYREAD);
3609     *first_value= ULONGLONG_MAX;
3610     if (rnd_inited && ha_rnd_init_with_error(0))
3611     {
3612       //TODO: it would be nice to return an error here
3613     }
3614     return;
3615   }
3616 
3617   if (table->s->next_number_keypart == 0)
3618   {						// Autoincrement at key-start
3619     error= ha_index_last(table->record[1]);
3620     /*
3621       MySQL implicitly assumes that such a method does locking (as MySQL
3622       decides to use nr+increment without checking again with the handler,
3623       in handler::update_auto_increment()), so we reserve to infinity.
3624     */
3625     *nb_reserved_values= ULONGLONG_MAX;
3626   }
3627   else
3628   {
3629     uchar key[MAX_KEY_LENGTH];
3630     key_copy(key, table->record[0],
3631              table->key_info + table->s->next_number_index,
3632              table->s->next_number_key_offset);
3633     error= ha_index_read_map(table->record[1], key,
3634                              make_prev_keypart_map(table->s->
3635                                                    next_number_keypart),
3636                              HA_READ_PREFIX_LAST);
3637     /*
3638       MySQL needs to call us for the next row: assume we are inserting
3639       ("a",null) here and return 3; next, this statement will want to insert
3640       ("b",null): there is no reason why ("b",3+1) would be the right row to
3641       insert: maybe it already exists, maybe 3+1 is too large...
3642     */
3643     *nb_reserved_values= 1;
3644   }
3645 
3646   if (unlikely(error))
3647   {
3648     if (error == HA_ERR_END_OF_FILE || error == HA_ERR_KEY_NOT_FOUND)
3649       /* No entry found, that's fine */;
3650     else
3651       print_error(error, MYF(0));
3652     nr= 1;
3653   }
3654   else
3655     nr= ((ulonglong) table->next_number_field->
3656          val_int_offset(table->s->rec_buff_length)+1);
3657   ha_index_end();
3658   table->restore_column_maps_after_keyread(old_read_set);
3659   *first_value= nr;
3660   if (rnd_inited && ha_rnd_init_with_error(0))
3661   {
3662       //TODO: it would be nice to return an error here
3663   }
3664   return;
3665 }
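     /*
       Editor's sketch (not part of the original code): a storage engine that
       keeps its own cached counter could override this method roughly as
       follows. "ha_example" and its "share" members are hypothetical names
       used only for illustration; the signature is the one documented above.
       Note that, as the comment in update_auto_increment() observes, the SQL
       layer still rounds the returned value to honour auto_increment_offset
       and auto_increment_increment.

         void ha_example::get_auto_increment(ulonglong offset, ulonglong increment,
                                             ulonglong nb_desired_values,
                                             ulonglong *first_value,
                                             ulonglong *nb_reserved_values)
         {
           mysql_mutex_lock(&share->mutex);
           // Hand out nb_desired_values consecutive values from the cached counter
           *first_value= share->next_auto_inc;
           *nb_reserved_values= nb_desired_values;
           share->next_auto_inc+= nb_desired_values * increment;
           mysql_mutex_unlock(&share->mutex);
         }
     */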
3666 
3667 
3668 void handler::ha_release_auto_increment()
3669 {
3670   DBUG_ENTER("ha_release_auto_increment");
3671   DBUG_ASSERT(table_share->tmp_table != NO_TMP_TABLE ||
3672               m_lock_type != F_UNLCK ||
3673               (!next_insert_id && !insert_id_for_cur_row));
3674   release_auto_increment();
3675   insert_id_for_cur_row= 0;
3676   auto_inc_interval_for_cur_row.replace(0, 0, 0);
3677   auto_inc_intervals_count= 0;
3678   if (next_insert_id > 0)
3679   {
3680     next_insert_id= 0;
3681     /*
3682       This statement used forced auto_increment values if there were any;
3683       wipe them away for other statements.
3684     */
3685     table->in_use->auto_inc_intervals_forced.empty();
3686   }
3687   DBUG_VOID_RETURN;
3688 }
3689 
3690 
3691 /**
3692   Construct and emit duplicate key error message using information
3693   from table's record buffer.
3694 
3695   @param table    TABLE object whose record buffer should be used as the
3696                   source for column values.
3697   @param key      Key description.
3698   @param msg      Error message template to which key value should be
3699                   added.
3700   @param errflag  Flags for my_error() call.
3701 
3702   @note
3703     The error message is from ER_DUP_ENTRY_WITH_KEY_NAME but, to keep things
3704     compatible with old code, the error number is ER_DUP_ENTRY.
3705 */
3706 
3707 void print_keydup_error(TABLE *table, KEY *key, const char *msg, myf errflag)
3708 {
3709   /* Write the duplicated key in the error message */
3710   char key_buff[MAX_KEY_LENGTH];
3711   String str(key_buff,sizeof(key_buff),system_charset_info);
3712 
3713   if (key == NULL)
3714   {
3715     /*
3716       Key is unknown. Should only happen if storage engine reports wrong
3717       duplicate key number.
3718     */
3719     my_printf_error(ER_DUP_ENTRY, msg, errflag, "", "*UNKNOWN*");
3720   }
3721   else
3722   {
3723     if (key->algorithm == HA_KEY_ALG_LONG_HASH)
3724       setup_keyinfo_hash(key);
3725     /* Table is opened and defined at this point */
3726     key_unpack(&str,table, key);
3727     uint max_length=MYSQL_ERRMSG_SIZE-(uint) strlen(msg);
3728     if (str.length() >= max_length)
3729     {
3730       str.length(max_length-4);
3731       str.append(STRING_WITH_LEN("..."));
3732     }
3733     my_printf_error(ER_DUP_ENTRY, msg, errflag, str.c_ptr_safe(),
3734                     key->name.str);
3735     if (key->algorithm == HA_KEY_ALG_LONG_HASH)
3736       re_setup_keyinfo_hash(key);
3737   }
3738 }
3739 
3740 /**
3741   Construct and emit duplicate key error message using information
3742   from table's record buffer.
3743 
3744   @sa print_keydup_error(table, key, msg, errflag).
3745 */
3746 
3747 void print_keydup_error(TABLE *table, KEY *key, myf errflag)
3748 {
3749   print_keydup_error(table, key,
3750                      ER_THD(table->in_use, ER_DUP_ENTRY_WITH_KEY_NAME),
3751                      errflag);
3752 }
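     /*
       Illustration (editor's note): for a duplicate on a unique key named
       "uniq_name" the message produced via ER_DUP_ENTRY_WITH_KEY_NAME reads
       roughly

         Duplicate entry 'foo' for key 'uniq_name'

       while the error number reported to the client stays ER_DUP_ENTRY, as
       noted above. Key values that do not fit in MYSQL_ERRMSG_SIZE are
       truncated and terminated with "...".
     */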
3753 
3754 /**
3755   Print error that we got from handler function.
3756 
3757   @note
3758     In case of delete table it's only safe to use the following parts of
3759     the 'table' structure:
3760     - table->s->path
3761     - table->alias
3762 */
3763 
3764 #define SET_FATAL_ERROR fatal_error=1
3765 
3766 void handler::print_error(int error, myf errflag)
3767 {
3768   bool fatal_error= 0;
3769   DBUG_ENTER("handler::print_error");
3770   DBUG_PRINT("enter",("error: %d",error));
3771 
3772   if (ha_thd()->transaction_rollback_request)
3773   {
3774     /* Ensure this becomes a true error */
3775     errflag&= ~(ME_WARNING | ME_NOTE);
3776   }
3777 
3778   int textno= -1; // impossible value
3779   switch (error) {
3780   case EACCES:
3781     textno=ER_OPEN_AS_READONLY;
3782     break;
3783   case EAGAIN:
3784     textno=ER_FILE_USED;
3785     break;
3786   case ENOENT:
3787   case ENOTDIR:
3788   case ELOOP:
3789     textno=ER_FILE_NOT_FOUND;
3790     break;
3791   case ENOSPC:
3792   case HA_ERR_DISK_FULL:
3793     textno= ER_DISK_FULL;
3794     SET_FATAL_ERROR;                            // Ensure error is logged
3795     break;
3796   case HA_ERR_KEY_NOT_FOUND:
3797   case HA_ERR_NO_ACTIVE_RECORD:
3798   case HA_ERR_RECORD_DELETED:
3799   case HA_ERR_END_OF_FILE:
3800     /*
3801       These errors are not normally fatal (for example for reads). However,
3802       if you get one during an update or delete, then it is fatal.
3803       As the caller is calling print_error() (which is not done on reads), we
3804       assume something went wrong with the update or delete.
3805     */
3806     SET_FATAL_ERROR;
3807     textno=ER_KEY_NOT_FOUND;
3808     break;
3809   case HA_ERR_ABORTED_BY_USER:
3810   {
3811     DBUG_ASSERT(ha_thd()->killed);
3812     ha_thd()->send_kill_message();
3813     DBUG_VOID_RETURN;
3814   }
3815   case HA_ERR_WRONG_MRG_TABLE_DEF:
3816     textno=ER_WRONG_MRG_TABLE;
3817     break;
3818   case HA_ERR_FOUND_DUPP_KEY:
3819   {
3820     if (table)
3821     {
3822       uint key_nr=get_dup_key(error);
3823       if ((int) key_nr >= 0 && key_nr < table->s->keys)
3824       {
3825         print_keydup_error(table, &table->key_info[key_nr], errflag);
3826         DBUG_VOID_RETURN;
3827       }
3828     }
3829     textno=ER_DUP_KEY;
3830     break;
3831   }
3832   case HA_ERR_FOREIGN_DUPLICATE_KEY:
3833   {
3834     char rec_buf[MAX_KEY_LENGTH];
3835     String rec(rec_buf, sizeof(rec_buf), system_charset_info);
3836     /* Table is opened and defined at this point */
3837 
3838     /*
3839       Just print the subset of fields that are part of the first index,
3840       printing the whole row from there is not easy.
3841     */
3842     key_unpack(&rec, table, &table->key_info[0]);
3843 
3844     char child_table_name[NAME_LEN + 1];
3845     char child_key_name[NAME_LEN + 1];
3846     if (get_foreign_dup_key(child_table_name, sizeof(child_table_name),
3847                             child_key_name, sizeof(child_key_name)))
3848     {
3849       my_error(ER_FOREIGN_DUPLICATE_KEY_WITH_CHILD_INFO, errflag,
3850                table_share->table_name.str, rec.c_ptr_safe(),
3851                child_table_name, child_key_name);
3852     }
3853     else
3854     {
3855       my_error(ER_FOREIGN_DUPLICATE_KEY_WITHOUT_CHILD_INFO, errflag,
3856                table_share->table_name.str, rec.c_ptr_safe());
3857     }
3858     DBUG_VOID_RETURN;
3859   }
3860   case HA_ERR_NULL_IN_SPATIAL:
3861     my_error(ER_CANT_CREATE_GEOMETRY_OBJECT, errflag);
3862     DBUG_VOID_RETURN;
3863   case HA_ERR_FOUND_DUPP_UNIQUE:
3864     textno=ER_DUP_UNIQUE;
3865     break;
3866   case HA_ERR_RECORD_CHANGED:
3867     /*
3868       This is not a fatal error when using the HANDLER interface
3869       SET_FATAL_ERROR;
3870     */
3871     textno=ER_CHECKREAD;
3872     break;
3873   case HA_ERR_CRASHED:
3874     SET_FATAL_ERROR;
3875     textno=ER_NOT_KEYFILE;
3876     break;
3877   case HA_ERR_WRONG_IN_RECORD:
3878     SET_FATAL_ERROR;
3879     textno= ER_CRASHED_ON_USAGE;
3880     break;
3881   case HA_ERR_CRASHED_ON_USAGE:
3882     SET_FATAL_ERROR;
3883     textno=ER_CRASHED_ON_USAGE;
3884     break;
3885   case HA_ERR_NOT_A_TABLE:
3886     textno= error;
3887     break;
3888   case HA_ERR_CRASHED_ON_REPAIR:
3889     SET_FATAL_ERROR;
3890     textno=ER_CRASHED_ON_REPAIR;
3891     break;
3892   case HA_ERR_OUT_OF_MEM:
3893     textno=ER_OUT_OF_RESOURCES;
3894     break;
3895   case HA_ERR_WRONG_COMMAND:
3896     my_error(ER_ILLEGAL_HA, MYF(0), table_type(), table_share->db.str,
3897              table_share->table_name.str);
3898     DBUG_VOID_RETURN;
3899     break;
3900   case HA_ERR_OLD_FILE:
3901     textno=ER_OLD_KEYFILE;
3902     break;
3903   case HA_ERR_UNSUPPORTED:
3904     textno=ER_UNSUPPORTED_EXTENSION;
3905     break;
3906   case HA_ERR_RECORD_FILE_FULL:
3907   {
3908     textno=ER_RECORD_FILE_FULL;
3909     /* Write the error message to error log */
3910     errflag|= ME_ERROR_LOG;
3911     break;
3912   }
3913   case HA_ERR_INDEX_FILE_FULL:
3914   {
3915     textno=ER_INDEX_FILE_FULL;
3916     /* Write the error message to error log */
3917     errflag|= ME_ERROR_LOG;
3918     break;
3919   }
3920   case HA_ERR_LOCK_WAIT_TIMEOUT:
3921     textno=ER_LOCK_WAIT_TIMEOUT;
3922     break;
3923   case HA_ERR_LOCK_TABLE_FULL:
3924     textno=ER_LOCK_TABLE_FULL;
3925     break;
3926   case HA_ERR_LOCK_DEADLOCK:
3927   {
3928     String str, full_err_msg(ER_DEFAULT(ER_LOCK_DEADLOCK), system_charset_info);
3929 
3930     get_error_message(error, &str);
3931     full_err_msg.append(str);
3932     my_printf_error(ER_LOCK_DEADLOCK, "%s", errflag, full_err_msg.c_ptr_safe());
3933     DBUG_VOID_RETURN;
3934   }
3935   case HA_ERR_READ_ONLY_TRANSACTION:
3936     textno=ER_READ_ONLY_TRANSACTION;
3937     break;
3938   case HA_ERR_CANNOT_ADD_FOREIGN:
3939     textno=ER_CANNOT_ADD_FOREIGN;
3940     break;
3941   case HA_ERR_ROW_IS_REFERENCED:
3942   {
3943     String str;
3944     get_error_message(error, &str);
3945     my_printf_error(ER_ROW_IS_REFERENCED_2,
3946                     ER(str.length() ? ER_ROW_IS_REFERENCED_2 : ER_ROW_IS_REFERENCED),
3947                     errflag, str.c_ptr_safe());
3948     DBUG_VOID_RETURN;
3949   }
3950   case HA_ERR_NO_REFERENCED_ROW:
3951   {
3952     String str;
3953     get_error_message(error, &str);
3954     my_printf_error(ER_NO_REFERENCED_ROW_2,
3955                     ER(str.length() ? ER_NO_REFERENCED_ROW_2 : ER_NO_REFERENCED_ROW),
3956                     errflag, str.c_ptr_safe());
3957     DBUG_VOID_RETURN;
3958   }
3959   case HA_ERR_TABLE_DEF_CHANGED:
3960     textno=ER_TABLE_DEF_CHANGED;
3961     break;
3962   case HA_ERR_NO_SUCH_TABLE:
3963     my_error(ER_NO_SUCH_TABLE_IN_ENGINE, errflag, table_share->db.str,
3964              table_share->table_name.str);
3965     DBUG_VOID_RETURN;
3966   case HA_ERR_RBR_LOGGING_FAILED:
3967     textno= ER_BINLOG_ROW_LOGGING_FAILED;
3968     break;
3969   case HA_ERR_DROP_INDEX_FK:
3970   {
3971     const char *ptr= "???";
3972     uint key_nr= get_dup_key(error);
3973     if ((int) key_nr >= 0)
3974       ptr= table->key_info[key_nr].name.str;
3975     my_error(ER_DROP_INDEX_FK, errflag, ptr);
3976     DBUG_VOID_RETURN;
3977   }
3978   case HA_ERR_TABLE_NEEDS_UPGRADE:
3979     textno= ER_TABLE_NEEDS_UPGRADE;
3980     my_error(ER_TABLE_NEEDS_UPGRADE, errflag,
3981              "TABLE", table_share->table_name.str);
3982     DBUG_VOID_RETURN;
3983   case HA_ERR_NO_PARTITION_FOUND:
3984     textno=ER_WRONG_PARTITION_NAME;
3985     break;
3986   case HA_ERR_TABLE_READONLY:
3987     textno= ER_OPEN_AS_READONLY;
3988     break;
3989   case HA_ERR_AUTOINC_READ_FAILED:
3990     textno= ER_AUTOINC_READ_FAILED;
3991     break;
3992   case HA_ERR_AUTOINC_ERANGE:
3993     textno= error;
3994     my_error(textno, errflag, table->next_number_field->field_name.str,
3995              table->in_use->get_stmt_da()->current_row_for_warning());
3996     DBUG_VOID_RETURN;
3997     break;
3998   case HA_ERR_TOO_MANY_CONCURRENT_TRXS:
3999     textno= ER_TOO_MANY_CONCURRENT_TRXS;
4000     break;
4001   case HA_ERR_INDEX_COL_TOO_LONG:
4002     textno= ER_INDEX_COLUMN_TOO_LONG;
4003     break;
4004   case HA_ERR_NOT_IN_LOCK_PARTITIONS:
4005     textno=ER_ROW_DOES_NOT_MATCH_GIVEN_PARTITION_SET;
4006     break;
4007   case HA_ERR_INDEX_CORRUPT:
4008     textno= ER_INDEX_CORRUPT;
4009     break;
4010   case HA_ERR_UNDO_REC_TOO_BIG:
4011     textno= ER_UNDO_RECORD_TOO_BIG;
4012     break;
4013   case HA_ERR_TABLE_IN_FK_CHECK:
4014     textno= ER_TABLE_IN_FK_CHECK;
4015     break;
4016   case HA_ERR_PARTITION_LIST:
4017     my_error(ER_VERS_NOT_ALLOWED, errflag, table->s->db.str, table->s->table_name.str);
4018     DBUG_VOID_RETURN;
4019   default:
4020     {
4021       /* The error was "unknown" to this function.
4022          Ask the handler if it has a message for this error */
4023       bool temporary= FALSE;
4024       String str;
4025       temporary= get_error_message(error, &str);
4026       if (!str.is_empty())
4027       {
4028 	const char* engine= table_type();
4029 	if (temporary)
4030 	  my_error(ER_GET_TEMPORARY_ERRMSG, errflag, error, str.c_ptr(),
4031                    engine);
4032 	else
4033         {
4034           SET_FATAL_ERROR;
4035 	  my_error(ER_GET_ERRMSG, errflag, error, str.c_ptr(), engine);
4036         }
4037       }
4038       else
4039         my_error(ER_GET_ERRNO, errflag, error, table_type());
4040       DBUG_VOID_RETURN;
4041     }
4042   }
4043   DBUG_ASSERT(textno > 0);
4044   if (unlikely(fatal_error))
4045   {
4046     /* Ensure this becomes a true error */
4047     errflag&= ~(ME_WARNING | ME_NOTE);
4048     if ((debug_assert_if_crashed_table ||
4049                       global_system_variables.log_warnings > 1))
4050     {
4051       /*
4052         Log error to log before we crash or if extended warnings are requested
4053       */
4054       errflag|= ME_ERROR_LOG;
4055     }
4056   }
4057 
4058   /* If we got an OS error from a file-based engine, include the path in the error */
4059   if (error < HA_ERR_FIRST && bas_ext()[0])
4060   {
4061     char buff[FN_REFLEN];
4062     strxnmov(buff, sizeof(buff),
4063              table_share->normalized_path.str, bas_ext()[0], NULL);
4064     my_error(textno, errflag, buff, error);
4065   }
4066   else
4067     my_error(textno, errflag, table_share->table_name.str, error);
4068   DBUG_VOID_RETURN;
4069 }
4070 
4071 
4072 /**
4073   Return an error message specific to this handler.
4074 
4075   @param error  error code previously returned by handler
4076   @param buf    pointer to String where to add error message
4077 
4078   @return
4079     Returns true if this is a temporary error
4080 */
4081 bool handler::get_error_message(int error, String* buf)
4082 {
4083   DBUG_EXECUTE_IF("external_lock_failure",
4084                   buf->set_ascii(STRING_WITH_LEN("KABOOM!")););
4085   return FALSE;
4086 }
4087 
4088 /**
4089   Check for incompatible collation changes.
4090 
4091   @retval
4092     HA_ADMIN_NEEDS_UPGRADE   Table may have data requiring upgrade.
4093   @retval
4094     0                        No upgrade required.
4095 */
4096 
4097 int handler::check_collation_compatibility()
4098 {
4099   ulong mysql_version= table->s->mysql_version;
4100 
4101   if (mysql_version < 50124)
4102   {
4103     KEY *key= table->key_info;
4104     KEY *key_end= key + table->s->keys;
4105     for (; key < key_end; key++)
4106     {
4107       KEY_PART_INFO *key_part= key->key_part;
4108       KEY_PART_INFO *key_part_end= key_part + key->user_defined_key_parts;
4109       for (; key_part < key_part_end; key_part++)
4110       {
4111         if (!key_part->fieldnr)
4112           continue;
4113         Field *field= table->field[key_part->fieldnr - 1];
4114         uint cs_number= field->charset()->number;
4115         if ((mysql_version < 50048 &&
4116              (cs_number == 11 || /* ascii_general_ci - bug #29499, bug #27562 */
4117               cs_number == 41 || /* latin7_general_ci - bug #29461 */
4118               cs_number == 42 || /* latin7_general_cs - bug #29461 */
4119               cs_number == 20 || /* latin7_estonian_cs - bug #29461 */
4120               cs_number == 21 || /* latin2_hungarian_ci - bug #29461 */
4121               cs_number == 22 || /* koi8u_general_ci - bug #29461 */
4122               cs_number == 23 || /* cp1251_ukrainian_ci - bug #29461 */
4123               cs_number == 26)) || /* cp1250_general_ci - bug #29461 */
4124              (mysql_version < 50124 &&
4125              (cs_number == 33 || /* utf8_general_ci - bug #27877 */
4126               cs_number == 35))) /* ucs2_general_ci - bug #27877 */
4127           return HA_ADMIN_NEEDS_UPGRADE;
4128       }
4129     }
4130   }
4131 
4132   return 0;
4133 }
4134 
4135 
4136 int handler::ha_check_for_upgrade(HA_CHECK_OPT *check_opt)
4137 {
4138   int error;
4139   KEY *keyinfo, *keyend;
4140   KEY_PART_INFO *keypart, *keypartend;
4141 
4142   if (table->s->incompatible_version)
4143     return HA_ADMIN_NEEDS_ALTER;
4144 
4145   if (!table->s->mysql_version)
4146   {
4147     /* check for blob-in-key error */
4148     keyinfo= table->key_info;
4149     keyend= table->key_info + table->s->keys;
4150     for (; keyinfo < keyend; keyinfo++)
4151     {
4152       keypart= keyinfo->key_part;
4153       keypartend= keypart + keyinfo->user_defined_key_parts;
4154       for (; keypart < keypartend; keypart++)
4155       {
4156         if (!keypart->fieldnr)
4157           continue;
4158         Field *field= table->field[keypart->fieldnr-1];
4159         if (field->type() == MYSQL_TYPE_BLOB)
4160         {
4161           if (check_opt->sql_flags & TT_FOR_UPGRADE)
4162             check_opt->flags= T_MEDIUM;
4163           return HA_ADMIN_NEEDS_CHECK;
4164         }
4165       }
4166     }
4167   }
4168   if (table->s->frm_version < FRM_VER_TRUE_VARCHAR)
4169     return HA_ADMIN_NEEDS_ALTER;
4170 
4171   if (unlikely((error= check_collation_compatibility())))
4172     return error;
4173 
4174   return check_for_upgrade(check_opt);
4175 }
4176 
4177 
4178 int handler::check_old_types()
4179 {
4180   Field** field;
4181 
4182   if (!table->s->mysql_version)
4183   {
4184     /* check for bad DECIMAL field */
4185     for (field= table->field; (*field); field++)
4186     {
4187       if ((*field)->type() == MYSQL_TYPE_NEWDECIMAL)
4188       {
4189         return HA_ADMIN_NEEDS_ALTER;
4190       }
4191       if ((*field)->type() == MYSQL_TYPE_VAR_STRING)
4192       {
4193         return HA_ADMIN_NEEDS_ALTER;
4194       }
4195     }
4196   }
4197   return 0;
4198 }
4199 
4200 
4201 static bool update_frm_version(TABLE *table)
4202 {
4203   char path[FN_REFLEN];
4204   File file;
4205   int result= 1;
4206   DBUG_ENTER("update_frm_version");
4207 
4208   /*
4209     No need to update the frm version in case the table was created or checked
4210     by a server with the same version. This also ensures that we do not
4211     update frm version for temporary tables as this code doesn't support
4212     temporary tables.
4213   */
4214   if (table->s->mysql_version == MYSQL_VERSION_ID)
4215     DBUG_RETURN(0);
4216 
4217   strxmov(path, table->s->normalized_path.str, reg_ext, NullS);
4218 
4219   if ((file= mysql_file_open(key_file_frm,
4220                              path, O_RDWR|O_BINARY, MYF(MY_WME))) >= 0)
4221   {
4222     uchar version[4];
4223 
4224     int4store(version, MYSQL_VERSION_ID);
4225 
4226     if ((result= (int)mysql_file_pwrite(file, (uchar*) version, 4, 51L,
4227                                         MYF(MY_WME+MY_NABP))))
4228       goto err;
4229 
4230     table->s->mysql_version= MYSQL_VERSION_ID;
4231   }
4232 err:
4233   if (file >= 0)
4234     (void) mysql_file_close(file, MYF(MY_WME));
4235   DBUG_RETURN(result);
4236 }
4237 
4238 
4239 
4240 /**
4241   @return
4242     key number if the error was caused by a duplicate key, otherwise (uint) -1
4243 */
4244 uint handler::get_dup_key(int error)
4245 {
4246   DBUG_ASSERT(table_share->tmp_table != NO_TMP_TABLE || m_lock_type != F_UNLCK);
4247   DBUG_ENTER("handler::get_dup_key");
4248   if (table->s->long_unique_table && table->file->errkey < table->s->keys)
4249     DBUG_RETURN(table->file->errkey);
4250   table->file->errkey  = (uint) -1;
4251   if (error == HA_ERR_FOUND_DUPP_KEY ||
4252       error == HA_ERR_FOREIGN_DUPLICATE_KEY ||
4253       error == HA_ERR_FOUND_DUPP_UNIQUE || error == HA_ERR_NULL_IN_SPATIAL ||
4254       error == HA_ERR_DROP_INDEX_FK)
4255     table->file->info(HA_STATUS_ERRKEY | HA_STATUS_NO_LOCK);
4256   DBUG_RETURN(table->file->errkey);
4257 }
4258 
4259 
4260 /**
4261   Delete all files with extension from bas_ext().
4262 
4263   @param name		Base name of table
4264 
4265   @note
4266     We assume that the handler may return more extensions than
4267     were actually used for the file.
4268 
4269   @retval
4270     0   If we successfully deleted at least one file from bas_ext() and
4271     didn't get any errors other than ENOENT
4272   @retval
4273     !0  Error
4274 */
4275 int handler::delete_table(const char *name)
4276 {
4277   int saved_error= 0;
4278   int error= 0;
4279   int enoent_or_zero;
4280 
4281   if (ht->discover_table)
4282     enoent_or_zero= 0; // the table may not exist in the engine, it's ok
4283   else
4284     enoent_or_zero= ENOENT;  // the first file of bas_ext() *must* exist
4285 
4286   for (const char **ext=bas_ext(); *ext ; ext++)
4287   {
4288     if (mysql_file_delete_with_symlink(key_file_misc, name, *ext, 0))
4289     {
4290       if (my_errno != ENOENT)
4291       {
4292         /*
4293           If error on the first existing file, return the error.
4294           Otherwise delete as much as possible.
4295         */
4296         if (enoent_or_zero)
4297           return my_errno;
4298 	saved_error= my_errno;
4299       }
4300     }
4301     else
4302       enoent_or_zero= 0;                        // No error for ENOENT
4303     error= enoent_or_zero;
4304   }
4305   return saved_error ? saved_error : error;
4306 }
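     /*
       Illustration (editor's note): for an engine whose bas_ext() returns the
       hypothetical list { ".dat", ".idx", NullS }, delete_table("t1") tries to
       remove t1.dat and t1.idx. For an engine without discovery support
       (ht->discover_table not set) a non-ENOENT failure on the first, required
       file is returned immediately; once one file has been deleted, later
       errors are remembered while the remaining files are still removed, and
       the saved error is returned at the end.
     */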
4307 
4308 
4309 int handler::rename_table(const char * from, const char * to)
4310 {
4311   int error= 0;
4312   const char **ext, **start_ext;
4313   start_ext= bas_ext();
4314   for (ext= start_ext; *ext ; ext++)
4315   {
4316     if (unlikely(rename_file_ext(from, to, *ext)))
4317     {
4318       if ((error=my_errno) != ENOENT)
4319 	break;
4320       error= 0;
4321     }
4322   }
4323   if (unlikely(error))
4324   {
4325     /* Try to revert the rename. Ignore errors. */
4326     for (; ext >= start_ext; ext--)
4327       rename_file_ext(to, from, *ext);
4328   }
4329   return error;
4330 }
4331 
4332 
4333 void handler::drop_table(const char *name)
4334 {
4335   ha_close();
4336   delete_table(name);
4337 }
4338 
4339 
4340 /**
4341   Performs checks upon the table.
4342 
4343   @param thd                thread doing CHECK TABLE operation
4344   @param check_opt          options from the parser
4345 
4346   @retval
4347     HA_ADMIN_OK               Check was successful
4348   @retval
4349     HA_ADMIN_NEEDS_UPGRADE    Table has structures requiring upgrade
4350   @retval
4351     HA_ADMIN_NEEDS_ALTER      Table has structures requiring ALTER TABLE
4352   @retval
4353     HA_ADMIN_NOT_IMPLEMENTED
4354 */
4355 int handler::ha_check(THD *thd, HA_CHECK_OPT *check_opt)
4356 {
4357   int error;
4358   DBUG_ASSERT(table_share->tmp_table != NO_TMP_TABLE ||
4359               m_lock_type != F_UNLCK);
4360 
4361   if ((table->s->mysql_version >= MYSQL_VERSION_ID) &&
4362       (check_opt->sql_flags & TT_FOR_UPGRADE))
4363     return 0;
4364 
4365   if (table->s->mysql_version < MYSQL_VERSION_ID)
4366   {
4367     if (unlikely((error= check_old_types())))
4368       return error;
4369     error= ha_check_for_upgrade(check_opt);
4370     if (unlikely(error && (error != HA_ADMIN_NEEDS_CHECK)))
4371       return error;
4372     if (unlikely(!error && (check_opt->sql_flags & TT_FOR_UPGRADE)))
4373       return 0;
4374   }
4375   if (unlikely((error= check(thd, check_opt))))
4376     return error;
4377   /* Skip updating frm version if not main handler. */
4378   if (table->file != this)
4379     return error;
4380   return update_frm_version(table);
4381 }
4382 
4383 /**
4384   A helper function to mark a transaction read-write,
4385   if it is started.
4386 */
4387 
4388 void handler::mark_trx_read_write_internal()
4389 {
4390   Ha_trx_info *ha_info= &ha_thd()->ha_data[ht->slot].ha_info[0];
4391   /*
4392     When a storage engine method is called, the transaction must
4393     have been started, unless it's a DDL call, for which the
4394     storage engine starts the transaction internally, and commits
4395     it internally, without registering in the ha_list.
4396     Unfortunately here we can't know for sure whether the engine
4397     has registered the transaction or not, so we must check.
4398   */
4399   if (ha_info->is_started())
4400   {
4401     DBUG_ASSERT(has_transaction_manager());
4402     /*
4403       table_share can be NULL in ha_delete_table(). See implementation
4404       of standalone function ha_delete_table() in sql_base.cc.
4405     */
4406     if (table_share == NULL || table_share->tmp_table == NO_TMP_TABLE)
4407       ha_info->set_trx_read_write();
4408   }
4409 }
4410 
4411 
4412 /**
4413   Repair table: public interface.
4414 
4415   @sa handler::repair()
4416 */
4417 
4418 int handler::ha_repair(THD* thd, HA_CHECK_OPT* check_opt)
4419 {
4420   int result;
4421 
4422   mark_trx_read_write();
4423 
4424   result= repair(thd, check_opt);
4425   DBUG_ASSERT(result == HA_ADMIN_NOT_IMPLEMENTED ||
4426               ha_table_flags() & HA_CAN_REPAIR);
4427 
4428   if (result == HA_ADMIN_OK)
4429     result= update_frm_version(table);
4430   return result;
4431 }
4432 
4433 
4434 /**
4435    End bulk insert
4436 */
4437 
4438 int handler::ha_end_bulk_insert()
4439 {
4440   DBUG_ENTER("handler::ha_end_bulk_insert");
4441   DBUG_EXECUTE_IF("crash_end_bulk_insert",
4442                   { extra(HA_EXTRA_FLUSH) ; DBUG_SUICIDE();});
4443   estimation_rows_to_insert= 0;
4444   DBUG_RETURN(end_bulk_insert());
4445 }
4446 
4447 /**
4448   Bulk update row: public interface.
4449 
4450   @sa handler::bulk_update_row()
4451 */
4452 
4453 int
4454 handler::ha_bulk_update_row(const uchar *old_data, const uchar *new_data,
4455                             ha_rows *dup_key_found)
4456 {
4457   DBUG_ASSERT(table_share->tmp_table != NO_TMP_TABLE ||
4458               m_lock_type == F_WRLCK);
4459   mark_trx_read_write();
4460 
4461   return bulk_update_row(old_data, new_data, dup_key_found);
4462 }
4463 
4464 
4465 /**
4466   Delete all rows: public interface.
4467 
4468   @sa handler::delete_all_rows()
4469 */
4470 
4471 int
4472 handler::ha_delete_all_rows()
4473 {
4474   DBUG_ASSERT(table_share->tmp_table != NO_TMP_TABLE ||
4475               m_lock_type == F_WRLCK);
4476   mark_trx_read_write();
4477 
4478   return delete_all_rows();
4479 }
4480 
4481 
4482 /**
4483   Truncate table: public interface.
4484 
4485   @sa handler::truncate()
4486 */
4487 
4488 int
4489 handler::ha_truncate()
4490 {
4491   DBUG_ASSERT(table_share->tmp_table != NO_TMP_TABLE ||
4492               m_lock_type == F_WRLCK);
4493   mark_trx_read_write();
4494 
4495   return truncate();
4496 }
4497 
4498 
4499 /**
4500   Reset auto increment: public interface.
4501 
4502   @sa handler::reset_auto_increment()
4503 */
4504 
4505 int
4506 handler::ha_reset_auto_increment(ulonglong value)
4507 {
4508   DBUG_ASSERT(table_share->tmp_table != NO_TMP_TABLE ||
4509               m_lock_type == F_WRLCK);
4510   mark_trx_read_write();
4511 
4512   return reset_auto_increment(value);
4513 }
4514 
4515 
4516 /**
4517   Optimize table: public interface.
4518 
4519   @sa handler::optimize()
4520 */
4521 
4522 int
4523 handler::ha_optimize(THD* thd, HA_CHECK_OPT* check_opt)
4524 {
4525   DBUG_ASSERT(table_share->tmp_table != NO_TMP_TABLE ||
4526               m_lock_type == F_WRLCK);
4527   mark_trx_read_write();
4528 
4529   return optimize(thd, check_opt);
4530 }
4531 
4532 
4533 /**
4534   Analyze table: public interface.
4535 
4536   @sa handler::analyze()
4537 */
4538 
4539 int
4540 handler::ha_analyze(THD* thd, HA_CHECK_OPT* check_opt)
4541 {
4542   DBUG_ASSERT(table_share->tmp_table != NO_TMP_TABLE ||
4543               m_lock_type != F_UNLCK);
4544   mark_trx_read_write();
4545 
4546   return analyze(thd, check_opt);
4547 }
4548 
4549 
4550 /**
4551   Check and repair table: public interface.
4552 
4553   @sa handler::check_and_repair()
4554 */
4555 
4556 bool
4557 handler::ha_check_and_repair(THD *thd)
4558 {
4559   DBUG_ASSERT(table_share->tmp_table != NO_TMP_TABLE ||
4560               m_lock_type == F_UNLCK);
4561   mark_trx_read_write();
4562 
4563   return check_and_repair(thd);
4564 }
4565 
4566 
4567 /**
4568   Disable indexes: public interface.
4569 
4570   @sa handler::disable_indexes()
4571 */
4572 
4573 int
4574 handler::ha_disable_indexes(uint mode)
4575 {
4576   DBUG_ASSERT(table_share->tmp_table != NO_TMP_TABLE ||
4577               m_lock_type != F_UNLCK);
4578   mark_trx_read_write();
4579 
4580   return disable_indexes(mode);
4581 }
4582 
4583 
4584 /**
4585   Enable indexes: public interface.
4586 
4587   @sa handler::enable_indexes()
4588 */
4589 
4590 int
4591 handler::ha_enable_indexes(uint mode)
4592 {
4593   DBUG_ASSERT(table_share->tmp_table != NO_TMP_TABLE ||
4594               m_lock_type != F_UNLCK);
4595   mark_trx_read_write();
4596 
4597   return enable_indexes(mode);
4598 }
4599 
4600 
4601 /**
4602   Discard or import tablespace: public interface.
4603 
4604   @sa handler::discard_or_import_tablespace()
4605 */
4606 
4607 int
4608 handler::ha_discard_or_import_tablespace(my_bool discard)
4609 {
4610   DBUG_ASSERT(table_share->tmp_table != NO_TMP_TABLE ||
4611               m_lock_type == F_WRLCK);
4612   mark_trx_read_write();
4613 
4614   return discard_or_import_tablespace(discard);
4615 }
4616 
4617 
4618 bool handler::ha_prepare_inplace_alter_table(TABLE *altered_table,
4619                                              Alter_inplace_info *ha_alter_info)
4620 {
4621   DBUG_ASSERT(table_share->tmp_table != NO_TMP_TABLE ||
4622               m_lock_type != F_UNLCK);
4623   mark_trx_read_write();
4624 
4625   return prepare_inplace_alter_table(altered_table, ha_alter_info);
4626 }
4627 
4628 
4629 bool handler::ha_commit_inplace_alter_table(TABLE *altered_table,
4630                                             Alter_inplace_info *ha_alter_info,
4631                                             bool commit)
4632 {
4633    /*
4634      At this point we should have an exclusive metadata lock on the table.
4635      The exception is if we're about to roll back changes (commit= false).
4636      In this case, we might be rolling back after a failed lock upgrade,
4637      so we could be holding the same lock level as for inplace_alter_table().
4638    */
4639    DBUG_ASSERT(ha_thd()->mdl_context.is_lock_owner(MDL_key::TABLE,
4640                                                    table->s->db.str,
4641                                                    table->s->table_name.str,
4642                                                    MDL_EXCLUSIVE) ||
4643                !commit);
4644 
4645    return commit_inplace_alter_table(altered_table, ha_alter_info, commit);
4646 }
4647 
4648 
4649 /*
4650    Default implementation to support in-place alter table
4651    and old online add/drop index API
4652 */
4653 
4654 enum_alter_inplace_result
4655 handler::check_if_supported_inplace_alter(TABLE *altered_table,
4656                                           Alter_inplace_info *ha_alter_info)
4657 {
4658   DBUG_ENTER("handler::check_if_supported_inplace_alter");
4659 
4660   HA_CREATE_INFO *create_info= ha_alter_info->create_info;
4661 
4662   if (altered_table->versioned(VERS_TIMESTAMP))
4663     DBUG_RETURN(HA_ALTER_INPLACE_NOT_SUPPORTED);
4664 
4665   alter_table_operations inplace_offline_operations=
4666     ALTER_COLUMN_TYPE_CHANGE_BY_ENGINE |
4667     ALTER_COLUMN_NAME |
4668     ALTER_RENAME_COLUMN |
4669     ALTER_CHANGE_COLUMN_DEFAULT |
4670     ALTER_COLUMN_DEFAULT |
4671     ALTER_COLUMN_OPTION |
4672     ALTER_CHANGE_CREATE_OPTION |
4673     ALTER_DROP_CHECK_CONSTRAINT |
4674     ALTER_PARTITIONED |
4675     ALTER_VIRTUAL_GCOL_EXPR |
4676     ALTER_RENAME;
4677 
4678   /* Is there at least one operation that requires copy algorithm? */
4679   if (ha_alter_info->handler_flags & ~inplace_offline_operations)
4680     DBUG_RETURN(HA_ALTER_INPLACE_NOT_SUPPORTED);
4681 
4682   /*
4683     The following checks are for changes related to ALTER_OPTIONS.
4684 
4685     ALTER TABLE tbl_name CONVERT TO CHARACTER SET .. and
4686     ALTER TABLE table_name DEFAULT CHARSET = .. most likely
4687     change column charsets and so are not supported in-place through
4688     the old API.
4689 
4690     Changes to the PACK_KEYS, MAX_ROWS and ROW_FORMAT options were
4691     not supported as in-place operations in the old API either.
4692   */
4693   if (create_info->used_fields & (HA_CREATE_USED_CHARSET |
4694                                   HA_CREATE_USED_DEFAULT_CHARSET |
4695                                   HA_CREATE_USED_PACK_KEYS |
4696                                   HA_CREATE_USED_CHECKSUM |
4697                                   HA_CREATE_USED_MAX_ROWS) ||
4698       (table->s->row_type != create_info->row_type))
4699     DBUG_RETURN(HA_ALTER_INPLACE_NOT_SUPPORTED);
4700 
4701   uint table_changes= (ha_alter_info->handler_flags &
4702                        ALTER_COLUMN_TYPE_CHANGE_BY_ENGINE) ?
4703     IS_EQUAL_PACK_LENGTH : IS_EQUAL_YES;
4704   if (table->file->check_if_incompatible_data(create_info, table_changes)
4705       == COMPATIBLE_DATA_YES)
4706     DBUG_RETURN(HA_ALTER_INPLACE_NO_LOCK);
4707 
4708   DBUG_RETURN(HA_ALTER_INPLACE_NOT_SUPPORTED);
4709 }
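     /*
       Editor's sketch (not part of the original code): an engine that can
       only rename columns without copying the table could override this hook
       roughly as follows. "ha_example" is a hypothetical class name; the flag
       and result constants are the ones used by the default implementation
       above.

         enum_alter_inplace_result
         ha_example::check_if_supported_inplace_alter(TABLE *altered_table,
                                                      Alter_inplace_info *ha_alter_info)
         {
           // Anything besides a column rename requires the copy algorithm
           if (ha_alter_info->handler_flags &
               ~(ALTER_COLUMN_NAME | ALTER_RENAME_COLUMN))
             return HA_ALTER_INPLACE_NOT_SUPPORTED;
           return HA_ALTER_INPLACE_NO_LOCK;
         }
     */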
4710 
4711 Alter_inplace_info::Alter_inplace_info(HA_CREATE_INFO *create_info_arg,
4712                      Alter_info *alter_info_arg,
4713                      KEY *key_info_arg, uint key_count_arg,
4714                      partition_info *modified_part_info_arg,
4715                      bool ignore_arg, bool error_non_empty)
4716     : create_info(create_info_arg),
4717     alter_info(alter_info_arg),
4718     key_info_buffer(key_info_arg),
4719     key_count(key_count_arg),
4720     index_drop_count(0),
4721     index_drop_buffer(nullptr),
4722     index_add_count(0),
4723     index_add_buffer(nullptr),
4724     rename_keys(current_thd->mem_root),
4725     handler_ctx(nullptr),
4726     group_commit_ctx(nullptr),
4727     handler_flags(0),
4728     modified_part_info(modified_part_info_arg),
4729     ignore(ignore_arg),
4730     online(false),
4731     unsupported_reason(nullptr),
4732     error_if_not_empty(error_non_empty)
4733   {}
4734 
4735 void Alter_inplace_info::report_unsupported_error(const char *not_supported,
4736                                                   const char *try_instead) const
4737 {
4738   if (unsupported_reason == NULL)
4739     my_error(ER_ALTER_OPERATION_NOT_SUPPORTED, MYF(0),
4740              not_supported, try_instead);
4741   else
4742     my_error(ER_ALTER_OPERATION_NOT_SUPPORTED_REASON, MYF(0),
4743              not_supported, unsupported_reason, try_instead);
4744 }
4745 
4746 
4747 /**
4748   Rename table: public interface.
4749 
4750   @sa handler::rename_table()
4751 */
4752 
4753 int
4754 handler::ha_rename_table(const char *from, const char *to)
4755 {
4756   DBUG_ASSERT(m_lock_type == F_UNLCK);
4757   mark_trx_read_write();
4758 
4759   return rename_table(from, to);
4760 }
4761 
4762 
4763 /**
4764   Delete table: public interface.
4765 
4766   @sa handler::delete_table()
4767 */
4768 
4769 int
4770 handler::ha_delete_table(const char *name)
4771 {
4772   mark_trx_read_write();
4773   return delete_table(name);
4774 }
4775 
4776 
4777 /**
4778   Drop table in the engine: public interface.
4779 
4780   @sa handler::drop_table()
4781 
4782   The difference between this and delete_table() is that the table is open in
4783   drop_table().
4784 */
4785 
4786 void
4787 handler::ha_drop_table(const char *name)
4788 {
4789   DBUG_ASSERT(m_lock_type == F_UNLCK);
4790   mark_trx_read_write();
4791 
4792   return drop_table(name);
4793 }
4794 
4795 
4796 /**
4797   Create a table in the engine: public interface.
4798 
4799   @sa handler::create()
4800 */
4801 
4802 int
4803 handler::ha_create(const char *name, TABLE *form, HA_CREATE_INFO *info_arg)
4804 {
4805   DBUG_ASSERT(m_lock_type == F_UNLCK);
4806   mark_trx_read_write();
4807   int error= create(name, form, info_arg);
4808   if (!error &&
4809       !(info_arg->options & (HA_LEX_CREATE_TMP_TABLE | HA_CREATE_TMP_ALTER)))
4810     mysql_audit_create_table(form);
4811   return error;
4812 }
4813 
4814 
4815 /**
4816   Create handler files for CREATE TABLE: public interface.
4817 
4818   @sa handler::create_partitioning_metadata()
4819 */
4820 
4821 int
4822 handler::ha_create_partitioning_metadata(const char *name,
4823                                          const char *old_name,
4824                                          int action_flag)
4825 {
4826   /*
4827     Normally this is done when unlocked, but in fast_alter_partition_table,
4828     it is done on an already locked handler when preparing to alter/rename
4829     partitions.
4830   */
4831   DBUG_ASSERT(m_lock_type == F_UNLCK ||
4832               (!old_name && strcmp(name, table_share->path.str)));
4833 
4834 
4835   mark_trx_read_write();
4836   return create_partitioning_metadata(name, old_name, action_flag);
4837 }
4838 
4839 
4840 /**
4841   Change partitions: public interface.
4842 
4843   @sa handler::change_partitions()
4844 */
4845 
4846 int
4847 handler::ha_change_partitions(HA_CREATE_INFO *create_info,
4848                               const char *path,
4849                               ulonglong * const copied,
4850                               ulonglong * const deleted,
4851                               const uchar *pack_frm_data,
4852                               size_t pack_frm_len)
4853 {
4854   /*
    Must hold at least a read lock (RDLCK) or be a TMP table. A read lock is
    needed to read from the current partitions, and a write lock will be taken
    on the new partitions.
4857   */
4858   DBUG_ASSERT(table_share->tmp_table != NO_TMP_TABLE ||
4859               m_lock_type != F_UNLCK);
4860 
4861   mark_trx_read_write();
4862 
4863   return change_partitions(create_info, path, copied, deleted,
4864                            pack_frm_data, pack_frm_len);
4865 }
4866 
4867 
4868 /**
4869   Drop partitions: public interface.
4870 
4871   @sa handler::drop_partitions()
4872 */
4873 
4874 int
4875 handler::ha_drop_partitions(const char *path)
4876 {
4877   DBUG_ASSERT(!table->db_stat);
4878 
4879   mark_trx_read_write();
4880 
4881   return drop_partitions(path);
4882 }
4883 
4884 
4885 /**
4886   Rename partitions: public interface.
4887 
4888   @sa handler::rename_partitions()
4889 */
4890 
4891 int
4892 handler::ha_rename_partitions(const char *path)
4893 {
4894   DBUG_ASSERT(!table->db_stat);
4895 
4896   mark_trx_read_write();
4897 
4898   return rename_partitions(path);
4899 }
4900 
4901 
4902 /**
  Tell the storage engine that it is allowed to "disable transactions" in the
  handler. It is a hint that ACID is not required - it was used in NDB for
  ALTER TABLE, for example, when data are copied to a temporary table.
  A storage engine may treat this hint any way it likes. NDB, for example,
  would then start committing automatically every now and then.
  This hint can be safely ignored.
4909 */
4910 int ha_enable_transaction(THD *thd, bool on)
4911 {
4912   int error=0;
4913   DBUG_ENTER("ha_enable_transaction");
4914   DBUG_PRINT("enter", ("on: %d", (int) on));
4915 
4916   if ((thd->transaction.on= on))
4917   {
4918     /*
4919       Now all storage engines should have transaction handling enabled.
4920       But some may have it enabled all the time - "disabling" transactions
      is an optimization hint that a storage engine is free to ignore.
4922       So, let's commit an open transaction (if any) now.
4923     */
4924     if (likely(!(error= ha_commit_trans(thd, 0))))
4925       error= trans_commit_implicit(thd);
4926   }
4927   DBUG_RETURN(error);
4928 }
4929 
4930 int handler::index_next_same(uchar *buf, const uchar *key, uint keylen)
4931 {
4932   int error;
4933   DBUG_ENTER("handler::index_next_same");
4934   if (!(error=index_next(buf)))
4935   {
4936     my_ptrdiff_t ptrdiff= buf - table->record[0];
4937     uchar *UNINIT_VAR(save_record_0);
4938     KEY *UNINIT_VAR(key_info);
4939     KEY_PART_INFO *UNINIT_VAR(key_part);
4940     KEY_PART_INFO *UNINIT_VAR(key_part_end);
4941 
4942     /*
      key_cmp_if_same() compares table->record[0] against 'key'.
      In places it uses table->record[0] directly; in places it uses
      Field objects with their local pointers into table->record[0].
      If 'buf' is distinct from table->record[0], we need to move
      all record references, i.e. table->record[0] itself and
      the field pointers of the fields used in this key.
4949     */
4950     if (ptrdiff)
4951     {
4952       save_record_0= table->record[0];
4953       table->record[0]= buf;
4954       key_info= table->key_info + active_index;
4955       key_part= key_info->key_part;
4956       key_part_end= key_part + key_info->user_defined_key_parts;
4957       for (; key_part < key_part_end; key_part++)
4958       {
4959         DBUG_ASSERT(key_part->field);
4960         key_part->field->move_field_offset(ptrdiff);
4961       }
4962     }
4963 
4964     if (key_cmp_if_same(table, key, active_index, keylen))
4965     {
4966       table->status=STATUS_NOT_FOUND;
4967       error=HA_ERR_END_OF_FILE;
4968     }
4969 
4970     /* Move back if necessary. */
4971     if (ptrdiff)
4972     {
4973       table->record[0]= save_record_0;
4974       for (key_part= key_info->key_part; key_part < key_part_end; key_part++)
4975         key_part->field->move_field_offset(-ptrdiff);
4976     }
4977   }
4978   DBUG_PRINT("return",("%i", error));
4979   DBUG_RETURN(error);
4980 }
4981 
4982 
4983 void handler::get_dynamic_partition_info(PARTITION_STATS *stat_info,
4984                                          uint part_id)
4985 {
4986   info(HA_STATUS_CONST | HA_STATUS_TIME | HA_STATUS_VARIABLE |
4987        HA_STATUS_NO_LOCK);
4988   stat_info->records=              stats.records;
4989   stat_info->mean_rec_length=      stats.mean_rec_length;
4990   stat_info->data_file_length=     stats.data_file_length;
4991   stat_info->max_data_file_length= stats.max_data_file_length;
4992   stat_info->index_file_length=    stats.index_file_length;
4993   stat_info->max_index_file_length=stats.max_index_file_length;
4994   stat_info->delete_length=        stats.delete_length;
4995   stat_info->create_time=          stats.create_time;
4996   stat_info->update_time=          stats.update_time;
4997   stat_info->check_time=           stats.check_time;
4998   stat_info->check_sum=            stats.checksum;
4999   stat_info->check_sum_null=       stats.checksum_null;
5000 }
5001 
5002 
5003 /*
5004   Updates the global table stats with the TABLE this handler represents
5005 */
5006 
5007 void handler::update_global_table_stats()
5008 {
5009   TABLE_STATS * table_stats;
5010 
5011   status_var_add(table->in_use->status_var.rows_read, rows_read);
5012   DBUG_ASSERT(rows_tmp_read == 0);
5013 
5014   if (!table->in_use->userstat_running)
5015   {
5016     rows_read= rows_changed= 0;
5017     return;
5018   }
5019 
5020   if (rows_read + rows_changed == 0)
5021     return;                                     // Nothing to update.
5022 
5023   DBUG_ASSERT(table->s);
5024   DBUG_ASSERT(table->s->table_cache_key.str);
5025 
5026   mysql_mutex_lock(&LOCK_global_table_stats);
5027   /* Gets the global table stats, creating one if necessary. */
5028   if (!(table_stats= (TABLE_STATS*)
5029         my_hash_search(&global_table_stats,
5030                     (uchar*) table->s->table_cache_key.str,
5031                     table->s->table_cache_key.length)))
5032   {
5033     if (!(table_stats = ((TABLE_STATS*)
5034                          my_malloc(sizeof(TABLE_STATS),
5035                                    MYF(MY_WME | MY_ZEROFILL)))))
5036     {
5037       /* Out of memory error already given */
5038       goto end;
5039     }
5040     memcpy(table_stats->table, table->s->table_cache_key.str,
5041            table->s->table_cache_key.length);
5042     table_stats->table_name_length= (uint)table->s->table_cache_key.length;
5043     table_stats->engine_type= ht->db_type;
5044     /* No need to set variables to 0, as we use MY_ZEROFILL above */
5045 
5046     if (my_hash_insert(&global_table_stats, (uchar*) table_stats))
5047     {
5048       /* Out of memory error is already given */
5049       my_free(table_stats);
5050       goto end;
5051     }
5052   }
5053   // Updates the global table stats.
5054   table_stats->rows_read+=    rows_read;
5055   table_stats->rows_changed+= rows_changed;
5056   table_stats->rows_changed_x_indexes+= (rows_changed *
5057                                          (table->s->keys ? table->s->keys :
5058                                           1));
5059   rows_read= rows_changed= 0;
5060 end:
5061   mysql_mutex_unlock(&LOCK_global_table_stats);
5062 }
5063 
5064 
5065 /*
5066   Updates the global index stats with this handler's accumulated index reads.
5067 */
5068 
5069 void handler::update_global_index_stats()
5070 {
5071   DBUG_ASSERT(table->s);
5072 
5073   if (!table->in_use->userstat_running)
5074   {
5075     /* Reset all index read values */
5076     bzero(index_rows_read, sizeof(index_rows_read[0]) * table->s->keys);
5077     return;
5078   }
5079 
5080   for (uint index = 0; index < table->s->keys; index++)
5081   {
5082     if (index_rows_read[index])
5083     {
5084       INDEX_STATS* index_stats;
5085       size_t key_length;
5086       KEY *key_info = &table->key_info[index];  // Rows were read using this
5087 
5088       DBUG_ASSERT(key_info->cache_name);
5089       if (!key_info->cache_name)
5090         continue;
5091       key_length= table->s->table_cache_key.length + key_info->name.length + 1;
5092       mysql_mutex_lock(&LOCK_global_index_stats);
5093       // Gets the global index stats, creating one if necessary.
5094       if (!(index_stats= (INDEX_STATS*) my_hash_search(&global_index_stats,
5095                                                     key_info->cache_name,
5096                                                     key_length)))
5097       {
5098         if (!(index_stats = ((INDEX_STATS*)
5099                              my_malloc(sizeof(INDEX_STATS),
5100                                        MYF(MY_WME | MY_ZEROFILL)))))
5101           goto end;                             // Error is already given
5102 
5103         memcpy(index_stats->index, key_info->cache_name, key_length);
5104         index_stats->index_name_length= key_length;
5105         if (my_hash_insert(&global_index_stats, (uchar*) index_stats))
5106         {
5107           my_free(index_stats);
5108           goto end;
5109         }
5110       }
5111       /* Updates the global index stats. */
5112       index_stats->rows_read+= index_rows_read[index];
5113       index_rows_read[index]= 0;
5114 end:
5115       mysql_mutex_unlock(&LOCK_global_index_stats);
5116     }
5117   }
5118 }
5119 
5120 
5121 static void flush_checksum(ha_checksum *row_crc, uchar **checksum_start,
5122                            size_t *checksum_length)
5123 {
5124   if (*checksum_start)
5125   {
5126     *row_crc= my_checksum(*row_crc, *checksum_start, *checksum_length);
5127     *checksum_start= NULL;
5128     *checksum_length= 0;
5129   }
5130 }
5131 
5132 
/* Calculate the table's checksum */
5134 int handler::calculate_checksum()
5135 {
5136   int error;
5137   THD *thd=ha_thd();
5138   DBUG_ASSERT(table->s->last_null_bit_pos < 8);
5139   uchar null_mask= table->s->last_null_bit_pos
5140                    ? 256 -  (1 << table->s->last_null_bit_pos) : 0;
5141 
5142   table->use_all_stored_columns();
5143   stats.checksum= 0;
5144 
5145   if ((error= ha_rnd_init(1)))
5146     return error;
5147 
5148   for (;;)
5149   {
5150     if (thd->killed)
5151       return HA_ERR_ABORTED_BY_USER;
5152 
5153     ha_checksum row_crc= 0;
5154     error= ha_rnd_next(table->record[0]);
5155     if (error)
5156       break;
5157 
5158     if (table->s->null_bytes)
5159     {
5160       /* fix undefined null bits */
5161       table->record[0][table->s->null_bytes-1] |= null_mask;
5162       if (!(table->s->db_create_options & HA_OPTION_PACK_RECORD))
5163         table->record[0][0] |= 1;
5164 
5165       row_crc= my_checksum(row_crc, table->record[0], table->s->null_bytes);
5166     }
5167 
5168     uchar *checksum_start= NULL;
5169     size_t checksum_length= 0;
5170     for (uint i= 0; i < table->s->fields; i++ )
5171     {
5172       Field *f= table->field[i];
5173 
5174       if (! thd->variables.old_mode && f->is_real_null(0))
5175       {
5176         flush_checksum(&row_crc, &checksum_start, &checksum_length);
5177         continue;
5178       }
5179      /*
5180        BLOB and VARCHAR have pointers in their field, we must convert
5181        to string; GEOMETRY is implemented on top of BLOB.
5182        BIT may store its data among NULL bits, convert as well.
5183      */
5184       switch (f->type()) {
5185         case MYSQL_TYPE_BLOB:
5186         case MYSQL_TYPE_VARCHAR:
5187         case MYSQL_TYPE_GEOMETRY:
5188         case MYSQL_TYPE_BIT:
5189         {
5190           flush_checksum(&row_crc, &checksum_start, &checksum_length);
5191           String tmp;
5192           f->val_str(&tmp);
5193           row_crc= my_checksum(row_crc, (uchar*) tmp.ptr(), tmp.length());
5194           break;
5195         }
5196         default:
5197           if (!checksum_start)
5198             checksum_start= f->ptr;
5199           DBUG_ASSERT(checksum_start + checksum_length == f->ptr);
5200           checksum_length+= f->pack_length();
5201           break;
5202       }
5203     }
5204     flush_checksum(&row_crc, &checksum_start, &checksum_length);
5205 
5206     stats.checksum+= row_crc;
5207   }
5208   ha_rnd_end();
5209   return error == HA_ERR_END_OF_FILE ? 0 : error;
5210 }
5211 
5212 
5213 /****************************************************************************
** Some general functions that are not in the handler class
5215 ****************************************************************************/
5216 
5217 /**
  Initializes the table file and calls the appropriate database creator.
5219 
5220   @retval
5221    0  ok
5222   @retval
5223    1  error
5224 */
5225 int ha_create_table(THD *thd, const char *path,
5226                     const char *db, const char *table_name,
5227                     HA_CREATE_INFO *create_info, LEX_CUSTRING *frm)
5228 {
5229   int error= 1;
5230   TABLE table;
5231   char name_buff[FN_REFLEN];
5232   const char *name;
5233   TABLE_SHARE share;
5234   Abort_on_warning_instant_set old_abort_on_warning(thd, 0);
5235   bool temp_table __attribute__((unused)) =
5236     create_info->options & (HA_LEX_CREATE_TMP_TABLE | HA_CREATE_TMP_ALTER);
5237   DBUG_ENTER("ha_create_table");
5238 
5239   init_tmp_table_share(thd, &share, db, 0, table_name, path);
5240 
5241   if (frm)
5242   {
5243     bool write_frm_now= !create_info->db_type->discover_table &&
5244                         !create_info->tmp_table();
5245 
5246     share.frm_image= frm;
5247 
5248     // open an frm image
5249     if (share.init_from_binary_frm_image(thd, write_frm_now,
5250                                          frm->str, frm->length))
5251       goto err;
5252   }
5253   else
5254   {
5255     // open an frm file
5256     share.db_plugin= ha_lock_engine(thd, create_info->db_type);
5257 
5258     if (open_table_def(thd, &share))
5259       goto err;
5260   }
5261 
5262   share.m_psi= PSI_CALL_get_table_share(temp_table, &share);
5263 
5264   if (open_table_from_share(thd, &share, &empty_clex_str, 0, READ_ALL, 0,
5265                             &table, true))
5266     goto err;
5267 
5268   update_create_info_from_table(create_info, &table);
5269 
5270   name= get_canonical_filename(table.file, share.path.str, name_buff);
5271 
5272   error= table.file->ha_create(name, &table, create_info);
5273 
5274   if (unlikely(error))
5275   {
5276     if (!thd->is_error())
5277       my_error(ER_CANT_CREATE_TABLE, MYF(0), db, table_name, error);
5278     table.file->print_error(error, MYF(ME_WARNING));
5279     PSI_CALL_drop_table_share(temp_table, share.db.str, (uint)share.db.length,
5280                               share.table_name.str, (uint)share.table_name.length);
5281   }
5282 
5283   (void) closefrm(&table);
5284 
5285 err:
5286   free_table_share(&share);
5287   DBUG_RETURN(error != 0);
5288 }
5289 
5290 void st_ha_check_opt::init()
5291 {
5292   flags= sql_flags= 0;
5293   start_time= my_time(0);
5294 }
5295 
5296 
5297 /*****************************************************************************
5298   Key cache handling.
5299 
5300   This code is only relevant for ISAM/MyISAM tables
5301 
5302   key_cache->cache may be 0 only in the case where a key cache is not
  initialized or when we were not able to init the key cache in a previous
5304   call to ha_init_key_cache() (probably out of memory)
5305 *****************************************************************************/
5306 
5307 /**
  Init a key cache if it has not been initialized before.
5309 */
5310 int ha_init_key_cache(const char *name, KEY_CACHE *key_cache, void *unused
5311                       __attribute__((unused)))
5312 {
5313   DBUG_ENTER("ha_init_key_cache");
5314 
5315   if (!key_cache->key_cache_inited)
5316   {
5317     mysql_mutex_lock(&LOCK_global_system_variables);
5318     size_t tmp_buff_size= (size_t) key_cache->param_buff_size;
5319     uint tmp_block_size= (uint) key_cache->param_block_size;
5320     uint division_limit= (uint)key_cache->param_division_limit;
5321     uint age_threshold=  (uint)key_cache->param_age_threshold;
5322     uint partitions=     (uint)key_cache->param_partitions;
5323     uint changed_blocks_hash_size=  (uint)key_cache->changed_blocks_hash_size;
5324     mysql_mutex_unlock(&LOCK_global_system_variables);
5325     DBUG_RETURN(!init_key_cache(key_cache,
5326 				tmp_block_size,
5327 				tmp_buff_size,
5328 				division_limit, age_threshold,
5329                                 changed_blocks_hash_size,
5330                                 partitions));
5331   }
5332   DBUG_RETURN(0);
5333 }
5334 
5335 
5336 /**
5337   Resize key cache.
5338 */
5339 int ha_resize_key_cache(KEY_CACHE *key_cache)
5340 {
5341   DBUG_ENTER("ha_resize_key_cache");
5342 
5343   if (key_cache->key_cache_inited)
5344   {
5345     mysql_mutex_lock(&LOCK_global_system_variables);
5346     size_t tmp_buff_size= (size_t) key_cache->param_buff_size;
5347     long tmp_block_size= (long) key_cache->param_block_size;
5348     uint division_limit= (uint)key_cache->param_division_limit;
5349     uint age_threshold=  (uint)key_cache->param_age_threshold;
5350     uint changed_blocks_hash_size=  (uint)key_cache->changed_blocks_hash_size;
5351     mysql_mutex_unlock(&LOCK_global_system_variables);
5352     DBUG_RETURN(!resize_key_cache(key_cache, tmp_block_size,
5353 				  tmp_buff_size,
5354 				  division_limit, age_threshold,
5355                                   changed_blocks_hash_size));
5356   }
5357   DBUG_RETURN(0);
5358 }
5359 
5360 
5361 /**
5362   Change parameters for key cache (like division_limit)
5363 */
5364 int ha_change_key_cache_param(KEY_CACHE *key_cache)
5365 {
5366   DBUG_ENTER("ha_change_key_cache_param");
5367 
5368   if (key_cache->key_cache_inited)
5369   {
5370     mysql_mutex_lock(&LOCK_global_system_variables);
5371     uint division_limit= (uint)key_cache->param_division_limit;
5372     uint age_threshold=  (uint)key_cache->param_age_threshold;
5373     mysql_mutex_unlock(&LOCK_global_system_variables);
5374     change_key_cache_param(key_cache, division_limit, age_threshold);
5375   }
5376   DBUG_RETURN(0);
5377 }
5378 
5379 
5380 /**
5381   Repartition key cache
5382 */
5383 int ha_repartition_key_cache(KEY_CACHE *key_cache)
5384 {
5385   DBUG_ENTER("ha_repartition_key_cache");
5386 
5387   if (key_cache->key_cache_inited)
5388   {
5389     mysql_mutex_lock(&LOCK_global_system_variables);
5390     size_t tmp_buff_size= (size_t) key_cache->param_buff_size;
5391     long tmp_block_size= (long) key_cache->param_block_size;
5392     uint division_limit= (uint)key_cache->param_division_limit;
5393     uint age_threshold=  (uint)key_cache->param_age_threshold;
5394     uint partitions=     (uint)key_cache->param_partitions;
5395     uint changed_blocks_hash_size=  (uint)key_cache->changed_blocks_hash_size;
5396     mysql_mutex_unlock(&LOCK_global_system_variables);
5397     DBUG_RETURN(!repartition_key_cache(key_cache, tmp_block_size,
5398 				       tmp_buff_size,
5399 				       division_limit, age_threshold,
5400                                        changed_blocks_hash_size,
5401                                        partitions));
5402   }
5403   DBUG_RETURN(0);
5404 }
5405 
5406 
5407 /**
5408   Move all tables from one key cache to another one.
5409 */
5410 int ha_change_key_cache(KEY_CACHE *old_key_cache,
5411 			KEY_CACHE *new_key_cache)
5412 {
5413   mi_change_key_cache(old_key_cache, new_key_cache);
5414   return 0;
5415 }
5416 
5417 
5418 static my_bool discover_handlerton(THD *thd, plugin_ref plugin,
5419                                    void *arg)
5420 {
5421   TABLE_SHARE *share= (TABLE_SHARE *)arg;
5422   handlerton *hton= plugin_hton(plugin);
5423   if (hton->state == SHOW_OPTION_YES && hton->discover_table)
5424   {
5425     share->db_plugin= plugin;
5426     int error= hton->discover_table(hton, thd, share);
5427     if (error != HA_ERR_NO_SUCH_TABLE)
5428     {
5429       if (unlikely(error))
5430       {
5431         if (!share->error)
5432         {
5433           share->error= OPEN_FRM_ERROR_ALREADY_ISSUED;
5434           plugin_unlock(0, share->db_plugin);
5435         }
5436 
5437         /*
5438           report an error, unless it is "generic" and a more
5439           specific one was already reported
5440         */
5441         if (error != HA_ERR_GENERIC || !thd->is_error())
5442           my_error(ER_GET_ERRNO, MYF(0), error, plugin_name(plugin)->str);
5443         share->db_plugin= 0;
5444       }
5445       else
5446         share->error= OPEN_FRM_OK;
5447 
5448       status_var_increment(thd->status_var.ha_discover_count);
5449       return TRUE; // abort the search
5450     }
5451     share->db_plugin= 0;
5452   }
5453 
5454   DBUG_ASSERT(share->error == OPEN_FRM_OPEN_ERROR);
5455   return FALSE;    // continue with the next engine
5456 }
5457 
5458 int ha_discover_table(THD *thd, TABLE_SHARE *share)
5459 {
5460   DBUG_ENTER("ha_discover_table");
5461   int found;
5462 
5463   DBUG_ASSERT(share->error == OPEN_FRM_OPEN_ERROR);   // share is not OK yet
5464 
5465   if (!engines_with_discover)
5466     found= FALSE;
5467   else if (share->db_plugin)
5468     found= discover_handlerton(thd, share->db_plugin, share);
5469   else
5470     found= plugin_foreach(thd, discover_handlerton,
5471                         MYSQL_STORAGE_ENGINE_PLUGIN, share);
5472 
5473   if (!found)
5474     open_table_error(share, OPEN_FRM_OPEN_ERROR, ENOENT); // not found
5475 
5476   DBUG_RETURN(share->error != OPEN_FRM_OK);
5477 }
5478 
5479 static my_bool file_ext_exists(char *path, size_t path_len, const char *ext)
5480 {
5481   strmake(path + path_len, ext, FN_REFLEN - path_len);
5482   return !access(path, F_OK);
5483 }
5484 
5485 struct st_discover_existence_args
5486 {
5487   char *path;
5488   size_t  path_len;
5489   const char *db, *table_name;
5490   handlerton *hton;
5491   bool frm_exists;
5492 };
5493 
5494 static my_bool discover_existence(THD *thd, plugin_ref plugin,
5495                                   void *arg)
5496 {
5497   st_discover_existence_args *args= (st_discover_existence_args*)arg;
5498   handlerton *ht= plugin_hton(plugin);
5499   if (ht->state != SHOW_OPTION_YES || !ht->discover_table_existence)
5500     return args->frm_exists;
5501 
5502   args->hton= ht;
5503 
5504   if (ht->discover_table_existence == ext_based_existence)
5505     return file_ext_exists(args->path, args->path_len,
5506                            ht->tablefile_extensions[0]);
5507 
5508   return ht->discover_table_existence(ht, args->db, args->table_name);
5509 }
5510 
5511 class Table_exists_error_handler : public Internal_error_handler
5512 {
5513 public:
5514   Table_exists_error_handler()
5515     : m_handled_errors(0), m_unhandled_errors(0)
5516   {}
5517 
5518   bool handle_condition(THD *thd,
5519                         uint sql_errno,
5520                         const char* sqlstate,
5521                         Sql_condition::enum_warning_level *level,
5522                         const char* msg,
5523                         Sql_condition ** cond_hdl)
5524   {
5525     *cond_hdl= NULL;
5526     if (sql_errno == ER_NO_SUCH_TABLE ||
5527         sql_errno == ER_NO_SUCH_TABLE_IN_ENGINE ||
5528         sql_errno == ER_WRONG_OBJECT)
5529     {
5530       m_handled_errors++;
5531       return TRUE;
5532     }
5533 
5534     if (*level == Sql_condition::WARN_LEVEL_ERROR)
5535       m_unhandled_errors++;
5536     return FALSE;
5537   }
5538 
5539   bool safely_trapped_errors()
5540   {
5541     return ((m_handled_errors > 0) && (m_unhandled_errors == 0));
5542   }
5543 
5544 private:
5545   int m_handled_errors;
5546   int m_unhandled_errors;
5547 };
5548 
5549 /**
  Check if a given table exists, without doing a full discovery, if possible.

  If 'hton' is not NULL, it is set to the handlerton of the storage engine
  of this table, or to view_pseudo_hton if the frm belongs to a view.

  This function takes discovery correctly into account. If an frm is found,
  it discovers the table to make sure it really exists in the engine.
  If no frm is found, it discovers the table in case it still exists in
  the engine.

  While it tries to cut corners (it does not open the .frm if no discovering
  engine is enabled, and skips full discovery if all discovering engines
  support discover_table_existence, etc.), it still *may* be quite expensive
  and must be used sparingly.
5564 
  @retval true    Table exists (even if an error occurred, e.g. a bad frm)
  @retval false   Table does not exist (one can do CREATE TABLE table_name)

  @note if the frm exists but the table does not exist in the engine,
        *hton will be set, but the return value will be false.
5570 
5571   @note if frm file exists, but the table cannot be opened (engine not
5572         loaded, frm is invalid), the return value will be true, but
5573         *hton will be NULL.
5574 */
5575 
5576 bool ha_table_exists(THD *thd, const LEX_CSTRING *db, const LEX_CSTRING *table_name,
5577                      handlerton **hton, bool *is_sequence)
5578 {
5579   handlerton *dummy;
5580   bool dummy2;
5581   DBUG_ENTER("ha_table_exists");
5582 
5583   if (hton)
5584     *hton= 0;
5585   else if (engines_with_discover)
5586     hton= &dummy;
5587   if (!is_sequence)
5588     is_sequence= &dummy2;
5589   *is_sequence= 0;
5590 
5591   TDC_element *element= tdc_lock_share(thd, db->str, table_name->str);
5592   if (element && element != MY_ERRPTR)
5593   {
5594     if (hton)
5595       *hton= element->share->db_type();
5596     *is_sequence= element->share->table_type == TABLE_TYPE_SEQUENCE;
5597     tdc_unlock_share(element);
5598     DBUG_RETURN(TRUE);
5599   }
5600 
5601   char path[FN_REFLEN + 1];
5602   size_t path_len = build_table_filename(path, sizeof(path) - 1,
5603                                          db->str, table_name->str, "", 0);
5604   st_discover_existence_args args= {path, path_len, db->str, table_name->str, 0, true};
5605 
5606   if (file_ext_exists(path, path_len, reg_ext))
5607   {
5608     bool exists= true;
5609     if (hton)
5610     {
5611       char engine_buf[NAME_CHAR_LEN + 1];
5612       LEX_CSTRING engine= { engine_buf, 0 };
5613       Table_type type;
5614 
5615       if ((type= dd_frm_type(thd, path, &engine, is_sequence)) ==
5616           TABLE_TYPE_UNKNOWN)
5617         DBUG_RETURN(0);
5618 
5619       if (type != TABLE_TYPE_VIEW)
5620       {
5621         plugin_ref p=  plugin_lock_by_name(thd, &engine,
5622                                            MYSQL_STORAGE_ENGINE_PLUGIN);
5623         *hton= p ? plugin_hton(p) : NULL;
5624         if (*hton)
5625           // verify that the table really exists
5626           exists= discover_existence(thd, p, &args);
5627       }
5628       else
5629         *hton= view_pseudo_hton;
5630     }
5631     DBUG_RETURN(exists);
5632   }
5633 
5634   args.frm_exists= false;
5635   if (plugin_foreach(thd, discover_existence, MYSQL_STORAGE_ENGINE_PLUGIN,
5636                      &args))
5637   {
5638     if (hton)
5639       *hton= args.hton;
5640     DBUG_RETURN(TRUE);
5641   }
5642 
5643   if (need_full_discover_for_existence)
5644   {
5645     TABLE_LIST table;
5646     uint flags = GTS_TABLE | GTS_VIEW;
5647     if (!hton)
5648       flags|= GTS_NOLOCK;
5649 
5650     Table_exists_error_handler no_such_table_handler;
5651     thd->push_internal_handler(&no_such_table_handler);
5652     table.init_one_table(db, table_name, 0, TL_READ);
5653     TABLE_SHARE *share= tdc_acquire_share(thd, &table, flags);
5654     thd->pop_internal_handler();
5655 
5656     if (hton && share)
5657     {
5658       *hton= share->db_type();
5659       tdc_release_share(share);
5660     }
5661 
5662     // the table doesn't exist if we've caught ER_NO_SUCH_TABLE and nothing else
5663     DBUG_RETURN(!no_such_table_handler.safely_trapped_errors());
5664   }
5665 
5666   DBUG_RETURN(FALSE);
5667 }
5668 
5669 /**
5670   Discover all table names in a given database
5671 */
5672 extern "C" {
5673 
5674 static int cmp_file_names(const void *a, const void *b)
5675 {
5676   CHARSET_INFO *cs= character_set_filesystem;
5677   char *aa= ((FILEINFO *)a)->name;
5678   char *bb= ((FILEINFO *)b)->name;
5679   return my_strnncoll(cs, (uchar*)aa, strlen(aa), (uchar*)bb, strlen(bb));
5680 }
5681 
5682 static int cmp_table_names(LEX_CSTRING * const *a, LEX_CSTRING * const *b)
5683 {
5684   return my_strnncoll(&my_charset_bin, (uchar*)((*a)->str), (*a)->length,
5685                                        (uchar*)((*b)->str), (*b)->length);
5686 }
5687 
5688 #ifndef DBUG_OFF
5689 static int cmp_table_names_desc(LEX_CSTRING * const *a, LEX_CSTRING * const *b)
5690 {
5691   return -cmp_table_names(a, b);
5692 }
5693 #endif
5694 
5695 }
5696 
5697 Discovered_table_list::Discovered_table_list(THD *thd_arg,
5698                  Dynamic_array<LEX_CSTRING*> *tables_arg,
5699                  const LEX_CSTRING *wild_arg) :
5700   thd(thd_arg), with_temps(false), tables(tables_arg)
5701 {
5702   if (wild_arg->str && wild_arg->str[0])
5703   {
5704     wild= wild_arg->str;
5705     wend= wild + wild_arg->length;
5706   }
5707   else
5708     wild= 0;
5709 }
5710 
5711 bool Discovered_table_list::add_table(const char *tname, size_t tlen)
5712 {
5713   /*
    TODO: check with_temps and filter out temporary tables.
    Implement the check when we have at least one affected engine (one with
    a custom discover_table_names() method that calls add_table() directly).
5717     Note: avoid comparing the same name twice (here and in add_file).
5718   */
5719   if (wild && my_wildcmp(table_alias_charset, tname, tname + tlen, wild, wend,
5720                          wild_prefix, wild_one, wild_many))
5721       return 0;
5722 
5723   LEX_CSTRING *name= thd->make_clex_string(tname, tlen);
5724   if (!name || tables->append(name))
5725     return 1;
5726   return 0;
5727 }
5728 
5729 bool Discovered_table_list::add_file(const char *fname)
5730 {
5731   bool is_temp= strncmp(fname, STRING_WITH_LEN(tmp_file_prefix)) == 0;
5732 
5733   if (is_temp && !with_temps)
5734     return 0;
5735 
5736   char tname[SAFE_NAME_LEN + 1];
5737   size_t tlen= filename_to_tablename(fname, tname, sizeof(tname), is_temp);
5738   return add_table(tname, tlen);
5739 }
5740 
5741 
5742 void Discovered_table_list::sort()
5743 {
5744   tables->sort(cmp_table_names);
5745 }
5746 
5747 
5748 #ifndef DBUG_OFF
5749 void Discovered_table_list::sort_desc()
5750 {
5751   tables->sort(cmp_table_names_desc);
5752 }
5753 #endif
5754 
5755 
5756 void Discovered_table_list::remove_duplicates()
5757 {
5758   LEX_CSTRING **src= tables->front();
5759   LEX_CSTRING **dst= src;
5760   sort();
5761   while (++dst <= tables->back())
5762   {
5763     LEX_CSTRING *s= *src, *d= *dst;
5764     DBUG_ASSERT(strncmp(s->str, d->str, MY_MIN(s->length, d->length)) <= 0);
5765     if ((s->length != d->length || strncmp(s->str, d->str, d->length)))
5766     {
5767       src++;
5768       if (src != dst)
5769         *src= *dst;
5770     }
5771   }
5772   tables->elements(src - tables->front() + 1);
5773 }
5774 
5775 struct st_discover_names_args
5776 {
5777   LEX_CSTRING *db;
5778   MY_DIR *dirp;
5779   Discovered_table_list *result;
5780   uint possible_duplicates;
5781 };
5782 
5783 static my_bool discover_names(THD *thd, plugin_ref plugin,
5784                               void *arg)
5785 {
5786   st_discover_names_args *args= (st_discover_names_args *)arg;
5787   handlerton *ht= plugin_hton(plugin);
5788 
5789   if (ht->state == SHOW_OPTION_YES && ht->discover_table_names)
5790   {
5791     size_t old_elements= args->result->tables->elements();
5792     if (ht->discover_table_names(ht, args->db, args->dirp, args->result))
5793       return 1;
5794 
5795     /*
5796       hton_ext_based_table_discovery never discovers a table that has
5797       a corresponding .frm file; but custom engine discover methods might
5798     */
5799     if (ht->discover_table_names != hton_ext_based_table_discovery)
5800       args->possible_duplicates+= (uint)(args->result->tables->elements() - old_elements);
5801   }
5802 
5803   return 0;
5804 }
5805 
5806 /**
  Return the list of tables in a database

  @param thd        thread context
5810   @param db         database to look into
5811   @param dirp       list of files in this database (as returned by my_dir())
5812   @param result     the object to return the list of files in
5813   @param reusable   if true, on return, 'dirp' will be a valid list of all
5814                     non-table files. If false, discovery will work much faster,
5815                     but it will leave 'dirp' corrupted and completely unusable,
5816                     only good for my_dirend().
5817 
5818   Normally, reusable=false for SHOW and INFORMATION_SCHEMA, and reusable=true
5819   for DROP DATABASE (as it needs to know and delete non-table files).
5820 */
5821 
5822 int ha_discover_table_names(THD *thd, LEX_CSTRING *db, MY_DIR *dirp,
5823                             Discovered_table_list *result, bool reusable)
5824 {
5825   int error;
5826   DBUG_ENTER("ha_discover_table_names");
5827 
5828   if (engines_with_discover_file_names == 0 && !reusable)
5829   {
5830     st_discover_names_args args= {db, NULL, result, 0};
5831     error= ext_table_discovery_simple(dirp, result) ||
5832            plugin_foreach(thd, discover_names,
5833                             MYSQL_STORAGE_ENGINE_PLUGIN, &args);
5834   }
5835   else
5836   {
5837     st_discover_names_args args= {db, dirp, result, 0};
5838 
5839     /* extension_based_table_discovery relies on dirp being sorted */
5840     my_qsort(dirp->dir_entry, dirp->number_of_files,
5841              sizeof(FILEINFO), cmp_file_names);
5842 
5843     error= extension_based_table_discovery(dirp, reg_ext, result) ||
5844            plugin_foreach(thd, discover_names,
5845                             MYSQL_STORAGE_ENGINE_PLUGIN, &args);
5846     if (args.possible_duplicates > 0)
5847       result->remove_duplicates();
5848   }
5849 
5850   DBUG_RETURN(error);
5851 }
5852 
5853 
5854 /*
5855 int handler::pre_read_multi_range_first(KEY_MULTI_RANGE **found_range_p,
5856                                         KEY_MULTI_RANGE *ranges,
5857                                         uint range_count,
5858                                         bool sorted, HANDLER_BUFFER *buffer,
5859                                         bool use_parallel)
5860 {
5861   int result;
5862   DBUG_ENTER("handler::pre_read_multi_range_first");
5863   result = pre_read_range_first(ranges->start_key.keypart_map ?
5864                                 &ranges->start_key : 0,
5865                                 ranges->end_key.keypart_map ?
5866                                 &ranges->end_key : 0,
5867                                 test(ranges->range_flag & EQ_RANGE),
5868                                 sorted,
5869                                 use_parallel);
5870   DBUG_RETURN(result);
5871 }
5872 */
5873 
5874 
5875 /**
  Read the first row in a range defined by a start and an end key.
  Store the range for future calls to read_range_next().
5878 
5879   @param start_key		Start key. Is 0 if no min range
5880   @param end_key		End key.  Is 0 if no max range
5881   @param eq_range_arg	        Set to 1 if start_key == end_key
5882   @param sorted		Set to 1 if result should be sorted per key
5883 
5884   @note
5885     Record is read into table->record[0]
5886 
5887   @retval
5888     0			Found row
5889   @retval
5890     HA_ERR_END_OF_FILE	No rows in range
5891   @retval
5892     \#			Error code
5893 */
5894 int handler::read_range_first(const key_range *start_key,
5895 			      const key_range *end_key,
5896 			      bool eq_range_arg, bool sorted)
5897 {
5898   int result;
5899   DBUG_ENTER("handler::read_range_first");
5900 
5901   eq_range= eq_range_arg;
5902   set_end_range(end_key);
5903   range_key_part= table->key_info[active_index].key_part;
5904 
5905   if (!start_key)			// Read first record
5906     result= ha_index_first(table->record[0]);
5907   else
5908     result= ha_index_read_map(table->record[0],
5909                               start_key->key,
5910                               start_key->keypart_map,
5911                               start_key->flag);
5912   if (result)
5913     DBUG_RETURN((result == HA_ERR_KEY_NOT_FOUND)
5914 		? HA_ERR_END_OF_FILE
5915 		: result);
5916 
5917   if (compare_key(end_range) <= 0)
5918   {
5919     DBUG_RETURN(0);
5920   }
5921   else
5922   {
5923     /*
5924       The last read row does not fall in the range. So request
5925       storage engine to release row lock if possible.
5926     */
5927     unlock_row();
5928     DBUG_RETURN(HA_ERR_END_OF_FILE);
5929   }
5930 }
5931 
5932 
5933 /**
  Read the next row in the current range.
5935 
5936   @note
5937     Record is read into table->record[0]
5938 
5939   @retval
5940     0			Found row
5941   @retval
5942     HA_ERR_END_OF_FILE	No rows in range
5943   @retval
5944     \#			Error code
5945 */
5946 int handler::read_range_next()
5947 {
5948   int result;
5949   DBUG_ENTER("handler::read_range_next");
5950 
5951   if (eq_range)
5952   {
5953     /* We trust that index_next_same always gives a row in range */
5954     DBUG_RETURN(ha_index_next_same(table->record[0],
5955                                    end_range->key,
5956                                    end_range->length));
5957   }
5958   result= ha_index_next(table->record[0]);
5959   if (result)
5960     DBUG_RETURN(result);
5961 
5962   if (compare_key(end_range) <= 0)
5963   {
5964     DBUG_RETURN(0);
5965   }
5966   else
5967   {
5968     /*
5969       The last read row does not fall in the range. So request
5970       storage engine to release row lock if possible.
5971     */
5972     unlock_row();
5973     DBUG_RETURN(HA_ERR_END_OF_FILE);
5974   }
5975 }
5976 
5977 
5978 void handler::set_end_range(const key_range *end_key)
5979 {
5980   end_range= 0;
5981   if (end_key)
5982   {
5983     end_range= &save_end_range;
5984     save_end_range= *end_key;
5985     key_compare_result_on_equal=
5986       ((end_key->flag == HA_READ_BEFORE_KEY) ? 1 :
5987        (end_key->flag == HA_READ_AFTER_KEY) ? -1 : 0);
5988   }
5989 }
5990 
5991 
5992 /**
  Check if the found key (in the row) is past the end of the range (over the
  max value).
5994 
5995   @param range		range to compare to row. May be 0 for no range
5996 
5997   @see also
5998     key.cc::key_cmp()
5999 
6000   @return
6001     The return value is SIGN(key_in_row - range_key):
6002 
6003     - 0   : Key is equal to range or 'range' == 0 (no range)
6004     - -1  : Key is less than range
6005     - 1   : Key is larger than range
6006 */
6007 int handler::compare_key(key_range *range)
6008 {
6009   int cmp;
6010   if (!range || in_range_check_pushed_down)
6011     return 0;					// No max range
6012   cmp= key_cmp(range_key_part, range->key, range->length);
6013   if (!cmp)
6014     cmp= key_compare_result_on_equal;
6015   return cmp;
6016 }
6017 
6018 
6019 /*
  Same as compare_key() but doesn't check in_range_check_pushed_down.
  This is used by the index condition pushdown implementation.
6022 */
6023 
6024 int handler::compare_key2(key_range *range) const
6025 {
6026   int cmp;
6027   if (!range)
6028     return 0;					// no max range
6029   cmp= key_cmp(range_key_part, range->key, range->length);
6030   if (!cmp)
6031     cmp= key_compare_result_on_equal;
6032   return cmp;
6033 }
6034 
6035 
6036 /**
6037   ICP callback - to be called by an engine to check the pushed condition
6038 */
6039 extern "C" check_result_t handler_index_cond_check(void* h_arg)
6040 {
6041   handler *h= (handler*)h_arg;
6042   THD *thd= h->table->in_use;
6043   check_result_t res;
6044 
6045   DEBUG_SYNC(thd, "handler_index_cond_check");
6046   enum thd_kill_levels abort_at= h->has_transactions() ?
6047     THD_ABORT_SOFTLY : THD_ABORT_ASAP;
6048   if (thd_kill_level(thd) > abort_at)
6049     return CHECK_ABORTED_BY_USER;
6050 
6051   if (h->end_range && h->compare_key2(h->end_range) > 0)
6052     return CHECK_OUT_OF_RANGE;
6053   h->increment_statistics(&SSV::ha_icp_attempts);
6054   if ((res= h->pushed_idx_cond->val_int()? CHECK_POS : CHECK_NEG) ==
6055       CHECK_POS)
6056     h->increment_statistics(&SSV::ha_icp_match);
6057   return res;
6058 }
6059 
6060 
6061 /**
  Rowid filter callback - to be called by an engine to check the rowid /
  primary key of a row whose data is to be fetched against the rowid filter
  in use
6064 */
6065 
6066 extern "C"
6067 check_result_t handler_rowid_filter_check(void *h_arg)
6068 {
6069   handler *h= (handler*) h_arg;
6070   TABLE *tab= h->get_table();
6071 
6072   /*
6073     Check for out-of-range and killed conditions only if we haven't done it
6074     already in the pushed index condition check
6075   */
6076   if (!h->pushed_idx_cond)
6077   {
6078     THD *thd= h->table->in_use;
6079     DEBUG_SYNC(thd, "handler_rowid_filter_check");
6080     enum thd_kill_levels abort_at= h->has_transactions() ?
6081       THD_ABORT_SOFTLY : THD_ABORT_ASAP;
6082     if (thd_kill_level(thd) > abort_at)
6083       return CHECK_ABORTED_BY_USER;
6084 
6085     if (h->end_range && h->compare_key2(h->end_range) > 0)
6086       return CHECK_OUT_OF_RANGE;
6087   }
6088 
6089   h->position(tab->record[0]);
6090   return h->pushed_rowid_filter->check((char*)h->ref)? CHECK_POS: CHECK_NEG;
6091 }
6092 
6093 
6094 /**
  Callback function for an engine to check whether the rowid filter in use
  has already been built
6097 */
6098 
6099 extern "C" int handler_rowid_filter_is_active(void *h_arg)
6100 {
6101   if (!h_arg)
6102     return false;
6103   handler *h= (handler*) h_arg;
6104   return h->rowid_filter_is_active;
6105 }
6106 
6107 
6108 int handler::index_read_idx_map(uchar * buf, uint index, const uchar * key,
6109                                 key_part_map keypart_map,
6110                                 enum ha_rkey_function find_flag)
6111 {
6112   int error, UNINIT_VAR(error1);
6113 
6114   error= ha_index_init(index, 0);
6115   if (likely(!error))
6116   {
6117     error= index_read_map(buf, key, keypart_map, find_flag);
6118     error1= ha_index_end();
6119   }
6120   return error ? error : error1;
6121 }
6122 
6123 
6124 /**
6125   Returns a list of all known extensions.
6126 
    No mutexes; the worst-case race is a minor surplus memory allocation.
    We have to recreate the extension map if mysqld is restarted (for example
    within libmysqld).

  @retval
    pointer		pointer to a TYPELIB structure
6133 */
6134 static my_bool exts_handlerton(THD *unused, plugin_ref plugin,
6135                                void *arg)
6136 {
6137   List<char> *found_exts= (List<char> *) arg;
6138   handlerton *hton= plugin_hton(plugin);
6139   List_iterator_fast<char> it(*found_exts);
6140   const char **ext, *old_ext;
6141 
6142   for (ext= hton->tablefile_extensions; *ext; ext++)
6143   {
6144     while ((old_ext= it++))
6145     {
6146       if (!strcmp(old_ext, *ext))
6147         break;
6148     }
6149     if (!old_ext)
6150       found_exts->push_back((char *) *ext);
6151 
6152     it.rewind();
6153   }
6154   return FALSE;
6155 }
6156 
6157 TYPELIB *ha_known_exts(void)
6158 {
6159   if (!known_extensions.type_names || mysys_usage_id != known_extensions_id)
6160   {
6161     List<char> found_exts;
6162     const char **ext, *old_ext;
6163 
6164     known_extensions_id= mysys_usage_id;
6165     found_exts.push_back((char*) TRG_EXT);
6166     found_exts.push_back((char*) TRN_EXT);
6167 
6168     plugin_foreach(NULL, exts_handlerton,
6169                    MYSQL_STORAGE_ENGINE_PLUGIN, &found_exts);
6170 
6171     ext= (const char **) my_once_alloc(sizeof(char *)*
6172                                        (found_exts.elements+1),
6173                                        MYF(MY_WME | MY_FAE));
6174 
6175     DBUG_ASSERT(ext != 0);
6176     known_extensions.count= found_exts.elements;
6177     known_extensions.type_names= ext;
6178 
6179     List_iterator_fast<char> it(found_exts);
6180     while ((old_ext= it++))
6181       *ext++= old_ext;
6182     *ext= 0;
6183   }
6184   return &known_extensions;
6185 }
6186 
6187 
6188 static bool stat_print(THD *thd, const char *type, size_t type_len,
6189                        const char *file, size_t file_len,
6190                        const char *status, size_t status_len)
6191 {
6192   Protocol *protocol= thd->protocol;
6193   protocol->prepare_for_resend();
6194   protocol->store(type, type_len, system_charset_info);
6195   protocol->store(file, file_len, system_charset_info);
6196   protocol->store(status, status_len, system_charset_info);
6197   if (protocol->write())
6198     return TRUE;
6199   return FALSE;
6200 }
6201 
6202 
6203 static my_bool showstat_handlerton(THD *thd, plugin_ref plugin,
6204                                    void *arg)
6205 {
6206   enum ha_stat_type stat= *(enum ha_stat_type *) arg;
6207   handlerton *hton= plugin_hton(plugin);
6208   if (hton->state == SHOW_OPTION_YES && hton->show_status &&
6209       hton->show_status(hton, thd, stat_print, stat))
6210     return TRUE;
6211   return FALSE;
6212 }
6213 
6214 bool ha_show_status(THD *thd, handlerton *db_type, enum ha_stat_type stat)
6215 {
6216   List<Item> field_list;
6217   Protocol *protocol= thd->protocol;
6218   MEM_ROOT *mem_root= thd->mem_root;
6219   bool result;
6220 
6221   field_list.push_back(new (mem_root) Item_empty_string(thd, "Type", 10),
6222                        mem_root);
6223   field_list.push_back(new (mem_root)
6224                        Item_empty_string(thd, "Name", FN_REFLEN), mem_root);
6225   field_list.push_back(new (mem_root)
6226                        Item_empty_string(thd, "Status", 10),
6227                        mem_root);
6228 
6229   if (protocol->send_result_set_metadata(&field_list,
6230                             Protocol::SEND_NUM_ROWS | Protocol::SEND_EOF))
6231     return TRUE;
6232 
6233   if (db_type == NULL)
6234   {
6235     result= plugin_foreach(thd, showstat_handlerton,
6236                            MYSQL_STORAGE_ENGINE_PLUGIN, &stat);
6237   }
6238   else
6239   {
6240     if (db_type->state != SHOW_OPTION_YES)
6241     {
6242       const LEX_CSTRING *name= hton_name(db_type);
6243       result= stat_print(thd, name->str, name->length,
6244                          "", 0, "DISABLED", 8) ? 1 : 0;
6245     }
6246     else
6247     {
6248       result= db_type->show_status &&
6249               db_type->show_status(db_type, thd, stat_print, stat) ? 1 : 0;
6250     }
6251   }
6252 
6253   /*
    We also check thd->is_error() as InnoDB may return 0 even if
    there was an error.
6256   */
6257   if (likely(!result && !thd->is_error()))
6258     my_eof(thd);
6259   else if (!thd->is_error())
6260     my_error(ER_GET_ERRNO, MYF(0), errno, hton_name(db_type)->str);
6261   return result;
6262 }
6263 
6264 /*
  Function to check if the conditions for row-based binlogging are
  correct for the table.

  A row in the given table should be replicated if:
  - The call does not come from the partitioning engine
  - Row-based replication is enabled in the current thread
  - The binlog is enabled
  - It is not a temporary table
  - The binary log is open
  - The database the table resides in is binlogged (binlog_*_db rules)
  - The table is not mysql.event
6276 
6277   RETURN VALUE
6278     0  No binary logging in row format
6279     1  Row needs to be logged
6280 */
6281 
6282 bool handler::check_table_binlog_row_based(bool binlog_row)
6283 {
6284   if (table->versioned(VERS_TRX_ID))
6285     return false;
6286   if (unlikely((table->in_use->variables.sql_log_bin_off)))
6287     return 0;                            /* Called by partitioning engine */
6288 #ifdef WITH_WSREP
6289   if (!table->in_use->variables.sql_log_bin &&
6290       wsrep_thd_is_applying(table->in_use))
6291     return 0;      /* wsrep patch sets sql_log_bin to silence binlogging
6292                       from high priority threads */
6293 #endif /* WITH_WSREP */
6294   if (unlikely((!check_table_binlog_row_based_done)))
6295   {
6296     check_table_binlog_row_based_done= 1;
6297     check_table_binlog_row_based_result=
6298       check_table_binlog_row_based_internal(binlog_row);
6299   }
6300   return check_table_binlog_row_based_result;
6301 }
6302 
6303 bool handler::check_table_binlog_row_based_internal(bool binlog_row)
6304 {
6305   THD *thd= table->in_use;
6306 
6307   return (table->s->can_do_row_logging &&
6308           thd->is_current_stmt_binlog_format_row() &&
6309           /*
            Wsrep partially enables binary logging if it has not been
            explicitly turned on. As a result we return 'true' if we are in
            wsrep binlog emulation mode and the current thread is not a wsrep
            applier or replayer thread. This decision is not affected by
            @@sql_log_bin as we want the events to make it into the binlog
            cache, only to filter them out later before they make it into the
            binary log file.
6317 
6318             However, we do return 'false' if binary logging was temporarily
6319             turned off (see tmp_disable_binlog(A)).
6320 
6321             Otherwise, return 'true' if binary logging is on.
6322           */
6323           IF_WSREP(((WSREP_EMULATE_BINLOG(thd) &&
6324                      wsrep_thd_is_local(thd)) ||
6325                     ((WSREP(thd) ||
6326                       (thd->variables.option_bits & OPTION_BIN_LOG)) &&
6327                      mysql_bin_log.is_open())),
6328                     (thd->variables.option_bits & OPTION_BIN_LOG) &&
6329                     mysql_bin_log.is_open()));
6330 }
6331 
6332 
6333 /** @brief
6334    Write table maps for all (manually or automatically) locked tables
6335    to the binary log. Also, if binlog_annotate_row_events is ON,
6336    write Annotate_rows event before the first table map.
6337 
6338    SYNOPSIS
6339      write_locked_table_maps()
6340        thd     Pointer to THD structure
6341 
6342    DESCRIPTION
6343        This function will generate and write table maps for all tables
6344        that are locked by the thread 'thd'.
6345 
6346    RETURN VALUE
6347        0   All OK
6348        1   Failed to write all table maps
6349 
6350    SEE ALSO
6351        THD::lock
6352 */
6353 
6354 static int write_locked_table_maps(THD *thd)
6355 {
6356   DBUG_ENTER("write_locked_table_maps");
6357   DBUG_PRINT("enter", ("thd:%p  thd->lock:%p "
6358                        "thd->extra_lock: %p",
6359                        thd, thd->lock, thd->extra_lock));
6360 
6361   DBUG_PRINT("debug", ("get_binlog_table_maps(): %d", thd->get_binlog_table_maps()));
6362 
6363   MYSQL_LOCK *locks[2];
6364   locks[0]= thd->extra_lock;
6365   locks[1]= thd->lock;
6366   my_bool with_annotate= IF_WSREP(!wsrep_fragments_certified_for_stmt(thd),
6367                                   true) &&
6368     thd->variables.binlog_annotate_row_events &&
6369     thd->query() && thd->query_length();
6370 
6371   for (uint i= 0 ; i < sizeof(locks)/sizeof(*locks) ; ++i )
6372   {
6373     MYSQL_LOCK const *const lock= locks[i];
6374     if (lock == NULL)
6375       continue;
6376 
6377     TABLE **const end_ptr= lock->table + lock->table_count;
6378     for (TABLE **table_ptr= lock->table ;
6379          table_ptr != end_ptr ;
6380          ++table_ptr)
6381     {
6382       TABLE *const table= *table_ptr;
6383       DBUG_PRINT("info", ("Checking table %s", table->s->table_name.str));
6384       if (table->current_lock == F_WRLCK &&
6385           table->file->check_table_binlog_row_based(0))
6386       {
6387         /*
6388           We need to have a transactional behavior for SQLCOM_CREATE_TABLE
6389           (e.g. CREATE TABLE... SELECT * FROM TABLE) in order to keep a
6390           compatible behavior with the STMT based replication even when
6391           the table is not transactional. In other words, if the operation
6392           fails while executing the insert phase nothing is written to the
6393           binlog.
6394 
6395           Note that at this point, we check the type of a set of tables to
6396           create the table map events. In the function binlog_log_row(),
6397           which calls the current function, we check the type of the table
6398           of the current row.
6399         */
6400         bool const has_trans= thd->lex->sql_command == SQLCOM_CREATE_TABLE ||
6401           table->file->has_transactions();
6402         int const error= thd->binlog_write_table_map(table, has_trans,
6403                                                      &with_annotate);
6404         /*
6405           If an error occurs, it is the responsibility of the caller to
6406           roll back the transaction.
6407         */
6408         if (unlikely(error))
6409           DBUG_RETURN(1);
6410       }
6411     }
6412   }
6413   DBUG_RETURN(0);
6414 }
6415 
6416 
6417 static int binlog_log_row_internal(TABLE* table,
6418                                    const uchar *before_record,
6419                                    const uchar *after_record,
6420                                    Log_func *log_func)
6421 {
6422   bool error= 0;
6423   THD *const thd= table->in_use;
6424 
6425   /*
6426     If there are no table maps written to the binary log, this is
6427     the first row handled in this statement. In that case, we need
6428     to write table maps for all locked tables to the binary log.
6429   */
6430   if (likely(!(error= ((thd->get_binlog_table_maps() == 0 &&
6431                         write_locked_table_maps(thd))))))
6432   {
6433     /*
6434       We need to have a transactional behavior for SQLCOM_CREATE_TABLE
6435       (i.e. CREATE TABLE... SELECT * FROM TABLE) in order to keep a
6436       compatible behavior with the STMT based replication even when
6437       the table is not transactional. In other words, if the operation
6438       fails while executing the insert phase nothing is written to the
6439       binlog.
6440     */
6441     bool const has_trans= thd->lex->sql_command == SQLCOM_CREATE_TABLE ||
6442       table->file->has_transactions();
6443     error= (*log_func)(thd, table, has_trans, before_record, after_record);
6444   }
6445   return error ? HA_ERR_RBR_LOGGING_FAILED : 0;
6446 }
6447 
6448 int binlog_log_row(TABLE* table, const uchar *before_record,
6449                    const uchar *after_record, Log_func *log_func)
6450 {
6451 #ifdef WITH_WSREP
6452   THD *const thd= table->in_use;
6453 
6454   /* only InnoDB tables will be replicated through binlog emulation */
6455   if ((WSREP_EMULATE_BINLOG(thd) &&
6456        !(table->file->partition_ht()->flags & HTON_WSREP_REPLICATION)) ||
6457       thd->wsrep_ignore_table == true)
6458     return 0;
6459 #endif
6460 
6461   if (!table->file->check_table_binlog_row_based(1))
6462     return 0;
6463   return binlog_log_row_internal(table, before_record, after_record, log_func);
6464 }
6465 
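/*
  Illustrative call pattern (a sketch of how the wrappers below use this
  function, not additional server logic): after a successful engine call
  the ha_write_row()/ha_update_row()/ha_delete_row() wrappers log the row
  change, e.g.

    Log_func *log_func= Write_rows_log_event::binlog_row_logging_function;
    if (!error && !row_already_logged)
      error= binlog_log_row(table, NULL, buf, log_func);

  For the first logged row of a statement, binlog_log_row_internal() also
  writes the table map events for all write-locked tables via
  write_locked_table_maps().
*/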
6466 
6467 int handler::ha_external_lock(THD *thd, int lock_type)
6468 {
6469   int error;
6470   DBUG_ENTER("handler::ha_external_lock");
6471   /*
    Whether this is a lock or an unlock, next_insert_id must be 0 here: it
    verifies that if get_auto_increment() was called (and thus may have
    reserved intervals or taken a table lock), ha_release_auto_increment()
    was called too.
6475   */
6476   DBUG_ASSERT(next_insert_id == 0);
  /* Consecutive lock calls without an unlock in between are not allowed */
6478   DBUG_ASSERT(table_share->tmp_table != NO_TMP_TABLE ||
6479               ((lock_type != F_UNLCK && m_lock_type == F_UNLCK) ||
6480                lock_type == F_UNLCK));
  /* SQL HANDLER calls lock/unlock while scanning (RND/INDEX). */
6482   DBUG_ASSERT(inited == NONE || table->open_by_handler);
6483 
6484   if (MYSQL_HANDLER_RDLOCK_START_ENABLED() ||
6485       MYSQL_HANDLER_WRLOCK_START_ENABLED() ||
6486       MYSQL_HANDLER_UNLOCK_START_ENABLED())
6487   {
6488     if (lock_type == F_RDLCK)
6489     {
6490       MYSQL_HANDLER_RDLOCK_START(table_share->db.str,
6491                                  table_share->table_name.str);
6492     }
6493     else if (lock_type == F_WRLCK)
6494     {
6495       MYSQL_HANDLER_WRLOCK_START(table_share->db.str,
6496                                  table_share->table_name.str);
6497     }
6498     else if (lock_type == F_UNLCK)
6499     {
6500       MYSQL_HANDLER_UNLOCK_START(table_share->db.str,
6501                                  table_share->table_name.str);
6502     }
6503   }
6504 
6505   /*
6506     We cache the table flags if the locking succeeded. Otherwise, we
6507     keep them as they were when they were fetched in ha_open().
6508   */
6509   MYSQL_TABLE_LOCK_WAIT(m_psi, PSI_TABLE_EXTERNAL_LOCK, lock_type,
6510     { error= external_lock(thd, lock_type); })
6511 
6512   DBUG_EXECUTE_IF("external_lock_failure", error= HA_ERR_GENERIC;);
6513 
6514   if (likely(error == 0 || lock_type == F_UNLCK))
6515   {
6516     m_lock_type= lock_type;
6517     cached_table_flags= table_flags();
6518     if (table_share->tmp_table == NO_TMP_TABLE)
6519       mysql_audit_external_lock(thd, table_share, lock_type);
6520   }
6521 
6522   if (MYSQL_HANDLER_RDLOCK_DONE_ENABLED() ||
6523       MYSQL_HANDLER_WRLOCK_DONE_ENABLED() ||
6524       MYSQL_HANDLER_UNLOCK_DONE_ENABLED())
6525   {
6526     if (lock_type == F_RDLCK)
6527     {
6528       MYSQL_HANDLER_RDLOCK_DONE(error);
6529     }
6530     else if (lock_type == F_WRLCK)
6531     {
6532       MYSQL_HANDLER_WRLOCK_DONE(error);
6533     }
6534     else if (lock_type == F_UNLCK)
6535     {
6536       MYSQL_HANDLER_UNLOCK_DONE(error);
6537     }
6538   }
6539   DBUG_RETURN(error);
6540 }
6541 
6542 
6543 /** @brief
  Check handler usage and reset the state of the handler to what it was
  right after 'open'
6545 */
6546 int handler::ha_reset()
6547 {
6548   DBUG_ENTER("ha_reset");
6549   /* Check that we have called all proper deallocation functions */
6550   DBUG_ASSERT((uchar*) table->def_read_set.bitmap +
6551               table->s->column_bitmap_size ==
6552               (uchar*) table->def_write_set.bitmap);
6553   DBUG_ASSERT(bitmap_is_set_all(&table->s->all_set));
6554   DBUG_ASSERT(!table->file->keyread_enabled());
6555   /* ensure that ha_index_end / ha_rnd_end has been called */
6556   DBUG_ASSERT(inited == NONE);
6557   /* reset the bitmaps to point to defaults */
6558   table->default_column_bitmaps();
6559   pushed_cond= NULL;
6560   tracker= NULL;
6561   mark_trx_read_write_done= 0;
6562   clear_cached_table_binlog_row_based_flag();
  /* Reset information about pushed index conditions */
  cancel_pushed_idx_cond();
  /* Reset information about pushed rowid filters */
  cancel_pushed_rowid_filter();
6567   clear_top_table_fields();
6568   DBUG_RETURN(reset());
6569 }
6570 
6571 #ifdef WITH_WSREP
6572 static int wsrep_after_row(THD *thd)
6573 {
6574   DBUG_ENTER("wsrep_after_row");
6575   /* enforce wsrep_max_ws_rows */
6576   thd->wsrep_affected_rows++;
6577   if (wsrep_max_ws_rows &&
6578       wsrep_thd_is_local(thd) &&
6579       thd->wsrep_affected_rows > wsrep_max_ws_rows)
6580   {
6581     trans_rollback_stmt(thd) || trans_rollback(thd);
6582     my_message(ER_ERROR_DURING_COMMIT, "wsrep_max_ws_rows exceeded", MYF(0));
6583     DBUG_RETURN(ER_ERROR_DURING_COMMIT);
6584   }
6585   else if (wsrep_after_row_internal(thd))
6586   {
6587     DBUG_RETURN(ER_LOCK_DEADLOCK);
6588   }
6589   DBUG_RETURN(0);
6590 }
6591 #endif /* WITH_WSREP */
6592 
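/*
  Background for the long unique checks below (a summary of the code that
  follows, with an illustrative table definition):

    CREATE TABLE t1 (a BLOB, UNIQUE KEY (a));

  creates a key with algorithm HA_KEY_ALG_LONG_HASH: a hidden virtual
  column stores a hash of the long column(s) and only that hash is
  indexed. On INSERT/UPDATE we therefore look up the hash of the new
  record and then compare the actual column values of every row with the
  same hash, since different values may collide on the hash.
*/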
6593 static int check_duplicate_long_entry_key(TABLE *table, handler *h,
6594                                           const uchar *new_rec, uint key_no)
6595 {
6596   Field *hash_field;
6597   int result, error= 0;
6598   KEY *key_info= table->key_info + key_no;
6599   hash_field= key_info->key_part->field;
6600   uchar ptr[HA_HASH_KEY_LENGTH_WITH_NULL];
6601 
6602   DBUG_ASSERT((key_info->flags & HA_NULL_PART_KEY &&
6603                key_info->key_length == HA_HASH_KEY_LENGTH_WITH_NULL)
6604               || key_info->key_length == HA_HASH_KEY_LENGTH_WITHOUT_NULL);
6605 
6606   if (hash_field->is_real_null())
6607     return 0;
6608 
6609   key_copy(ptr, new_rec, key_info, key_info->key_length, false);
6610 
6611   if (!table->check_unique_buf)
6612     table->check_unique_buf= (uchar *)alloc_root(&table->mem_root,
6613                                                  table->s->reclength);
6614 
6615   result= h->ha_index_init(key_no, 0);
6616   if (result)
6617     return result;
6618   store_record(table, check_unique_buf);
6619   result= h->ha_index_read_map(table->record[0],
6620                                ptr, HA_WHOLE_KEY, HA_READ_KEY_EXACT);
6621   if (!result)
6622   {
6623     bool is_same;
6624     Field * t_field;
6625     Item_func_hash * temp= (Item_func_hash *)hash_field->vcol_info->expr;
6626     Item ** arguments= temp->arguments();
6627     uint arg_count= temp->argument_count();
6628     do
6629     {
6630       my_ptrdiff_t diff= table->check_unique_buf - new_rec;
6631       is_same= true;
6632       for (uint j=0; is_same && j < arg_count; j++)
6633       {
6634         DBUG_ASSERT(arguments[j]->type() == Item::FIELD_ITEM ||
6635                     // this one for left(fld_name,length)
6636                     arguments[j]->type() == Item::FUNC_ITEM);
6637         if (arguments[j]->type() == Item::FIELD_ITEM)
6638         {
6639           t_field= static_cast<Item_field *>(arguments[j])->field;
6640           if (t_field->cmp_offset(diff))
6641             is_same= false;
6642         }
6643         else
6644         {
6645           Item_func_left *fnc= static_cast<Item_func_left *>(arguments[j]);
6646           DBUG_ASSERT(!my_strcasecmp(system_charset_info, "left", fnc->func_name()));
6647           DBUG_ASSERT(fnc->arguments()[0]->type() == Item::FIELD_ITEM);
6648           t_field= static_cast<Item_field *>(fnc->arguments()[0])->field;
6649           uint length= (uint)fnc->arguments()[1]->val_int();
6650           if (t_field->cmp_prefix(t_field->ptr, t_field->ptr + diff, length))
6651             is_same= false;
6652         }
6653       }
6654     }
6655     while (!is_same && !(result= h->ha_index_next_same(table->record[0],
6656                          ptr, key_info->key_length)));
6657     if (is_same)
6658       error= HA_ERR_FOUND_DUPP_KEY;
6659     goto exit;
6660   }
6661   if (result != HA_ERR_KEY_NOT_FOUND)
6662     error= result;
6663 exit:
6664   if (error == HA_ERR_FOUND_DUPP_KEY)
6665   {
6666     table->file->errkey= key_no;
6667     if (h->ha_table_flags() & HA_DUPLICATE_POS)
6668     {
6669       h->position(table->record[0]);
6670       memcpy(table->file->dup_ref, h->ref, h->ref_length);
6671     }
6672   }
6673   restore_record(table, check_unique_buf);
6674   h->ha_index_end();
6675   return error;
6676 }
6677 
6678 /** @brief
    Check whether an inserted record breaks the
    unique constraint on long columns.
    @returns 0 if there is no duplicate, otherwise an error code
6682   */
6683 static int check_duplicate_long_entries(TABLE *table, handler *h,
6684                                         const uchar *new_rec)
6685 {
6686   table->file->errkey= -1;
6687   int result;
6688   for (uint i= 0; i < table->s->keys; i++)
6689   {
6690     if (table->key_info[i].algorithm == HA_KEY_ALG_LONG_HASH &&
6691             (result= check_duplicate_long_entry_key(table, h, new_rec, i)))
6692       return result;
6693   }
6694   return 0;
6695 }
6696 
6697 /** @brief
    Check whether an updated record breaks the
    unique constraint on long columns.
    For an update we only need to check the keys whose fields have
    actually changed. Consider
    create table t1(a blob , b blob , x blob , y blob ,unique(a,b)
                                                    ,unique(x,y))
    and an update statement like
    update t1 set a=23+a;
    If we scanned all long unique keys of the table, the index scan on
    x_y would find a match because that data has not changed. So for an
    update we check a specific key only when one of its fields has
    changed, while a normal insert checks all long unique keys.
    @returns 0 if there is no duplicate, otherwise an error code
6710   */
6711 static int check_duplicate_long_entries_update(TABLE *table, handler *h, uchar *new_rec)
6712 {
6713   Field *field;
6714   uint key_parts;
6715   int error= 0;
6716   KEY *keyinfo;
6717   KEY_PART_INFO *keypart;
6718   /*
     Here we compare whether the new record and the old record are the
     same with respect to the fields covered by the hash key.
6721    */
6722   uint reclength= (uint) (table->record[1] - table->record[0]);
6723   table->clone_handler_for_update();
6724   for (uint i= 0; i < table->s->keys; i++)
6725   {
6726     keyinfo= table->key_info + i;
6727     if (keyinfo->algorithm == HA_KEY_ALG_LONG_HASH)
6728     {
6729       key_parts= fields_in_hash_keyinfo(keyinfo);
6730       keypart= keyinfo->key_part - key_parts;
6731       for (uint j= 0; j < key_parts; j++, keypart++)
6732       {
6733         field= keypart->field;
        /* If the field value has changed, check this key for duplicates */
6735         if(field->cmp_binary_offset(reclength))
6736         {
6737           if((error= check_duplicate_long_entry_key(table, table->update_handler,
6738                                                  new_rec, i)))
6739             goto exit;
6740           /*
            Break: check_duplicate_long_entry_key() has already taken
            care of the remaining fields of this key
6743            */
6744           break;
6745         }
6746       }
6747     }
6748   }
6749   exit:
6750   return error;
6751 }
6752 
6753 int handler::ha_write_row(const uchar *buf)
6754 {
6755   int error;
6756   Log_func *log_func= Write_rows_log_event::binlog_row_logging_function;
6757   DBUG_ASSERT(table_share->tmp_table != NO_TMP_TABLE ||
6758               m_lock_type == F_WRLCK);
6759   DBUG_ENTER("handler::ha_write_row");
6760   DEBUG_SYNC_C("ha_write_row_start");
6761 
6762   MYSQL_INSERT_ROW_START(table_share->db.str, table_share->table_name.str);
6763   mark_trx_read_write();
6764   increment_statistics(&SSV::ha_write_count);
6765 
6766   if (table->s->long_unique_table)
6767   {
6768     if (this->inited == RND)
6769       table->clone_handler_for_update();
6770     handler *h= table->update_handler ? table->update_handler : table->file;
6771     if ((error= check_duplicate_long_entries(table, h, buf)))
6772       DBUG_RETURN(error);
6773   }
6774   TABLE_IO_WAIT(tracker, m_psi, PSI_TABLE_WRITE_ROW, MAX_KEY, 0,
6775                       { error= write_row(buf); })
6776 
6777   MYSQL_INSERT_ROW_DONE(error);
6778   if (likely(!error) && !row_already_logged)
6779   {
6780     rows_changed++;
6781     error= binlog_log_row(table, 0, buf, log_func);
6782 #ifdef WITH_WSREP
6783     if (table_share->tmp_table == NO_TMP_TABLE &&
6784         WSREP(ha_thd()) && ht->flags & HTON_WSREP_REPLICATION &&
6785         !error && (error= wsrep_after_row(ha_thd())))
6786     {
6787       DBUG_RETURN(error);
6788     }
6789 #endif /* WITH_WSREP */
6790   }
6791 
6792   DEBUG_SYNC_C("ha_write_row_end");
6793   DBUG_RETURN(error);
6794 }
6795 
6796 
6797 int handler::ha_update_row(const uchar *old_data, const uchar *new_data)
6798 {
6799   int error;
6800   Log_func *log_func= Update_rows_log_event::binlog_row_logging_function;
6801   DBUG_ASSERT(table_share->tmp_table != NO_TMP_TABLE ||
6802               m_lock_type == F_WRLCK);
6803 
6804   /*
6805     Some storage engines require that the new record is in record[0]
6806     (and the old record is in record[1]).
6807    */
6808   DBUG_ASSERT(new_data == table->record[0]);
6809   DBUG_ASSERT(old_data == table->record[1]);
6810 
6811   MYSQL_UPDATE_ROW_START(table_share->db.str, table_share->table_name.str);
6812   mark_trx_read_write();
6813   increment_statistics(&SSV::ha_update_count);
6814   if (table->s->long_unique_table &&
6815           (error= check_duplicate_long_entries_update(table, table->file, (uchar *)new_data)))
6816   {
6817     return error;
6818   }
6819 
6820   TABLE_IO_WAIT(tracker, m_psi, PSI_TABLE_UPDATE_ROW, active_index, 0,
6821                       { error= update_row(old_data, new_data);})
6822 
6823   MYSQL_UPDATE_ROW_DONE(error);
6824   if (likely(!error) && !row_already_logged)
6825   {
6826     rows_changed++;
6827     error= binlog_log_row(table, old_data, new_data, log_func);
6828 #ifdef WITH_WSREP
6829     THD *thd= ha_thd();
6830     bool is_wsrep= WSREP(thd);
    /*
      For SR, the following wsrep_after_row() may replicate a fragment, so
      we have to declare the transaction as potentially PA unsafe before
      that.
    */
6833     if (table->s->primary_key == MAX_KEY &&
6834 	is_wsrep && wsrep_thd_is_local(thd))
6835     {
6836       WSREP_DEBUG("marking trx as PA unsafe pk %d", table->s->primary_key);
6837       if (thd->wsrep_cs().mark_transaction_pa_unsafe())
6838       {
6839         WSREP_DEBUG("session does not have active transaction, can not mark as PA unsafe");
6840       }
6841     }
6842     if (table_share->tmp_table == NO_TMP_TABLE &&
6843         is_wsrep && ht->flags & HTON_WSREP_REPLICATION &&
6844         !error && (error= wsrep_after_row(thd)))
6845     {
6846       return error;
6847     }
6848 #endif /* WITH_WSREP */
6849   }
6850   return error;
6851 }
6852 
6853 /*
6854   Update first row. Only used by sequence tables
6855 */
6856 
6857 int handler::update_first_row(const uchar *new_data)
6858 {
6859   int error;
6860   if (likely(!(error= ha_rnd_init(1))))
6861   {
6862     int end_error;
6863     if (likely(!(error= ha_rnd_next(table->record[1]))))
6864     {
6865       /*
6866         We have to do the memcmp as otherwise we may get error 169 from InnoDB
6867       */
6868       if (memcmp(new_data, table->record[1], table->s->reclength))
6869         error= update_row(table->record[1], new_data);
6870     }
6871     end_error= ha_rnd_end();
6872     if (likely(!error))
6873       error= end_error;
6874     /* Logging would be wrong if update_row works but ha_rnd_end fails */
6875     DBUG_ASSERT(!end_error || error != 0);
6876   }
6877   return error;
6878 }
6879 
6880 
6881 int handler::ha_delete_row(const uchar *buf)
6882 {
6883   int error;
6884   Log_func *log_func= Delete_rows_log_event::binlog_row_logging_function;
6885   DBUG_ASSERT(table_share->tmp_table != NO_TMP_TABLE ||
6886               m_lock_type == F_WRLCK);
6887   /*
6888     Normally table->record[0] is used, but sometimes table->record[1] is used.
6889   */
6890   DBUG_ASSERT(buf == table->record[0] ||
6891               buf == table->record[1]);
6892 
6893   MYSQL_DELETE_ROW_START(table_share->db.str, table_share->table_name.str);
6894   mark_trx_read_write();
6895   increment_statistics(&SSV::ha_delete_count);
6896 
6897   TABLE_IO_WAIT(tracker, m_psi, PSI_TABLE_DELETE_ROW, active_index, 0,
6898     { error= delete_row(buf);})
6899   MYSQL_DELETE_ROW_DONE(error);
6900   if (likely(!error))
6901   {
6902     rows_changed++;
6903     error= binlog_log_row(table, buf, 0, log_func);
6904 #ifdef WITH_WSREP
6905     THD *thd= ha_thd();
6906     bool is_wsrep= WSREP(thd);
    /*
      For SR, the following wsrep_after_row() may replicate a fragment, so
      we have to declare the transaction as potentially PA unsafe before
      that.
    */
6909     if (table->s->primary_key == MAX_KEY &&
6910 	is_wsrep && wsrep_thd_is_local(thd))
6911     {
6912       WSREP_DEBUG("marking trx as PA unsafe pk %d", table->s->primary_key);
6913       if (thd->wsrep_cs().mark_transaction_pa_unsafe())
6914       {
6915         WSREP_DEBUG("session does not have active transaction, can not mark as PA unsafe");
6916       }
6917     }
6918     if (table_share->tmp_table == NO_TMP_TABLE &&
6919         is_wsrep && ht->flags & HTON_WSREP_REPLICATION &&
6920         !error && (error= wsrep_after_row(thd)))
6921     {
6922       return error;
6923     }
6924 #endif /* WITH_WSREP */
6925   }
6926   return error;
6927 }
6928 
6929 
6930 /**
6931   Execute a direct update request.  A direct update request updates all
6932   qualified rows in a single operation, rather than one row at a time.
6933   In a Spider cluster the direct update operation is pushed down to the
6934   child levels of the cluster.
6935 
  Note that this can't be used in case of statement-based logging

  @param  update_rows   Number of updated rows.
  @param  found_rows    Number of found rows.
6939 
6940   @retval 0             Success.
6941   @retval != 0          Failure.
6942 */
6943 
6944 int handler::ha_direct_update_rows(ha_rows *update_rows, ha_rows *found_rows)
6945 {
6946   int error;
6947 
6948   MYSQL_UPDATE_ROW_START(table_share->db.str, table_share->table_name.str);
6949   mark_trx_read_write();
6950 
6951   error = direct_update_rows(update_rows, found_rows);
6952   MYSQL_UPDATE_ROW_DONE(error);
6953   return error;
6954 }
6955 
6956 
6957 /**
6958   Execute a direct delete request.  A direct delete request deletes all
6959   qualified rows in a single operation, rather than one row at a time.
6960   In a Spider cluster the direct delete operation is pushed down to the
6961   child levels of the cluster.
6962 
6963   @param  delete_rows   Number of deleted rows.
6964 
6965   @retval 0             Success.
6966   @retval != 0          Failure.
6967 */
6968 
6969 int handler::ha_direct_delete_rows(ha_rows *delete_rows)
6970 {
6971   int error;
  /* Ensure we are not using row-based binary logging */
6973   DBUG_ASSERT(!table->in_use->is_current_stmt_binlog_format_row());
6974 
6975   MYSQL_DELETE_ROW_START(table_share->db.str, table_share->table_name.str);
6976   mark_trx_read_write();
6977 
6978   error = direct_delete_rows(delete_rows);
6979   MYSQL_DELETE_ROW_DONE(error);
6980   return error;
6981 }
6982 
6983 
6984 /** @brief
6985   use_hidden_primary_key() is called in case of an update/delete when
  (table_flags() & HA_PRIMARY_KEY_REQUIRED_FOR_DELETE) is set
  but the table does not have a primary key
6988 */
6989 void handler::use_hidden_primary_key()
6990 {
6991   /* fallback to use all columns in the table to identify row */
6992   table->column_bitmaps_set(&table->s->all_set, table->write_set);
6993 }
6994 
6995 
6996 /**
6997   Get an initialized ha_share.
6998 
6999   @return Initialized ha_share
7000     @retval NULL    ha_share is not yet initialized.
7001     @retval != NULL previous initialized ha_share.
7002 
7003   @note
7004   If not a temp table, then LOCK_ha_data must be held.
7005 */
7006 
7007 Handler_share *handler::get_ha_share_ptr()
7008 {
7009   DBUG_ENTER("handler::get_ha_share_ptr");
7010   DBUG_ASSERT(ha_share);
7011   DBUG_ASSERT(table_share);
7012 
7013 #ifndef DBUG_OFF
7014   if (table_share->tmp_table == NO_TMP_TABLE)
7015     mysql_mutex_assert_owner(&table_share->LOCK_ha_data);
7016 #endif
7017 
7018   DBUG_RETURN(*ha_share);
7019 }
7020 
7021 
7022 /**
7023   Set ha_share to be used by all instances of the same table/partition.
7024 
7025   @param ha_share    Handler_share to be shared.
7026 
7027   @note
7028   If not a temp table, then LOCK_ha_data must be held.
7029 */
7030 
7031 void handler::set_ha_share_ptr(Handler_share *arg_ha_share)
7032 {
7033   DBUG_ENTER("handler::set_ha_share_ptr");
7034   DBUG_ASSERT(ha_share);
7035 #ifndef DBUG_OFF
7036   if (table_share->tmp_table == NO_TMP_TABLE)
7037     mysql_mutex_assert_owner(&table_share->LOCK_ha_data);
7038 #endif
7039 
7040   *ha_share= arg_ha_share;
7041   DBUG_VOID_RETURN;
7042 }
7043 
7044 
7045 /**
7046   Take a lock for protecting shared handler data.
7047 */
7048 
7049 void handler::lock_shared_ha_data()
7050 {
7051   DBUG_ASSERT(table_share);
7052   if (table_share->tmp_table == NO_TMP_TABLE)
7053     mysql_mutex_lock(&table_share->LOCK_ha_data);
7054 }
7055 
7056 
7057 /**
7058   Release lock for protecting ha_share.
7059 */
7060 
7061 void handler::unlock_shared_ha_data()
7062 {
7063   DBUG_ASSERT(table_share);
7064   if (table_share->tmp_table == NO_TMP_TABLE)
7065     mysql_mutex_unlock(&table_share->LOCK_ha_data);
7066 }
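
/*
  Typical usage of the four functions above from a storage engine (a
  sketch modelled on the example engine; Example_share and get_share()
  are illustrative names, not part of this file):

    Example_share *ha_example::get_share()
    {
      Example_share *share;
      lock_shared_ha_data();
      if (!(share= static_cast<Example_share*>(get_ha_share_ptr())))
      {
        if ((share= new Example_share))
          set_ha_share_ptr(static_cast<Handler_share*>(share));
      }
      unlock_shared_ha_data();
      return share;
    }

  Note that lock_shared_ha_data() takes LOCK_ha_data only for
  non-temporary tables, which matches the assertions in
  get_ha_share_ptr()/set_ha_share_ptr().
*/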
7067 
7068 /** @brief
  Dummy function which accepts information about log files that is not
  needed by handlers
7071 */
7072 void signal_log_not_needed(struct handlerton, char *log_file)
7073 {
7074   DBUG_ENTER("signal_log_not_needed");
7075   DBUG_PRINT("enter", ("logfile '%s'", log_file));
7076   DBUG_VOID_RETURN;
7077 }
7078 
7079 void handler::set_lock_type(enum thr_lock_type lock)
7080 {
7081   table->reginfo.lock_type= lock;
7082 }
7083 
7084 Compare_keys handler::compare_key_parts(const Field &old_field,
7085                                         const Column_definition &new_field,
7086                                         const KEY_PART_INFO &old_part,
7087                                         const KEY_PART_INFO &new_part) const
7088 {
7089   if (!old_field.is_equal(new_field))
7090     return Compare_keys::NotEqual;
7091 
7092   if (old_part.length != new_part.length)
7093     return Compare_keys::NotEqual;
7094 
7095   return Compare_keys::Equal;
7096 }
7097 
7098 #ifdef WITH_WSREP
7099 /**
7100   @details
  This function makes the storage engine force the victim transaction
  to abort. Currently only InnoDB has this functionality, but any SE
  implementing the wsrep API should provide this service to support
  multi-master operation.

  @note Aborting the transaction does NOT end it; it still has to
  be rolled back with hton->rollback().

  @note It is safe to abort, from one thread (bf_thd), a transaction
  running in another thread (victim_thd), because InnoDB's lock_sys and
  trx_mutex guarantee the necessary protection. However, it is not safe
  to access victim_thd->transaction, because it is not protected from
  concurrent accesses. And it would be overkill to take LOCK_plugin and
  iterate the whole installed_htons[] array every time.
7115 
7116   @param bf_thd       brute force THD asking for the abort
7117   @param victim_thd   victim THD to be aborted
7118 
7119   @return
7120     always 0
7121 */
7122 
7123 int ha_abort_transaction(THD *bf_thd, THD *victim_thd, my_bool signal)
7124 {
7125   DBUG_ENTER("ha_abort_transaction");
7126   if (!WSREP(bf_thd) &&
7127       !(bf_thd->variables.wsrep_OSU_method == WSREP_OSU_RSU &&
7128         wsrep_thd_is_toi(bf_thd))) {
7129     DBUG_RETURN(0);
7130   }
7131 
7132   handlerton *hton= installed_htons[DB_TYPE_INNODB];
7133   if (hton && hton->abort_transaction)
7134   {
7135     hton->abort_transaction(hton, bf_thd, victim_thd, signal);
7136   }
7137   else
7138   {
7139     WSREP_WARN("Cannot abort InnoDB transaction");
7140   }
7141 
7142   DBUG_RETURN(0);
7143 }
7144 #endif /* WITH_WSREP */
7145 
7146 
7147 #ifdef TRANS_LOG_MGM_EXAMPLE_CODE
7148 /*
  Example of transaction log management functions based on the assumption
  that the logs are placed in a directory
7151 */
7152 #include <my_dir.h>
7153 #include <my_sys.h>
7154 int example_of_iterator_using_for_logs_cleanup(handlerton *hton)
7155 {
7156   void *buffer;
7157   int res= 1;
7158   struct handler_iterator iterator;
7159   struct handler_log_file_data data;
7160 
7161   if (!hton->create_iterator)
7162     return 1; /* iterator creator is not supported */
7163 
7164   if ((*hton->create_iterator)(hton, HA_TRANSACTLOG_ITERATOR, &iterator) !=
7165       HA_ITERATOR_OK)
7166   {
7167     /* error during creation of log iterator or iterator is not supported */
7168     return 1;
7169   }
7170   while((*iterator.next)(&iterator, (void*)&data) == 0)
7171   {
7172     printf("%s\n", data.filename.str);
7173     if (data.status == HA_LOG_STATUS_FREE &&
7174         mysql_file_delete(INSTRUMENT_ME,
7175                           data.filename.str, MYF(MY_WME)))
7176       goto err;
7177   }
7178   res= 0;
7179 err:
7180   (*iterator.destroy)(&iterator);
7181   return res;
7182 }
7183 
7184 
7185 /*
  Here we should ask the handler where it stores its logs, but as this is
  just an example, we use a constant.
  FN_ROOTDIR ("/") is safe enough for the example, because nobody except
  root has rights on it and, at least on *nix, it contains only
  directories (there is no equally safe Windows path, but this is only an
  example).
7191 */
7192 #define fl_dir FN_ROOTDIR
7193 
7194 
7195 /** @brief
  Dummy function to return the log status. It should be replaced by a
  function which really detects the log status and checks that the file
  is a log of this handler.
7199 */
7200 enum log_status fl_get_log_status(char *log)
7201 {
7202   MY_STAT stat_buff;
7203   if (mysql_file_stat(INSTRUMENT_ME, log, &stat_buff, MYF(0)))
7204     return HA_LOG_STATUS_INUSE;
7205   return HA_LOG_STATUS_NOSUCHLOG;
7206 }
7207 
7208 
7209 struct fl_buff
7210 {
7211   LEX_STRING *names;
7212   enum log_status *statuses;
7213   uint32 entries;
7214   uint32 current;
7215 };
7216 
7217 
7218 int fl_log_iterator_next(struct handler_iterator *iterator,
7219                           void *iterator_object)
7220 {
7221   struct fl_buff *buff= (struct fl_buff *)iterator->buffer;
7222   struct handler_log_file_data *data=
7223     (struct handler_log_file_data *) iterator_object;
7224   if (buff->current >= buff->entries)
7225     return 1;
7226   data->filename= buff->names[buff->current];
7227   data->status= buff->statuses[buff->current];
7228   buff->current++;
7229   return 0;
7230 }
7231 
7232 
7233 void fl_log_iterator_destroy(struct handler_iterator *iterator)
7234 {
7235   my_free(iterator->buffer);
7236 }
7237 
7238 
7239 /** @brief
  Returns a buffer to be assigned to the handler_iterator struct
7241 */
7242 enum handler_create_iterator_result
7243 fl_log_iterator_buffer_init(struct handler_iterator *iterator)
7244 {
7245   MY_DIR *dirp;
7246   struct fl_buff *buff;
7247   char *name_ptr;
7248   uchar *ptr;
7249   FILEINFO *file;
7250   uint32 i;
7251 
  /* to be able to call my_free() without crashing in case of error */
7253   iterator->buffer= 0;
7254 
7255   if (!(dirp = my_dir(fl_dir, MYF(MY_THREAD_SPECIFIC))))
7256   {
7257     return HA_ITERATOR_ERROR;
7258   }
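  /*
    A single allocation holds, in this order: the fl_buff header, the
    array of LEX_STRING name entries, the array of log_status entries
    and finally the file name characters themselves (at most
    FN_REFLEN + 1 bytes per directory entry).
  */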
7259   if ((ptr= (uchar*)my_malloc(ALIGN_SIZE(sizeof(fl_buff)) +
7260                              ((ALIGN_SIZE(sizeof(LEX_STRING)) +
7261                                sizeof(enum log_status) +
7262                                + FN_REFLEN + 1) *
7263                               (uint) dirp->number_off_files),
7264                              MYF(MY_THREAD_SPECIFIC))) == 0)
7265   {
7266     return HA_ITERATOR_ERROR;
7267   }
7268   buff= (struct fl_buff *)ptr;
7269   buff->entries= buff->current= 0;
7270   ptr= ptr + (ALIGN_SIZE(sizeof(fl_buff)));
7271   buff->names= (LEX_STRING*) (ptr);
7272   ptr= ptr + ((ALIGN_SIZE(sizeof(LEX_STRING)) *
7273                (uint) dirp->number_off_files));
7274   buff->statuses= (enum log_status *)(ptr);
7275   name_ptr= (char *)(ptr + (sizeof(enum log_status) *
7276                             (uint) dirp->number_off_files));
7277   for (i=0 ; i < (uint) dirp->number_off_files  ; i++)
7278   {
7279     enum log_status st;
7280     file= dirp->dir_entry + i;
7281     if ((file->name[0] == '.' &&
7282          ((file->name[1] == '.' && file->name[2] == '\0') ||
7283             file->name[1] == '\0')))
7284       continue;
7285     if ((st= fl_get_log_status(file->name)) == HA_LOG_STATUS_NOSUCHLOG)
7286       continue;
7287     name_ptr= strxnmov(buff->names[buff->entries].str= name_ptr,
7288                        FN_REFLEN, fl_dir, file->name, NullS);
7289     buff->names[buff->entries].length= (name_ptr -
7290                                         buff->names[buff->entries].str);
7291     buff->statuses[buff->entries]= st;
7292     buff->entries++;
7293   }
7294 
7295   iterator->buffer= buff;
7296   iterator->next= &fl_log_iterator_next;
7297   iterator->destroy= &fl_log_iterator_destroy;
7298   my_dirend(dirp);
7299   return HA_ITERATOR_OK;
7300 }
7301 
7302 
/* An example of an iterator creator */
7304 enum handler_create_iterator_result
7305 fl_create_iterator(enum handler_iterator_type type,
7306                    struct handler_iterator *iterator)
7307 {
7308   switch(type) {
7309   case HA_TRANSACTLOG_ITERATOR:
7310     return fl_log_iterator_buffer_init(iterator);
7311   default:
7312     return HA_ITERATOR_UNSUPPORTED;
7313   }
7314 }
7315 #endif /*TRANS_LOG_MGM_EXAMPLE_CODE*/
7316 
7317 
7318 bool HA_CREATE_INFO::check_conflicting_charset_declarations(CHARSET_INFO *cs)
7319 {
7320   if ((used_fields & HA_CREATE_USED_DEFAULT_CHARSET) &&
7321       /* DEFAULT vs explicit, or explicit vs DEFAULT */
7322       (((default_table_charset == NULL) != (cs == NULL)) ||
7323       /* Two different explicit character sets */
7324        (default_table_charset && cs &&
7325         !my_charset_same(default_table_charset, cs))))
7326   {
7327     my_error(ER_CONFLICTING_DECLARATIONS, MYF(0),
7328              "CHARACTER SET ", default_table_charset ?
7329                                default_table_charset->csname : "DEFAULT",
7330              "CHARACTER SET ", cs ? cs->csname : "DEFAULT");
7331     return true;
7332   }
7333   return false;
7334 }
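
/*
  The check above rejects, for example, a statement like (illustrative,
  not from a test case):

    CREATE TABLE t1 (a INT) CHARACTER SET latin1 CHARACTER SET utf8;

  where two different explicit character sets (or DEFAULT combined with an
  explicit one) are given, raising ER_CONFLICTING_DECLARATIONS, while
  repeating the same character set twice is accepted.
*/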
7335 
7336 /* Remove all indexes for a given table from global index statistics */
7337 
7338 static
7339 int del_global_index_stats_for_table(THD *thd, uchar* cache_key, size_t cache_key_length)
7340 {
7341   int res = 0;
7342   DBUG_ENTER("del_global_index_stats_for_table");
7343 
7344   mysql_mutex_lock(&LOCK_global_index_stats);
7345 
7346   for (uint i= 0; i < global_index_stats.records;)
7347   {
7348     INDEX_STATS *index_stats =
7349       (INDEX_STATS*) my_hash_element(&global_index_stats, i);
7350 
    /* We search for the matching db\0table_name\0 string */
7352     if (index_stats &&
7353 	index_stats->index_name_length >= cache_key_length &&
7354 	!memcmp(index_stats->index, cache_key, cache_key_length))
7355     {
7356       res= my_hash_delete(&global_index_stats, (uchar*)index_stats);
7357       /*
          In our HASH implementation, on deletion one element is moved
          into the place where the deleted element was, and the last
          element is moved into the empty space. Thus we need to
          re-examine the current position, but we don't have to restart
          the search from the beginning.
7363       */
7364     }
7365     else
7366       i++;
7367   }
7368 
7369   mysql_mutex_unlock(&LOCK_global_index_stats);
7370   DBUG_RETURN(res);
7371 }
7372 
7373 /* Remove a table from global table statistics */
7374 
7375 int del_global_table_stat(THD *thd, const LEX_CSTRING *db, const LEX_CSTRING *table)
7376 {
7377   TABLE_STATS *table_stats;
7378   int res = 0;
7379   uchar *cache_key;
7380   size_t cache_key_length;
7381   DBUG_ENTER("del_global_table_stat");
7382 
7383   cache_key_length= db->length + 1 + table->length + 1;
7384 
7385   if(!(cache_key= (uchar *)my_malloc(cache_key_length,
7386                                      MYF(MY_WME | MY_ZEROFILL))))
7387   {
7388     /* Out of memory error already given */
7389     res = 1;
7390     goto end;
7391   }
7392 
7393   memcpy(cache_key, db->str, db->length);
7394   memcpy(cache_key + db->length + 1, table->str, table->length);
7395 
7396   res= del_global_index_stats_for_table(thd, cache_key, cache_key_length);
7397 
7398   mysql_mutex_lock(&LOCK_global_table_stats);
7399 
7400   if((table_stats= (TABLE_STATS*) my_hash_search(&global_table_stats,
7401                                                 cache_key,
7402                                                 cache_key_length)))
7403     res= my_hash_delete(&global_table_stats, (uchar*)table_stats);
7404 
7405   my_free(cache_key);
7406   mysql_mutex_unlock(&LOCK_global_table_stats);
7407 
7408 end:
7409   DBUG_RETURN(res);
7410 }
7411 
/* Remove an index from global index statistics */
7413 
7414 int del_global_index_stat(THD *thd, TABLE* table, KEY* key_info)
7415 {
7416   INDEX_STATS *index_stats;
7417   size_t key_length= table->s->table_cache_key.length + key_info->name.length + 1;
7418   int res = 0;
7419   DBUG_ENTER("del_global_index_stat");
7420   mysql_mutex_lock(&LOCK_global_index_stats);
7421 
7422   if((index_stats= (INDEX_STATS*) my_hash_search(&global_index_stats,
7423                                                 key_info->cache_name,
7424                                                 key_length)))
7425     res= my_hash_delete(&global_index_stats, (uchar*)index_stats);
7426 
7427   mysql_mutex_unlock(&LOCK_global_index_stats);
7428   DBUG_RETURN(res);
7429 }
7430 
7431 /*****************************************************************************
7432   VERSIONING functions
7433 ******************************************************************************/
7434 
7435 bool Vers_parse_info::is_start(const char *name) const
7436 {
7437   DBUG_ASSERT(name);
7438   return as_row.start && as_row.start.streq(name);
7439 }
7440 bool Vers_parse_info::is_end(const char *name) const
7441 {
7442   DBUG_ASSERT(name);
7443   return as_row.end && as_row.end.streq(name);
7444 }
7445 bool Vers_parse_info::is_start(const Create_field &f) const
7446 {
7447   return f.flags & VERS_ROW_START;
7448 }
7449 bool Vers_parse_info::is_end(const Create_field &f) const
7450 {
7451   return f.flags & VERS_ROW_END;
7452 }
7453 
7454 static Create_field *vers_init_sys_field(THD *thd, const char *field_name, int flags, bool integer)
7455 {
7456   Create_field *f= new (thd->mem_root) Create_field();
7457   if (!f)
7458     return NULL;
7459 
7460   f->field_name.str= field_name;
7461   f->field_name.length= strlen(field_name);
7462   f->charset= system_charset_info;
7463   f->flags= flags | NOT_NULL_FLAG;
7464   if (integer)
7465   {
7466     DBUG_ASSERT(0); // Not implemented yet
7467     f->set_handler(&type_handler_vers_trx_id);
7468     f->length= MY_INT64_NUM_DECIMAL_DIGITS - 1;
7469     f->flags|= UNSIGNED_FLAG;
7470   }
7471   else
7472   {
7473     f->set_handler(&type_handler_timestamp2);
7474     f->length= MAX_DATETIME_PRECISION;
7475   }
7476   f->invisible= DBUG_EVALUATE_IF("sysvers_show", VISIBLE, INVISIBLE_SYSTEM);
7477 
7478   if (f->check(thd))
7479     return NULL;
7480 
7481   return f;
7482 }
7483 
7484 static bool vers_create_sys_field(THD *thd, const char *field_name,
7485                                   Alter_info *alter_info, int flags)
7486 {
7487   Create_field *f= vers_init_sys_field(thd, field_name, flags, false);
7488   if (!f)
7489     return true;
7490 
7491   alter_info->flags|= ALTER_PARSER_ADD_COLUMN;
7492   alter_info->create_list.push_back(f);
7493 
7494   return false;
7495 }
7496 
7497 const Lex_ident Vers_parse_info::default_start= "row_start";
7498 const Lex_ident Vers_parse_info::default_end= "row_end";
7499 
7500 bool Vers_parse_info::fix_implicit(THD *thd, Alter_info *alter_info)
7501 {
  // If the user specified some of these, they must specify the others too. Do nothing.
7503   if (*this)
7504     return false;
7505 
7506   alter_info->flags|= ALTER_PARSER_ADD_COLUMN;
7507 
7508   period= start_end_t(default_start, default_end);
7509   as_row= period;
7510 
7511   if (vers_create_sys_field(thd, default_start, alter_info, VERS_ROW_START) ||
7512       vers_create_sys_field(thd, default_end, alter_info, VERS_ROW_END))
7513   {
7514     return true;
7515   }
7516   return false;
7517 }
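
/*
  Effect of the implicit path above (an illustrative statement, not a
  specific test case):

    CREATE TABLE t1 (x INT) WITH SYSTEM VERSIONING;

  behaves as if invisible TIMESTAMP(6) columns row_start and row_end and
  PERIOD FOR SYSTEM_TIME(row_start, row_end) had been written explicitly;
  default_start/default_end above supply the column names.
*/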
7518 
7519 
7520 bool Table_scope_and_contents_source_st::vers_fix_system_fields(
7521   THD *thd, Alter_info *alter_info, const TABLE_LIST &create_table)
7522 {
7523   DBUG_ASSERT(!(alter_info->flags & ALTER_DROP_SYSTEM_VERSIONING));
7524 
7525   DBUG_EXECUTE_IF("sysvers_force", if (!tmp_table()) {
7526                   alter_info->flags|= ALTER_ADD_SYSTEM_VERSIONING;
7527                   options|= HA_VERSIONED_TABLE; });
7528 
7529   if (!vers_info.need_check(alter_info))
7530     return false;
7531 
7532   const bool add_versioning= alter_info->flags & ALTER_ADD_SYSTEM_VERSIONING;
7533 
7534   if (!vers_info.versioned_fields && vers_info.unversioned_fields && !add_versioning)
7535   {
7536     // All is correct but this table is not versioned.
7537     options&= ~HA_VERSIONED_TABLE;
7538     return false;
7539   }
7540 
7541   if (!add_versioning && vers_info && !vers_info.versioned_fields)
7542   {
7543     my_error(ER_MISSING, MYF(0), create_table.table_name.str,
7544              "WITH SYSTEM VERSIONING");
7545     return true;
7546   }
7547 
7548   List_iterator<Create_field> it(alter_info->create_list);
7549   while (Create_field *f= it++)
7550   {
7551     if (f->vers_sys_field())
7552       continue;
7553     if ((f->versioning == Column_definition::VERSIONING_NOT_SET && !add_versioning) ||
7554         f->versioning == Column_definition::WITHOUT_VERSIONING)
7555     {
7556       f->flags|= VERS_UPDATE_UNVERSIONED_FLAG;
7557     }
7558   } // while (Create_field *f= it++)
7559 
7560   if (vers_info.fix_implicit(thd, alter_info))
7561     return true;
7562 
7563   return false;
7564 }
7565 
7566 
7567 bool Table_scope_and_contents_source_st::vers_check_system_fields(
7568         THD *thd, Alter_info *alter_info, const Lex_table_name &table_name,
7569         const Lex_table_name &db, int select_count)
7570 {
7571   if (!(options & HA_VERSIONED_TABLE))
7572     return false;
7573 
7574   uint versioned_fields= 0;
7575 
7576   if (!(alter_info->flags & ALTER_DROP_SYSTEM_VERSIONING))
7577   {
7578     uint fieldnr= 0;
7579     List_iterator<Create_field> field_it(alter_info->create_list);
7580     while (Create_field *f= field_it++)
7581     {
7582       /*
7583          The field from the CREATE part can be duplicated in the SELECT part of
7584          CREATE...SELECT. In that case double counts should be avoided.
7585          select_create::create_table_from_items just pushes the fields back into
7586          the create_list, without additional manipulations, so the fields from
7587          SELECT go last there.
7588        */
7589       bool is_dup= false;
7590       if (fieldnr >= alter_info->create_list.elements - select_count)
7591       {
7592         List_iterator<Create_field> dup_it(alter_info->create_list);
7593         for (Create_field *dup= dup_it++; !is_dup && dup != f; dup= dup_it++)
7594           is_dup= Lex_ident(dup->field_name).streq(f->field_name);
7595       }
7596 
7597       if (!(f->flags & VERS_UPDATE_UNVERSIONED_FLAG) && !is_dup)
7598         versioned_fields++;
7599       fieldnr++;
7600     }
7601     if (versioned_fields == VERSIONING_FIELDS)
7602     {
7603       my_error(ER_VERS_TABLE_MUST_HAVE_COLUMNS, MYF(0), table_name.str);
7604       return true;
7605     }
7606   }
7607 
7608   if (!(alter_info->flags & ALTER_ADD_SYSTEM_VERSIONING) && !versioned_fields)
7609     return false;
7610 
7611   bool can_native= ha_check_storage_engine_flag(db_type,
7612                                                 HTON_NATIVE_SYS_VERSIONING)
7613                    || db_type->db_type == DB_TYPE_PARTITION_DB;
7614 
7615   return vers_info.check_sys_fields(table_name, db, alter_info, can_native);
7616 }
7617 
7618 
7619 bool Vers_parse_info::fix_alter_info(THD *thd, Alter_info *alter_info,
7620                                      HA_CREATE_INFO *create_info, TABLE *table)
7621 {
7622   TABLE_SHARE *share= table->s;
7623   const char *table_name= share->table_name.str;
7624 
7625   if (!need_check(alter_info) && !share->versioned)
7626     return false;
7627 
7628   if (DBUG_EVALUATE_IF("sysvers_force", 0, share->tmp_table))
7629   {
7630     my_error(ER_VERS_NOT_SUPPORTED, MYF(0), "CREATE TEMPORARY TABLE");
7631     return true;
7632   }
7633 
7634   if (alter_info->flags & ALTER_ADD_SYSTEM_VERSIONING &&
7635       table->versioned())
7636   {
7637     my_error(ER_VERS_ALREADY_VERSIONED, MYF(0), table_name);
7638     return true;
7639   }
7640 
7641   if (alter_info->flags & ALTER_DROP_SYSTEM_VERSIONING)
7642   {
7643     if (!share->versioned)
7644     {
7645       my_error(ER_VERS_NOT_VERSIONED, MYF(0), table_name);
7646       return true;
7647     }
7648 #ifdef WITH_PARTITION_STORAGE_ENGINE
7649     if (table->part_info &&
7650         table->part_info->part_type == VERSIONING_PARTITION)
7651     {
7652       my_error(ER_DROP_VERSIONING_SYSTEM_TIME_PARTITION, MYF(0), table_name);
7653       return true;
7654     }
7655 #endif
7656 
7657     return false;
7658   }
7659 
7660   if (!(alter_info->flags & ALTER_ADD_SYSTEM_VERSIONING))
7661   {
7662     List_iterator_fast<Create_field> it(alter_info->create_list);
7663     while (Create_field *f= it++)
7664     {
7665       if (f->flags & VERS_SYSTEM_FIELD)
7666       {
7667         if (!table->versioned())
7668         {
7669           my_error(ER_VERS_NOT_VERSIONED, MYF(0), table->s->table_name.str);
7670           return true;
7671         }
7672         my_error(ER_VERS_DUPLICATE_ROW_START_END, MYF(0),
7673                  f->flags & VERS_ROW_START ? "START" : "END", f->field_name.str);
7674         return true;
7675       }
7676     }
7677   }
7678 
7679   if ((alter_info->flags & ALTER_DROP_PERIOD ||
7680        versioned_fields || unversioned_fields) && !share->versioned)
7681   {
7682     my_error(ER_VERS_NOT_VERSIONED, MYF(0), table_name);
7683     return true;
7684   }
7685 
7686   if (share->versioned)
7687   {
7688     if (alter_info->flags & ALTER_ADD_PERIOD)
7689     {
7690       my_error(ER_VERS_ALREADY_VERSIONED, MYF(0), table_name);
7691       return true;
7692     }
7693 
7694     // copy info from existing table
7695     create_info->options|= HA_VERSIONED_TABLE;
7696 
7697     DBUG_ASSERT(share->vers_start_field());
7698     DBUG_ASSERT(share->vers_end_field());
7699     Lex_ident start(share->vers_start_field()->field_name);
7700     Lex_ident end(share->vers_end_field()->field_name);
7701     DBUG_ASSERT(start.str);
7702     DBUG_ASSERT(end.str);
7703 
7704     as_row= start_end_t(start, end);
7705     period= as_row;
7706 
7707     if (alter_info->create_list.elements)
7708     {
7709       List_iterator_fast<Create_field> it(alter_info->create_list);
7710       while (Create_field *f= it++)
7711       {
7712         if (f->versioning == Column_definition::WITHOUT_VERSIONING)
7713           f->flags|= VERS_UPDATE_UNVERSIONED_FLAG;
7714 
7715         if (f->change.str && (start.streq(f->change) || end.streq(f->change)))
7716         {
7717           my_error(ER_VERS_ALTER_SYSTEM_FIELD, MYF(0), f->change.str);
7718           return true;
7719         }
7720       }
7721     }
7722 
7723     return false;
7724   }
7725 
7726   return fix_implicit(thd, alter_info);
7727 }
7728 
7729 bool
7730 Vers_parse_info::fix_create_like(Alter_info &alter_info, HA_CREATE_INFO &create_info,
7731                                  TABLE_LIST &src_table, TABLE_LIST &table)
7732 {
7733   List_iterator<Create_field> it(alter_info.create_list);
7734   List_iterator<Key> key_it(alter_info.key_list);
7735   List_iterator<Key_part_spec> kp_it;
7736   Create_field *f, *f_start=NULL, *f_end= NULL;
7737 
7738   DBUG_ASSERT(alter_info.create_list.elements > 2);
7739 
7740   if (create_info.tmp_table())
7741   {
7742     int remove= 2;
7743     while (remove && (f= it++))
7744     {
7745       if (f->flags & VERS_SYSTEM_FIELD)
7746       {
7747         it.remove();
7748         remove--;
7749       }
7750       key_it.rewind();
7751       while (Key *key= key_it++)
7752       {
7753         kp_it.init(key->columns);
7754         while (Key_part_spec *kp= kp_it++)
7755         {
7756           if (0 == lex_string_cmp(system_charset_info, &kp->field_name,
7757                                   &f->field_name))
7758           {
7759             kp_it.remove();
7760           }
7761         }
7762         if (0 == key->columns.elements)
7763         {
7764           key_it.remove();
7765         }
7766       }
7767     }
7768     DBUG_ASSERT(remove == 0);
7769     push_warning_printf(current_thd, Sql_condition::WARN_LEVEL_WARN,
7770                         ER_UNKNOWN_ERROR,
7771                         "System versioning is stripped from temporary `%s.%s`",
7772                         table.db.str, table.table_name.str);
7773     return false;
7774   }
7775 
7776   while ((f= it++))
7777   {
7778     if (f->flags & VERS_ROW_START)
7779     {
7780       f_start= f;
7781       if (f_end)
7782         break;
7783     }
7784     else if (f->flags & VERS_ROW_END)
7785     {
7786       f_end= f;
7787       if (f_start)
7788         break;
7789     }
7790   }
7791 
7792   if (!f_start || !f_end)
7793   {
7794     my_error(ER_MISSING, MYF(0), src_table.table_name.str,
7795              f_start ? "AS ROW END" : "AS ROW START");
7796     return true;
7797   }
7798 
7799   as_row= start_end_t(f_start->field_name, f_end->field_name);
7800   period= as_row;
7801 
7802   create_info.options|= HA_VERSIONED_TABLE;
7803   return false;
7804 }
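
/*
  Note on the temporary-table branch above (illustrative statement):

    CREATE TEMPORARY TABLE tmp LIKE versioned_source;

  removes the two system-versioning columns and any key parts referring
  to them, and only pushes the "System versioning is stripped from
  temporary ..." warning, since temporary tables cannot be
  system-versioned.
*/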
7805 
7806 bool Vers_parse_info::need_check(const Alter_info *alter_info) const
7807 {
7808   return versioned_fields || unversioned_fields ||
7809          alter_info->flags & ALTER_ADD_PERIOD ||
7810          alter_info->flags & ALTER_DROP_PERIOD ||
7811          alter_info->flags & ALTER_ADD_SYSTEM_VERSIONING ||
7812          alter_info->flags & ALTER_DROP_SYSTEM_VERSIONING || *this;
7813 }
7814 
7815 bool Vers_parse_info::check_conditions(const Lex_table_name &table_name,
7816                                        const Lex_table_name &db) const
7817 {
7818   if (!as_row.start || !as_row.end)
7819   {
7820     my_error(ER_MISSING, MYF(0), table_name.str,
7821                 as_row.start ? "AS ROW END" : "AS ROW START");
7822     return true;
7823   }
7824 
7825   if (!period.start || !period.end)
7826   {
7827     my_error(ER_MISSING, MYF(0), table_name.str, "PERIOD FOR SYSTEM_TIME");
7828     return true;
7829   }
7830 
7831   if (!as_row.start.streq(period.start) ||
7832       !as_row.end.streq(period.end))
7833   {
7834     my_error(ER_VERS_PERIOD_COLUMNS, MYF(0), as_row.start.str, as_row.end.str);
7835     return true;
7836   }
7837 
7838   if (db.streq(MYSQL_SCHEMA_NAME))
7839   {
7840     my_error(ER_VERS_DB_NOT_SUPPORTED, MYF(0), MYSQL_SCHEMA_NAME.str);
7841     return true;
7842   }
7843   return false;
7844 }
7845 
7846 
7847 bool Create_field::vers_check_timestamp(const Lex_table_name &table_name) const
7848 {
7849   if (type_handler() == &type_handler_timestamp2 &&
7850       length == MAX_DATETIME_FULL_WIDTH)
7851     return false;
7852 
7853   my_error(ER_VERS_FIELD_WRONG_TYPE, MYF(0), field_name.str, "TIMESTAMP(6)",
7854            table_name.str);
7855   return true;
7856 }
7857 
7858 
7859 bool Create_field::vers_check_bigint(const Lex_table_name &table_name) const
7860 {
7861   if (is_some_bigint() && flags & UNSIGNED_FLAG &&
7862       length == MY_INT64_NUM_DECIMAL_DIGITS - 1)
7863     return false;
7864 
7865   my_error(ER_VERS_FIELD_WRONG_TYPE, MYF(0), field_name.str,
7866            "BIGINT(20) UNSIGNED", table_name.str);
7867   return true;
7868 }
7869 
7870 
7871 bool Vers_parse_info::check_sys_fields(const Lex_table_name &table_name,
7872                                        const Lex_table_name &db,
7873                                        Alter_info *alter_info,
7874                                        bool can_native) const
7875 {
7876   if (check_conditions(table_name, db))
7877     return true;
7878 
7879   const Create_field *row_start= NULL;
7880   const Create_field *row_end= NULL;
7881 
7882   List_iterator<Create_field> it(alter_info->create_list);
7883   while (Create_field *f= it++)
7884   {
7885     if (!row_start && f->flags & VERS_ROW_START)
7886       row_start= f;
7887     else if (!row_end && f->flags & VERS_ROW_END)
7888       row_end= f;
7889   }
7890 
7891   if (!row_start || !row_end)
7892   {
7893     my_error(ER_VERS_PERIOD_COLUMNS, MYF(0), as_row.start.str, as_row.end.str);
7894     return true;
7895   }
7896 
7897   if (!can_native ||
7898       !row_start->is_some_bigint() ||
7899       !row_end->is_some_bigint())
7900   {
7901     if (row_start->vers_check_timestamp(table_name) ||
7902         row_end->vers_check_timestamp(table_name))
7903       return true;
7904   }
7905   else
7906   {
7907     if (row_start->vers_check_bigint(table_name) ||
7908         row_end->vers_check_bigint(table_name))
7909       return true;
7910 
7911     if (!TR_table::use_transaction_registry)
7912     {
7913       my_error(ER_VERS_TRT_IS_DISABLED, MYF(0));
7914       return true;
7915     }
7916   }
7917 
7918   return false;
7919 }
7920 
7921 bool Table_period_info::check_field(const Create_field* f,
7922                                     const Lex_ident& f_name) const
7923 {
7924   bool res= false;
7925   if (!f)
7926   {
7927     my_error(ER_BAD_FIELD_ERROR, MYF(0), f_name.str, name.str);
7928     res= true;
7929   }
7930   else if (f->type_handler()->mysql_timestamp_type() != MYSQL_TIMESTAMP_DATE &&
7931            f->type_handler()->mysql_timestamp_type() != MYSQL_TIMESTAMP_DATETIME)
7932   {
7933     my_error(ER_WRONG_FIELD_SPEC, MYF(0), f->field_name.str);
7934     res= true;
7935   }
7936   else if (f->vcol_info || f->flags & VERS_SYSTEM_FIELD)
7937   {
7938     my_error(ER_PERIOD_FIELD_WRONG_ATTRIBUTES, MYF(0),
7939              f->field_name.str, "GENERATED ALWAYS AS");
7940     res= true;
7941   }
7942 
7943   return res;
7944 }
7945 
7946 bool Table_scope_and_contents_source_st::check_fields(
7947   THD *thd, Alter_info *alter_info,
7948   const Lex_table_name &table_name, const Lex_table_name &db, int select_count)
7949 {
7950   return vers_check_system_fields(thd, alter_info,
7951                                   table_name, db, select_count) ||
7952     check_period_fields(thd, alter_info);
7953 }
7954 
7955 bool Table_scope_and_contents_source_st::check_period_fields(
7956                 THD *thd, Alter_info *alter_info)
7957 {
7958   if (!period_info.name)
7959     return false;
7960 
7961   if (tmp_table())
7962   {
7963     my_error(ER_PERIOD_TEMPORARY_NOT_ALLOWED, MYF(0));
7964     return true;
7965   }
7966 
7967   Table_period_info::start_end_t &period= period_info.period;
7968   const Create_field *row_start= NULL;
7969   const Create_field *row_end= NULL;
7970   List_iterator<Create_field> it(alter_info->create_list);
7971   while (const Create_field *f= it++)
7972   {
7973     if (period.start.streq(f->field_name)) row_start= f;
7974     else if (period.end.streq(f->field_name)) row_end= f;
7975 
7976     if (period_info.name.streq(f->field_name))
7977     {
7978       my_error(ER_DUP_FIELDNAME, MYF(0), f->field_name.str);
7979       return true;
7980     }
7981   }
7982 
7983   bool res= period_info.check_field(row_start, period.start.str)
7984             || period_info.check_field(row_end, period.end.str);
7985   if (res)
7986     return true;
7987 
7988   if (row_start->type_handler() != row_end->type_handler()
7989       || row_start->length != row_end->length)
7990   {
7991     my_error(ER_PERIOD_TYPES_MISMATCH, MYF(0), period_info.name.str);
7992     res= true;
7993   }
7994 
7995   return res;
7996 }
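
/*
  The checks above apply to application-time periods, e.g. (illustrative
  definition):

    CREATE TABLE t1 (s DATE, e DATE, PERIOD FOR p(s, e));

  Both period columns must exist, must be of a DATE or DATETIME type, must
  not be generated or system-versioning columns, must not clash with the
  period name, and must have the same type and length; otherwise errors
  such as ER_WRONG_FIELD_SPEC or ER_PERIOD_TYPES_MISMATCH are raised.
*/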
7997 
7998 bool
7999 Table_scope_and_contents_source_st::fix_create_fields(THD *thd,
8000                                                       Alter_info *alter_info,
8001                                                       const TABLE_LIST &create_table)
8002 {
8003   return vers_fix_system_fields(thd, alter_info, create_table)
8004          || fix_period_fields(thd, alter_info);
8005 }
8006 
8007 bool
8008 Table_scope_and_contents_source_st::fix_period_fields(THD *thd,
8009                                                       Alter_info *alter_info)
8010 {
8011   if (!period_info.name)
8012     return false;
8013 
8014   Table_period_info::start_end_t &period= period_info.period;
8015   List_iterator<Create_field> it(alter_info->create_list);
8016   while (Create_field *f= it++)
8017   {
8018     if (period.start.streq(f->field_name) || period.end.streq(f->field_name))
8019     {
8020       f->period= &period_info;
8021       f->flags|= NOT_NULL_FLAG;
8022     }
8023   }
8024   return false;
8025 }
8026