1 /* Copyright (c) 2000, 2020, Oracle and/or its affiliates. All rights reserved.
2 
3    This program is free software; you can redistribute it and/or modify
4    it under the terms of the GNU General Public License, version 2.0,
5    as published by the Free Software Foundation.
6 
7    This program is also distributed with certain software (including
8    but not limited to OpenSSL) that is licensed under separate terms,
9    as designated in a particular file or component or in included license
10    documentation.  The authors of MySQL hereby grant you an additional
11    permission to link the program and your derivative works with the
12    separately licensed software that they have included with MySQL.
13 
14    This program is distributed in the hope that it will be useful,
15    but WITHOUT ANY WARRANTY; without even the implied warranty of
16    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
17    GNU General Public License, version 2.0, for more details.
18 
19    You should have received a copy of the GNU General Public License
20    along with this program; if not, write to the Free Software Foundation,
21    Inc., 51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA */
22 
23 /** @file handler.cc
24 
25     @brief
26   Handler-calling-functions
27 */
28 
29 #include "binlog.h"
30 #include "sql_priv.h"
31 #include "unireg.h"
32 #include "rpl_handler.h"
33 #include "sql_cache.h"                   // query_cache, query_cache_*
34 #include "key.h"     // key_copy, key_unpack, key_cmp_if_same, key_cmp
35 #include "sql_table.h"                   // build_table_filename
36 #include "sql_parse.h"                          // check_stack_overrun
37 #include "sql_acl.h"            // SUPER_ACL
38 #include "sql_base.h"           // free_io_cache
39 #include "discover.h"           // writefrm
40 #include "log_event.h"          // *_rows_log_event
41 #include "rpl_filter.h"
42 #include <myisampack.h>
43 #include "transaction.h"
44 #include <errno.h>
45 #include "probes_mysql.h"
46 #include <mysql/psi/mysql_table.h>
47 #include "debug_sync.h"         // DEBUG_SYNC
48 #include <my_bit.h>
49 #include <list>
50 #include "global_threads.h"
51 
52 #ifdef WITH_PARTITION_STORAGE_ENGINE
53 #include "ha_partition.h"
54 #endif
55 
56 using std::min;
57 using std::max;
58 using std::list;
59 
60 // This is a temporary backporting fix.
61 #ifndef HAVE_LOG2
62 /*
63   This will be slightly slower and perhaps a tiny bit less accurate than
64   doing it the IEEE754 way but log2() should be available on C99 systems.
65 */
66 inline double log2(double x)
67 {
68   return (log(x) / M_LN2);
69 }
70 #endif
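/*
  Illustrative sanity check of the fallback formula:
  log2(8.0) == log(8.0) / M_LN2 == 3.0, matching the native C99 log2().
*/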
71 
72 /*
73   While we have legacy_db_type, we have this array to
74   check for dups and to find handlerton from legacy_db_type.
75   Remove when legacy_db_type is finally gone
76 */
77 st_plugin_int *hton2plugin[MAX_HA];
78 
79 static handlerton *installed_htons[128];
80 
81 #define BITMAP_STACKBUF_SIZE (128/8)
82 
83 KEY_CREATE_INFO default_key_create_info=
84   { HA_KEY_ALG_UNDEF, 0, {NullS, 0}, {NullS, 0}, true };
85 
86 /* number of entries in handlertons[] */
87 ulong total_ha= 0;
88 /* number of storage engines (from handlertons[]) that support 2pc */
89 ulong total_ha_2pc= 0;
90 /* size of savepoint storage area (see ha_init) */
91 ulong savepoint_alloc_size= 0;
92 
93 static const LEX_STRING sys_table_aliases[]=
94 {
95   { C_STRING_WITH_LEN("INNOBASE") },  { C_STRING_WITH_LEN("INNODB") },
96   { C_STRING_WITH_LEN("NDB") },       { C_STRING_WITH_LEN("NDBCLUSTER") },
97   { C_STRING_WITH_LEN("HEAP") },      { C_STRING_WITH_LEN("MEMORY") },
98   { C_STRING_WITH_LEN("MERGE") },     { C_STRING_WITH_LEN("MRG_MYISAM") },
99   {NullS, 0}
100 };
101 
102 const char *ha_row_type[] = {
103   "", "FIXED", "DYNAMIC", "COMPRESSED", "REDUNDANT", "COMPACT",
104   /* Reserved to be "PAGE" in future versions */ "?",
105   "TOKUDB_UNCOMPRESSED", "TOKUDB_ZLIB", "TOKUDB_SNAPPY", "TOKUDB_QUICKLZ",
106   "TOKUDB_LZMA", "TOKUDB_FAST", "TOKUDB_SMALL", "TOKUDB_DEFAULT",
107   "?","?","?"
108 };
109 
110 const char *tx_isolation_names[] =
111 { "READ-UNCOMMITTED", "READ-COMMITTED", "REPEATABLE-READ", "SERIALIZABLE",
112   NullS};
113 TYPELIB tx_isolation_typelib= {array_elements(tx_isolation_names)-1,"",
114 			       tx_isolation_names, NULL};
115 
116 #ifndef DBUG_OFF
117 
118 const char *ha_legacy_type_name(legacy_db_type legacy_type)
119 {
120   switch (legacy_type)
121   {
122   case DB_TYPE_UNKNOWN:
123     return "DB_TYPE_UNKNOWN";
124   case DB_TYPE_DIAB_ISAM:
125     return "DB_TYPE_DIAB_ISAM";
126   case DB_TYPE_HASH:
127     return "DB_TYPE_HASH";
128   case DB_TYPE_MISAM:
129     return "DB_TYPE_MISAM";
130   case DB_TYPE_PISAM:
131     return "DB_TYPE_PISAM";
132   case DB_TYPE_RMS_ISAM:
133     return "DB_TYPE_RMS_ISAM";
134   case DB_TYPE_HEAP:
135     return "DB_TYPE_HEAP";
136   case DB_TYPE_ISAM:
137     return "DB_TYPE_ISAM";
138   case DB_TYPE_MRG_ISAM:
139     return "DB_TYPE_MRG_ISAM";
140   case DB_TYPE_MYISAM:
141     return "DB_TYPE_MYISAM";
142   case DB_TYPE_MRG_MYISAM:
143     return "DB_TYPE_MRG_MYISAM";
144   case DB_TYPE_BERKELEY_DB:
145     return "DB_TYPE_BERKELEY_DB";
146   case DB_TYPE_INNODB:
147     return "DB_TYPE_INNODB";
148   case DB_TYPE_GEMINI:
149     return "DB_TYPE_GEMINI";
150   case DB_TYPE_NDBCLUSTER:
151     return "DB_TYPE_NDBCLUSTER";
152   case DB_TYPE_EXAMPLE_DB:
153     return "DB_TYPE_EXAMPLE_DB";
154   case DB_TYPE_ARCHIVE_DB:
155     return "DB_TYPE_ARCHIVE_DB";
156   case DB_TYPE_CSV_DB:
157     return "DB_TYPE_CSV_DB";
158   case DB_TYPE_FEDERATED_DB:
159     return "DB_TYPE_FEDERATED_DB";
160   case DB_TYPE_BLACKHOLE_DB:
161     return "DB_TYPE_BLACKHOLE_DB";
162   case DB_TYPE_PARTITION_DB:
163     return "DB_TYPE_PARTITION_DB";
164   case DB_TYPE_BINLOG:
165     return "DB_TYPE_BINLOG";
166   case DB_TYPE_SOLID:
167     return "DB_TYPE_SOLID";
168   case DB_TYPE_PBXT:
169     return "DB_TYPE_PBXT";
170   case DB_TYPE_TABLE_FUNCTION:
171     return "DB_TYPE_TABLE_FUNCTION";
172   case DB_TYPE_MEMCACHE:
173     return "DB_TYPE_MEMCACHE";
174   case DB_TYPE_FALCON:
175     return "DB_TYPE_FALCON";
176   case DB_TYPE_MARIA:
177     return "DB_TYPE_MARIA";
178   case DB_TYPE_PERFORMANCE_SCHEMA:
179     return "DB_TYPE_PERFORMANCE_SCHEMA";
180   default:
181     return "DB_TYPE_DYNAMIC";
182   }
183 }
184 #endif
185 
186 /**
187   Database name that holds most of the mysqld system tables.
188   Current code assumes that there exists only one
189   specific "database name" designated as the system database.
190 */
191 const char* mysqld_system_database= "mysql";
192 
193 // System tables that belong to mysqld_system_database.
194 st_system_tablename mysqld_system_tables[]= {
195   {mysqld_system_database, "db"},
196   {mysqld_system_database, "user"},
197   {mysqld_system_database, "host"},
198   {mysqld_system_database, "func"},
199   {mysqld_system_database, "proc"},
200   {mysqld_system_database, "event"},
201   {mysqld_system_database, "plugin"},
202   {mysqld_system_database, "servers"},
203   {mysqld_system_database, "procs_priv"},
204   {mysqld_system_database, "tables_priv"},
205   {mysqld_system_database, "proxies_priv"},
206   {mysqld_system_database, "columns_priv"},
207   {mysqld_system_database, "time_zone"},
208   {mysqld_system_database, "time_zone_name"},
209   {mysqld_system_database, "time_zone_leap_second"},
210   {mysqld_system_database, "time_zone_transition"},
211   {mysqld_system_database, "time_zone_transition_type"},
212   {mysqld_system_database, "help_category"},
213   {mysqld_system_database, "help_keyword"},
214   {mysqld_system_database, "help_relation"},
215   {mysqld_system_database, "help_topic"},
216   {(const char *)NULL, (const char *)NULL} /* This must be at the end */
217 };
218 
219 /**
220   This static pointer holds the list of system databases from the SQL layer
221   and various SEs. The required memory is allocated once and never freed.
222 */
223 static const char **known_system_databases= NULL;
224 static const char **ha_known_system_databases();
225 
226 // Called for each SE to get SE specific system database.
227 static my_bool system_databases_handlerton(THD *unused, plugin_ref plugin,
228                                            void *arg);
229 
230 // Called for each SE to check if given db.table_name is a system table.
231 static my_bool check_engine_system_table_handlerton(THD *unused,
232                                                     plugin_ref plugin,
233                                                     void *arg);
234 /**
235   Structure used by SEs during the check for a system table.
236   This structure is passed to each SE handlerton and the status (OUT param)
237   is collected.
238 */
239 struct st_sys_tbl_chk_params
240 {
241   const char *db;                             // IN param
242   const char *table_name;                     // IN param
243   bool is_sql_layer_system_table;             // IN param
244   legacy_db_type db_type;                     // IN param
245 
246   enum enum_sys_tbl_chk_status
247   {
248     // db.table_name is not a supported system table.
249     NOT_KNOWN_SYSTEM_TABLE,
250     /*
251       db.table_name is a system table,
252       but may not be supported by SE.
253     */
254     KNOWN_SYSTEM_TABLE,
255     /*
256       db.table_name is a system table,
257       and is supported by SE.
258     */
259     SUPPORTED_SYSTEM_TABLE
260   } status;                                    // OUT param
261 };
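/*
  Illustrative sketch of the intended usage (hypothetical caller; names other
  than check_engine_system_table_handlerton and plugin_foreach are
  placeholders): the caller fills the IN params and lets every storage engine
  handlerton refine 'status':

    st_sys_tbl_chk_params check_params;
    check_params.db= db;
    check_params.table_name= table_name;
    check_params.is_sql_layer_system_table= is_sql_layer_system_table;
    check_params.db_type= db_type;
    check_params.status= is_sql_layer_system_table ?
      st_sys_tbl_chk_params::KNOWN_SYSTEM_TABLE :
      st_sys_tbl_chk_params::NOT_KNOWN_SYSTEM_TABLE;
    plugin_foreach(NULL, check_engine_system_table_handlerton,
                   MYSQL_STORAGE_ENGINE_PLUGIN, &check_params);
    // check_params.status now holds the most specific answer collected
    // from the engines.
*/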
262 
263 
264 static plugin_ref ha_default_plugin(THD *thd)
265 {
266   if (thd->variables.table_plugin)
267     return thd->variables.table_plugin;
268   return my_plugin_lock(thd, &global_system_variables.table_plugin);
269 }
270 
271 
272 /** @brief
273   Return the default storage engine handlerton used for non-temp tables
274   for thread
275 
276   SYNOPSIS
277     ha_default_handlerton(thd)
278     thd         current thread
279 
280   RETURN
281     pointer to handlerton
282 */
283 handlerton *ha_default_handlerton(THD *thd)
284 {
285   plugin_ref plugin= ha_default_plugin(thd);
286   DBUG_ASSERT(plugin);
287   handlerton *hton= plugin_data(plugin, handlerton*);
288   DBUG_ASSERT(hton);
289   return hton;
290 }
291 
292 /** @brief
293   Return the enforced storage engine handlerton for thread
294 
295   SYNOPSIS
296     ha_enforce_handlerton(thd)
297     thd         current thread
298 
299   RETURN
300     pointer to handlerton
301 */
302 handlerton *ha_enforce_handlerton(THD* thd)
303 {
304   if (enforce_storage_engine)
305   {
306     LEX_STRING name= { enforce_storage_engine,
307       strlen(enforce_storage_engine) };
308     plugin_ref plugin= ha_resolve_by_name(thd, &name, FALSE);
309     if (plugin)
310     {
311       handlerton *hton= plugin_data(plugin, handlerton*);
312       DBUG_ASSERT(hton);
313       return hton;
314     }
315     else
316     {
317       my_error(ER_UNKNOWN_STORAGE_ENGINE, MYF(0), enforce_storage_engine,
318         enforce_storage_engine);
319     }
320   }
321   return NULL;
322 }
323 
324 static plugin_ref ha_default_temp_plugin(THD *thd)
325 {
326   if (thd->variables.temp_table_plugin)
327     return thd->variables.temp_table_plugin;
328   return my_plugin_lock(thd, &global_system_variables.temp_table_plugin);
329 }
330 
331 
332 /** @brief
333   Return the default storage engine handlerton used for explicitly
334   created temp tables for a thread
335 
336   SYNOPSIS
337     ha_default_temp_handlerton(thd)
338     thd         current thread
339 
340   RETURN
341     pointer to handlerton
342 */
343 handlerton *ha_default_temp_handlerton(THD *thd)
344 {
345   plugin_ref plugin= ha_default_temp_plugin(thd);
346   DBUG_ASSERT(plugin);
347   handlerton *hton= plugin_data(plugin, handlerton*);
348   DBUG_ASSERT(hton);
349   return hton;
350 }
351 
352 
353 /** @brief
354   Return the storage engine handlerton for the supplied name
355 
356   SYNOPSIS
357     ha_resolve_by_name(thd, name)
358     thd         current thread
359     name        name of storage engine
360 
361   RETURN
362     pointer to storage engine plugin handle
363 */
364 plugin_ref ha_resolve_by_name(THD *thd, const LEX_STRING *name,
365                               bool is_temp_table)
366 {
367   const LEX_STRING *table_alias;
368   plugin_ref plugin;
369 
370 redo:
371   /* my_strnncoll is a macro and gcc doesn't do early expansion of macro */
372   if (thd && !my_charset_latin1.coll->strnncoll(&my_charset_latin1,
373                            (const uchar *)name->str, name->length,
374                            (const uchar *)STRING_WITH_LEN("DEFAULT"), 0))
375     return is_temp_table ?
376       ha_default_temp_plugin(thd) : ha_default_plugin(thd);
377 
378   if ((plugin= my_plugin_lock_by_name(thd, name, MYSQL_STORAGE_ENGINE_PLUGIN)))
379   {
380     handlerton *hton= plugin_data(plugin, handlerton *);
381     if (hton && !(hton->flags & HTON_NOT_USER_SELECTABLE))
382       return plugin;
383 
384     /*
385       unlocking plugin immediately after locking is relatively low cost.
386     */
387     plugin_unlock(thd, plugin);
388   }
389 
390   /*
391     We check for the historical aliases.
392   */
393   for (table_alias= sys_table_aliases; table_alias->str; table_alias+= 2)
394   {
395     if (!my_strnncoll(&my_charset_latin1,
396                       (const uchar *)name->str, name->length,
397                       (const uchar *)table_alias->str, table_alias->length))
398     {
399       name= table_alias + 1;
400       goto redo;
401     }
402   }
403 
404   return NULL;
405 }
406 
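/*
  Illustrative usage sketch (hypothetical values): thanks to the alias table
  above, both names below resolve to the same plugin, assuming InnoDB is
  installed:

    LEX_STRING name1= { C_STRING_WITH_LEN("InnoDB") };
    LEX_STRING name2= { C_STRING_WITH_LEN("INNOBASE") };
    plugin_ref p1= ha_resolve_by_name(thd, &name1, false);
    plugin_ref p2= ha_resolve_by_name(thd, &name2, false);
    // Both references must eventually be released with plugin_unlock().
*/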
407 
408 plugin_ref ha_lock_engine(THD *thd, const handlerton *hton)
409 {
410   if (hton)
411   {
412     st_plugin_int **plugin= hton2plugin + hton->slot;
413 
414 #ifdef DBUG_OFF
415     return my_plugin_lock(thd, plugin);
416 #else
417     return my_plugin_lock(thd, &plugin);
418 #endif
419   }
420   return NULL;
421 }
422 
423 
424 handlerton *ha_resolve_by_legacy_type(THD *thd, enum legacy_db_type db_type)
425 {
426   plugin_ref plugin;
427   switch (db_type) {
428   case DB_TYPE_DEFAULT:
429     return ha_default_handlerton(thd);
430   default:
431     if (db_type > DB_TYPE_UNKNOWN && db_type < DB_TYPE_DEFAULT &&
432         (plugin= ha_lock_engine(thd, installed_htons[db_type])))
433       return plugin_data(plugin, handlerton*);
434     /* fall through */
435   case DB_TYPE_UNKNOWN:
436     return NULL;
437   }
438 }
439 
440 
441 /**
442   Use another database handler if the requested handler is not compiled in.
443 */
444 handlerton *ha_checktype(THD *thd, enum legacy_db_type database_type,
445                           bool no_substitute, bool report_error)
446 {
447   handlerton *hton= ha_resolve_by_legacy_type(thd, database_type);
448   if (ha_storage_engine_is_enabled(hton))
449     return hton;
450 
451   if (no_substitute)
452   {
453     if (report_error)
454     {
455       const char *engine_name= ha_resolve_storage_engine_name(hton);
456       my_error(ER_FEATURE_DISABLED,MYF(0),engine_name,engine_name);
457     }
458     return NULL;
459   }
460 
461   (void) RUN_HOOK(transaction, after_rollback, (thd, FALSE));
462 
463   switch (database_type) {
464   case DB_TYPE_MRG_ISAM:
465     return ha_resolve_by_legacy_type(thd, DB_TYPE_MRG_MYISAM);
466   default:
467     break;
468   }
469 
470   return ha_default_handlerton(thd);
471 } /* ha_checktype */
472 
473 
474 handler *get_new_handler(TABLE_SHARE *share, MEM_ROOT *alloc,
475                          handlerton *db_type)
476 {
477   handler *file;
478   DBUG_ENTER("get_new_handler");
479   DBUG_PRINT("enter", ("alloc: 0x%lx", (long) alloc));
480 
481   if (db_type && db_type->state == SHOW_OPTION_YES && db_type->create)
482   {
483     if ((file= db_type->create(db_type, share, alloc)))
484       file->init();
485     DBUG_RETURN(file);
486   }
487   /*
488     Try the default table type.
489     The call to current_thd() here is OK: we call this function a lot of
490     times, but we enter this branch very seldom.
491   */
492   DBUG_RETURN(get_new_handler(share, alloc, ha_default_handlerton(current_thd)));
493 }
494 
495 
496 #ifdef WITH_PARTITION_STORAGE_ENGINE
497 handler *get_ha_partition(partition_info *part_info)
498 {
499   ha_partition *partition;
500   DBUG_ENTER("get_ha_partition");
501   if ((partition= new ha_partition(partition_hton, part_info)))
502   {
503     if (partition->initialize_partition(current_thd->mem_root))
504     {
505       delete partition;
506       partition= 0;
507     }
508     else
509       partition->init();
510   }
511   else
512   {
513     my_error(ER_OUTOFMEMORY, MYF(ME_FATALERROR),
514              static_cast<int>(sizeof(ha_partition)));
515   }
516   DBUG_RETURN(((handler*) partition));
517 }
518 #endif
519 
520 
521 static const char **handler_errmsgs;
522 
523 C_MODE_START
524 static const char **get_handler_errmsgs()
525 {
526   return handler_errmsgs;
527 }
528 C_MODE_END
529 
530 
531 /**
532   Register handler error messages for use with my_error().
533 
534   @retval
535     0           OK
536   @retval
537     !=0         Error
538 */
539 
540 int ha_init_errors(void)
541 {
542 #define SETMSG(nr, msg) handler_errmsgs[(nr) - HA_ERR_FIRST]= (msg)
543 
544   /* Allocate a pointer array for the error message strings. */
545   /* Zerofill it to avoid uninitialized gaps. */
546   if (! (handler_errmsgs= (const char**) my_malloc(HA_ERR_ERRORS * sizeof(char*),
547                                                    MYF(MY_WME | MY_ZEROFILL))))
548     return 1;
549 
550   /* Set the dedicated error messages. */
551   SETMSG(HA_ERR_KEY_NOT_FOUND,          ER_DEFAULT(ER_KEY_NOT_FOUND));
552   SETMSG(HA_ERR_FOUND_DUPP_KEY,         ER_DEFAULT(ER_DUP_KEY));
553   SETMSG(HA_ERR_RECORD_CHANGED,         "Update which is recoverable");
554   SETMSG(HA_ERR_WRONG_INDEX,            "Wrong index given to function");
555   SETMSG(HA_ERR_CRASHED,                ER_DEFAULT(ER_NOT_KEYFILE));
556   SETMSG(HA_ERR_WRONG_IN_RECORD,        ER_DEFAULT(ER_CRASHED_ON_USAGE));
557   SETMSG(HA_ERR_OUT_OF_MEM,             "Table handler out of memory");
558   SETMSG(HA_ERR_NOT_A_TABLE,            "Incorrect file format '%.64s'");
559   SETMSG(HA_ERR_WRONG_COMMAND,          "Command not supported");
560   SETMSG(HA_ERR_OLD_FILE,               ER_DEFAULT(ER_OLD_KEYFILE));
561   SETMSG(HA_ERR_NO_ACTIVE_RECORD,       "No record read in update");
562   SETMSG(HA_ERR_RECORD_DELETED,         "Intern record deleted");
563   SETMSG(HA_ERR_RECORD_FILE_FULL,       ER_DEFAULT(ER_RECORD_FILE_FULL));
564   SETMSG(HA_ERR_INDEX_FILE_FULL,        "No more room in index file '%.64s'");
565   SETMSG(HA_ERR_END_OF_FILE,            "End in next/prev/first/last");
566   SETMSG(HA_ERR_UNSUPPORTED,            ER_DEFAULT(ER_ILLEGAL_HA));
567   SETMSG(HA_ERR_TO_BIG_ROW,             "Too big row");
568   SETMSG(HA_WRONG_CREATE_OPTION,        "Wrong create option");
569   SETMSG(HA_ERR_FOUND_DUPP_UNIQUE,      ER_DEFAULT(ER_DUP_UNIQUE));
570   SETMSG(HA_ERR_UNKNOWN_CHARSET,        "Can't open charset");
571   SETMSG(HA_ERR_WRONG_MRG_TABLE_DEF,    ER_DEFAULT(ER_WRONG_MRG_TABLE));
572   SETMSG(HA_ERR_CRASHED_ON_REPAIR,      ER_DEFAULT(ER_CRASHED_ON_REPAIR));
573   SETMSG(HA_ERR_CRASHED_ON_USAGE,       ER_DEFAULT(ER_CRASHED_ON_USAGE));
574   SETMSG(HA_ERR_LOCK_WAIT_TIMEOUT,      ER_DEFAULT(ER_LOCK_WAIT_TIMEOUT));
575   SETMSG(HA_ERR_LOCK_TABLE_FULL,        ER_DEFAULT(ER_LOCK_TABLE_FULL));
576   SETMSG(HA_ERR_READ_ONLY_TRANSACTION,  ER_DEFAULT(ER_READ_ONLY_TRANSACTION));
577   SETMSG(HA_ERR_LOCK_DEADLOCK,          ER_DEFAULT(ER_LOCK_DEADLOCK));
578   SETMSG(HA_ERR_CANNOT_ADD_FOREIGN,     ER_DEFAULT(ER_CANNOT_ADD_FOREIGN));
579   SETMSG(HA_ERR_NO_REFERENCED_ROW,      ER_DEFAULT(ER_NO_REFERENCED_ROW_2));
580   SETMSG(HA_ERR_ROW_IS_REFERENCED,      ER_DEFAULT(ER_ROW_IS_REFERENCED_2));
581   SETMSG(HA_ERR_NO_SAVEPOINT,           "No savepoint with that name");
582   SETMSG(HA_ERR_NON_UNIQUE_BLOCK_SIZE,  "Non unique key block size");
583   SETMSG(HA_ERR_NO_SUCH_TABLE,          "No such table: '%.64s'");
584   SETMSG(HA_ERR_TABLE_EXIST,            ER_DEFAULT(ER_TABLE_EXISTS_ERROR));
585   SETMSG(HA_ERR_NO_CONNECTION,          "Could not connect to storage engine");
586   SETMSG(HA_ERR_TABLE_DEF_CHANGED,      ER_DEFAULT(ER_TABLE_DEF_CHANGED));
587   SETMSG(HA_ERR_FOREIGN_DUPLICATE_KEY,  "FK constraint would lead to duplicate key");
588   SETMSG(HA_ERR_TABLE_NEEDS_UPGRADE,    ER_DEFAULT(ER_TABLE_NEEDS_UPGRADE));
589   SETMSG(HA_ERR_TABLE_READONLY,         ER_DEFAULT(ER_OPEN_AS_READONLY));
590   SETMSG(HA_ERR_AUTOINC_READ_FAILED,    ER_DEFAULT(ER_AUTOINC_READ_FAILED));
591   SETMSG(HA_ERR_AUTOINC_ERANGE,         ER_DEFAULT(ER_WARN_DATA_OUT_OF_RANGE));
592   SETMSG(HA_ERR_TOO_MANY_CONCURRENT_TRXS, ER_DEFAULT(ER_TOO_MANY_CONCURRENT_TRXS));
593   SETMSG(HA_ERR_INDEX_COL_TOO_LONG,     ER_DEFAULT(ER_INDEX_COLUMN_TOO_LONG));
594   SETMSG(HA_ERR_INDEX_CORRUPT,          ER_DEFAULT(ER_INDEX_CORRUPT));
595   SETMSG(HA_FTS_INVALID_DOCID,          "Invalid InnoDB FTS Doc ID");
596   SETMSG(HA_ERR_TABLE_IN_FK_CHECK,	ER_DEFAULT(ER_TABLE_IN_FK_CHECK));
597   SETMSG(HA_ERR_TABLESPACE_EXISTS,      "Tablespace already exists");
598   SETMSG(HA_ERR_FTS_EXCEED_RESULT_CACHE_LIMIT,  "FTS query exceeds result cache limit");
599   SETMSG(HA_ERR_TEMP_FILE_WRITE_FAILURE,	ER_DEFAULT(ER_TEMP_FILE_WRITE_FAILURE));
600   SETMSG(HA_ERR_INNODB_FORCED_RECOVERY,	ER_DEFAULT(ER_INNODB_FORCED_RECOVERY));
601   SETMSG(HA_ERR_FTS_TOO_MANY_WORDS_IN_PHRASE,  "Too many words in a FTS phrase or proximity search");
602   SETMSG(HA_ERR_FTS_TOO_MANY_NESTED_EXP,  "Too many nested sub-expressions in a full-text search");
603   /* Register the error messages for use with my_error(). */
604   return my_error_register(get_handler_errmsgs, HA_ERR_FIRST, HA_ERR_LAST);
605 }
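/*
  Illustrative note (assumes the usual my_error() behaviour for registered
  error ranges): once registered, the handler codes can be reported directly,
  e.g.

    my_error(HA_ERR_NO_SUCH_TABLE, MYF(0), "db_name.table_name");

  which formats the "No such table: '%.64s'" message set above.
*/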
606 
607 
608 /**
609   Unregister handler error messages.
610 
611   @retval
612     0           OK
613   @retval
614     !=0         Error
615 */
616 static int ha_finish_errors(void)
617 {
618   const char    **errmsgs;
619 
620   /* Unregister the error messages and retrieve the pointer array. */
621   if (! (errmsgs= my_error_unregister(HA_ERR_FIRST, HA_ERR_LAST)))
622     return 1;
623   my_free(errmsgs);
624   return 0;
625 }
626 
627 
628 int ha_finalize_handlerton(st_plugin_int *plugin)
629 {
630   handlerton *hton= (handlerton *)plugin->data;
631   DBUG_ENTER("ha_finalize_handlerton");
632 
633   /* hton can be NULL here, if ha_initialize_handlerton() failed. */
634   if (!hton)
635     goto end;
636 
637   switch (hton->state)
638   {
639   case SHOW_OPTION_NO:
640   case SHOW_OPTION_DISABLED:
641     break;
642   case SHOW_OPTION_YES:
643     if (installed_htons[hton->db_type] == hton)
644       installed_htons[hton->db_type]= NULL;
645     break;
646   };
647 
648   if (hton->panic)
649     hton->panic(hton, HA_PANIC_CLOSE);
650 
651   if (plugin->plugin->deinit)
652   {
653     /*
654       Today we have no defined/special behavior for uninstalling
655       engine plugins.
656     */
657     DBUG_PRINT("info", ("Deinitializing plugin: '%s'", plugin->name.str));
658     if (plugin->plugin->deinit(NULL))
659     {
660       DBUG_PRINT("warning", ("Plugin '%s' deinit function returned error.",
661                              plugin->name.str));
662     }
663   }
664 
665   /*
666     In case a plugin is uninstalled and re-installed later, it should
667     reuse an array slot. Otherwise the number of uninstall/install
668     cycles would be limited.
669   */
670   if (hton->slot != HA_SLOT_UNDEF)
671   {
672     /* Make sure we are not unplugging another plugin */
673     DBUG_ASSERT(hton2plugin[hton->slot] == plugin);
674     DBUG_ASSERT(hton->slot < MAX_HA);
675     hton2plugin[hton->slot]= NULL;
676   }
677 
678   my_free(hton);
679 
680  end:
681   DBUG_RETURN(0);
682 }
683 
684 
685 int ha_initialize_handlerton(st_plugin_int *plugin)
686 {
687   handlerton *hton;
688   DBUG_ENTER("ha_initialize_handlerton");
689   DBUG_PRINT("plugin", ("initialize plugin: '%s'", plugin->name.str));
690 
691   hton= (handlerton *)my_malloc(sizeof(handlerton),
692                                 MYF(MY_WME | MY_ZEROFILL));
693 
694   if (hton == NULL)
695   {
696     sql_print_error("Unable to allocate memory for plugin '%s' handlerton.",
697                     plugin->name.str);
698     goto err_no_hton_memory;
699   }
700 
701   hton->slot= HA_SLOT_UNDEF;
702   /* Historical Requirement */
703   plugin->data= hton; // shortcut for the future
704   if (plugin->plugin->init && plugin->plugin->init(hton))
705   {
706     sql_print_error("Plugin '%s' init function returned error.",
707                     plugin->name.str);
708     goto err;
709   }
710 
711   /*
712     the switch below and hton->state should be removed when
713     command-line options for plugins are implemented
714   */
715   DBUG_PRINT("info", ("hton->state=%d", hton->state));
716   switch (hton->state) {
717   case SHOW_OPTION_NO:
718     break;
719   case SHOW_OPTION_YES:
720     {
721       uint tmp;
722       ulong fslot;
723       /* now check the db_type for conflict */
724       if (hton->db_type <= DB_TYPE_UNKNOWN ||
725           hton->db_type >= DB_TYPE_DEFAULT ||
726           installed_htons[hton->db_type])
727       {
728         int idx= (int) DB_TYPE_FIRST_DYNAMIC;
729 
730         while (idx < (int) DB_TYPE_DEFAULT && installed_htons[idx])
731           idx++;
732 
733         if (idx == (int) DB_TYPE_DEFAULT)
734         {
735           sql_print_warning("Too many storage engines!");
736           goto err_deinit;
737         }
738         if (hton->db_type != DB_TYPE_UNKNOWN)
739           sql_print_warning("Storage engine '%s' has conflicting typecode. "
740                             "Assigning value %d.", plugin->plugin->name, idx);
741         hton->db_type= (enum legacy_db_type) idx;
742       }
743 
744       /*
745         In case a plugin is uninstalled and re-installed later, it should
746         reuse an array slot. Otherwise the number of uninstall/install
747         cycles would be limited. So look for a free slot.
748       */
749       DBUG_PRINT("plugin", ("total_ha: %lu", total_ha));
750       for (fslot= 0; fslot < total_ha; fslot++)
751       {
752         if (!hton2plugin[fslot])
753           break;
754       }
755       if (fslot < total_ha)
756         hton->slot= fslot;
757       else
758       {
759         if (total_ha >= MAX_HA)
760         {
761           sql_print_error("Too many plugins loaded. Limit is %lu. "
762                           "Failed on '%s'", (ulong) MAX_HA, plugin->name.str);
763           goto err_deinit;
764         }
765         hton->slot= total_ha++;
766       }
767       installed_htons[hton->db_type]= hton;
768       tmp= hton->savepoint_offset;
769       hton->savepoint_offset= savepoint_alloc_size;
770       savepoint_alloc_size+= tmp;
771       hton2plugin[hton->slot]=plugin;
772       if (hton->prepare)
773         total_ha_2pc++;
774       break;
775     }
776     /* fall through */
777   default:
778     hton->state= SHOW_OPTION_DISABLED;
779     break;
780   }
781 
782   /*
783     This is entirely for legacy. We will create a new "disk based" hton and a
784     "memory" hton which will be configurable longterm. We should be able to
785     remove partition and myisammrg.
786   */
787   switch (hton->db_type) {
788   case DB_TYPE_HEAP:
789     heap_hton= hton;
790     break;
791   case DB_TYPE_MYISAM:
792     myisam_hton= hton;
793     break;
794   case DB_TYPE_PARTITION_DB:
795     partition_hton= hton;
796     break;
797   default:
798     break;
799   };
800 
801   DBUG_RETURN(0);
802 
803 err_deinit:
804   /*
805     Let plugin do its inner deinitialization as plugin->init()
806     was successfully called before.
807   */
808   if (plugin->plugin->deinit)
809     (void) plugin->plugin->deinit(NULL);
810 
811 err:
812   my_free(hton);
813 err_no_hton_memory:
814   plugin->data= NULL;
815   DBUG_RETURN(1);
816 }
817 
818 int ha_init()
819 {
820   int error= 0;
821   DBUG_ENTER("ha_init");
822 
823   DBUG_ASSERT(total_ha < MAX_HA);
824   /*
825     Check if there is a transaction-capable storage engine besides the
826     binary log (which is considered a transaction-capable storage engine in
827     counting total_ha)
828   */
829   opt_using_transactions= total_ha>(ulong)opt_bin_log;
830   savepoint_alloc_size+= sizeof(SAVEPOINT);
831 
832   /*
833     Initialize system database name cache.
834     This cache is used to do a quick check if a given
835     db.tablename is a system table.
836   */
837   known_system_databases= ha_known_system_databases();
838 
839   DBUG_RETURN(error);
840 }
841 
842 int ha_end()
843 {
844   int error= 0;
845   DBUG_ENTER("ha_end");
846 
847 
848   /*
849     This should eventually be based on the graceful shutdown flag.
850     So if the flag is equal to HA_PANIC_CLOSE, then deallocate
851     the errors.
852   */
853   if (ha_finish_errors())
854     error= 1;
855 
856   DBUG_RETURN(error);
857 }
858 
859 static my_bool dropdb_handlerton(THD *unused1, plugin_ref plugin,
860                                  void *path)
861 {
862   handlerton *hton= plugin_data(plugin, handlerton *);
863   if (hton->state == SHOW_OPTION_YES && hton->drop_database)
864     hton->drop_database(hton, (char *)path);
865   return FALSE;
866 }
867 
868 
869 void ha_drop_database(char* path)
870 {
871   plugin_foreach(NULL, dropdb_handlerton, MYSQL_STORAGE_ENGINE_PLUGIN, path);
872 }
873 
874 
875 static my_bool closecon_handlerton(THD *thd, plugin_ref plugin,
876                                    void *unused)
877 {
878   handlerton *hton= plugin_data(plugin, handlerton *);
879   /*
880     there's no need to rollback here as all transactions must
881     be rolled back already
882   */
883   if (hton->state == SHOW_OPTION_YES && thd_get_ha_data(thd, hton))
884   {
885     if (hton->close_connection)
886       hton->close_connection(hton, thd);
887     /* make sure ha_data is reset and ha_data_lock is released */
888     thd_set_ha_data(thd, hton, NULL);
889   }
890   return FALSE;
891 }
892 
893 
894 /**
895   @note
896     don't bother to rollback here, it's done already
897 */
898 void ha_close_connection(THD* thd)
899 {
900   plugin_foreach(thd, closecon_handlerton, MYSQL_STORAGE_ENGINE_PLUGIN, 0);
901 }
902 
903 static my_bool kill_handlerton(THD *thd, plugin_ref plugin, void *)
904 {
905   handlerton *hton= plugin_data(plugin, handlerton *);
906 
907   if (hton->state == SHOW_OPTION_YES && hton->kill_connection)
908   {
909     if (thd_get_ha_data(thd, hton))
910       hton->kill_connection(hton, thd);
911   }
912 
913   return FALSE;
914 }
915 
916 void ha_kill_connection(THD *thd)
917 {
918   plugin_foreach(thd, kill_handlerton, MYSQL_STORAGE_ENGINE_PLUGIN, 0);
919 }
920 
921 /* ========================================================================
922  ======================= TRANSACTIONS ===================================*/
923 
924 /**
925   Transaction handling in the server
926   ==================================
927 
928   In each client connection, MySQL maintains two transactional
929   states:
930   - a statement transaction,
931   - a standard, also called normal transaction.
932 
933   Historical note
934   ---------------
935   "Statement transaction" is a non-standard term that comes
936   from the times when MySQL supported BerkeleyDB storage engine.
937 
938   First of all, it should be said that BerkeleyDB's auto-commit
939   mode auto-commits operations that are atomic to the storage
940   engine itself, such as a write of a record, but too
941   high-granular to be atomic from the application perspective
942   (MySQL). One SQL statement could involve many BerkeleyDB
943   auto-committed operations, and thus BerkeleyDB auto-commit was of
944   little use to MySQL.
945 
946   Secondly, instead of SQL standard savepoints, BerkeleyDB
947   provided the concept of "nested transactions". In a nutshell,
948   transactions could be arbitrarily nested, but when the parent
949   transaction was committed or aborted, all its child (nested)
950   transactions were committed or aborted as well.
951   Commit of a nested transaction, in turn, made its changes
952   visible, but not durable: it destroyed the nested transaction,
953   and all its changes became available to the parent and to the
954   currently active nested transactions of this parent.
955 
956   So the mechanism of nested transactions was employed to
957   provide "all or nothing" guarantee of SQL statements
958   required by the standard.
959   A nested transaction would be created at start of each SQL
960   statement, and destroyed (committed or aborted) at statement
961   end. Such nested transaction was internally referred to as
962   a "statement transaction" and gave birth to the term.
963 
964   (Historical note ends)
965 
966   Since then a statement transaction is started for each statement
967   that accesses transactional tables or uses the binary log.  If
968   the statement succeeds, the statement transaction is committed.
969   If the statement fails, the transaction is rolled back. Commits
970   of statement transactions are not durable -- each such
971   transaction is nested in the normal transaction, and if the
972   normal transaction is rolled back, the effects of all enclosed
973   statement transactions are undone as well.  Technically,
974   a statement transaction can be viewed as a savepoint which is
975   maintained automatically in order to make effects of one
976   statement atomic.
977 
978   The normal transaction is started by the user and is ended
979   usually upon a user request as well. The normal transaction
980   encloses transactions of all statements issued between
981   its beginning and its end.
982   In autocommit mode, the normal transaction is equivalent
983   to the statement transaction.
984 
985   Since MySQL supports PSEA (pluggable storage engine
986   architecture), more than one transactional engine can be
987   active at a time. Hence transactions, from the server
988   point of view, are always distributed. In particular,
989   transactional state is maintained independently for each
990   engine. In order to commit a transaction the two phase
991   commit protocol is employed.
992 
993   Not all statements are executed in context of a transaction.
994   Administrative and status information statements do not modify
995   engine data, and thus do not start a statement transaction and
996   also have no effect on the normal transaction. Examples of such
997   statements are SHOW STATUS and RESET SLAVE.
998 
999   Similarly DDL statements are not transactional,
1000   and therefore a transaction is [almost] never started for a DDL
1001   statement. The difference between a DDL statement and a purely
1002   administrative statement though is that a DDL statement always
1003   commits the current transaction before proceeding, if there is
1004   any.
1005 
1006   At last, SQL statements that work with non-transactional
1007   engines also have no effect on the transaction state of the
1008   connection. Even though they are written to the binary log,
1009   and the binary log is, overall, transactional, the writes
1010   are done in "write-through" mode, directly to the binlog
1011   file, followed by an OS cache sync, in other words,
1012   bypassing the binlog undo log (translog).
1013   They do not commit the current normal transaction.
1014   A failure of a statement that uses non-transactional tables
1015   would cause a rollback of the statement transaction, but
1016   in case no transactional tables are used,
1017   no statement transaction is started.
1018 
1019   Data layout
1020   -----------
1021 
1022   The server stores its transaction-related data in
1023   thd->transaction. This structure has two members of type
1024   THD_TRANS. These members correspond to the statement and
1025   normal transactions respectively:
1026 
1027   - thd->transaction.stmt contains a list of engines
1028   that are participating in the given statement
1029   - thd->transaction.all contains a list of engines that
1030   have participated in any of the statement transactions started
1031   within the context of the normal transaction.
1032   Each element of the list contains a pointer to the storage
1033   engine, engine-specific transactional data, and engine-specific
1034   transaction flags.
1035 
1036   In autocommit mode thd->transaction.all is empty.
1037   Instead, data of thd->transaction.stmt is
1038   used to commit/rollback the normal transaction.
1039 
1040   The list of registered engines has a few important properties:
1041   - no engine is registered in the list twice
1042   - engines are present in the list in reverse temporal order --
1043   new participants are always added to the beginning of the list.
1044 
1045   Transaction life cycle
1046   ----------------------
1047 
1048   When a new connection is established, thd->transaction
1049   members are initialized to an empty state.
1050   If a statement uses any tables, all affected engines
1051   are registered in the statement engine list. In
1052   non-autocommit mode, the same engines are registered in
1053   the normal transaction list.
1054   At the end of the statement, the server issues a commit
1055   or a roll back for all engines in the statement list.
1056   At this point transaction flags of an engine, if any, are
1057   propagated from the statement list to the list of the normal
1058   transaction.
1059   When commit/rollback is finished, the statement list is
1060   cleared. It will be filled in again by the next statement,
1061   and emptied again at the next statement's end.
1062 
1063   The normal transaction is committed in a similar way
1064   (by going over all engines in thd->transaction.all list)
1065   but at different times:
1066   - when a COMMIT SQL statement is issued by the user
1067   - implicitly, by the server, at the beginning of a DDL statement
1068   or SET AUTOCOMMIT={0|1} statement.
1069 
1070   The normal transaction can be rolled back as well:
1071   - if the user has requested so, by issuing ROLLBACK SQL
1072   statement
1073   - if one of the storage engines requested a rollback
1074   by setting thd->transaction_rollback_request. This may
1075   happen, for example, when the transaction in the engine was
1076   chosen as a victim of the internal deadlock resolution algorithm
1077   and rolled back internally. When such a situation happens, there
1078   is little the server can do and the only option is to rollback
1079   transactions in all other participating engines.  In this case
1080   the rollback is accompanied by an error sent to the user.
1081 
1082   As follows from the use cases above, the normal transaction
1083   is never committed when there is an outstanding statement
1084   transaction. In most cases there is no conflict, since
1085   commits of the normal transaction are issued by a stand-alone
1086   administrative or DDL statement, thus no outstanding statement
1087   transaction of the previous statement exists. Besides,
1088   all statements that manipulate the normal transaction
1089   are prohibited in stored functions and triggers, therefore
1090   no conflicting situation can occur in a sub-statement either.
1091   The remaining rare cases when the server explicitly has
1092   to commit the statement transaction prior to committing the normal
1093   one cover error-handling scenarios (see for example
1094   SQLCOM_LOCK_TABLES).
1095 
1096   When committing a statement or a normal transaction, the server
1097   either uses the two-phase commit protocol, or issues a commit
1098   in each engine independently. The two-phase commit protocol
1099   is used only if:
1100   - all participating engines support two-phase commit (provide
1101     handlerton::prepare PSEA API call) and
1102   - transactions in at least two engines modify data (i.e. are
1103   not read-only).
1104 
1105   Note that the two phase commit is used for
1106   statement transactions, even though they are not durable anyway.
1107   This is done to ensure logical consistency of data in a multiple-
1108   engine transaction.
1109   For example, imagine that some day MySQL supports unique
1110   constraint checks deferred till the end of statement. In such
1111   case a commit in one of the engines may yield ER_DUP_KEY,
1112   and MySQL should be able to gracefully abort statement
1113   transactions of other participants.
1114 
1115   After the normal transaction has been committed,
1116   thd->transaction.all list is cleared.
1117 
1118   When a connection is closed, the current normal transaction, if
1119   any, is rolled back.
1120 
1121   Roles and responsibilities
1122   --------------------------
1123 
1124   The server has no way to know that an engine participates in
1125   the statement and a transaction has been started
1126   in it unless the engine says so. Thus, in order to be
1127   a part of a transaction, the engine must "register" itself.
1128   This is done by invoking trans_register_ha() server call.
1129   Normally the engine registers itself whenever handler::external_lock()
1130   is called. trans_register_ha() can be invoked many times: if
1131   an engine is already registered, the call does nothing.
1132   In case autocommit is not set, the engine must register itself
1133   twice -- both in the statement list and in the normal transaction
1134   list.
1135   In which list to register is a parameter of trans_register_ha().
1136 
1137   Note, that although the registration interface in itself is
1138   fairly clear, the current usage practice often leads to undesired
1139   effects. E.g. since a call to trans_register_ha() in most engines
1140   is embedded into implementation of handler::external_lock(), some
1141   DDL statements start a transaction (at least from the server
1142   point of view) even though they are not expected to. E.g.
1143   CREATE TABLE does not start a transaction, since
1144   handler::external_lock() is never called during CREATE TABLE. But
1145   CREATE TABLE ... SELECT does, since handler::external_lock() is
1146   called for the table that is being selected from. This has no
1147   practical effects currently, but must be kept in mind
1148   nevertheless.
1149 
1150   Once an engine is registered, the server will do the rest
1151   of the work.
1152 
1153   During statement execution, whenever any of data-modifying
1154   PSEA API methods is used, e.g. handler::write_row() or
1155   handler::update_row(), the read-write flag is raised in the
1156   statement transaction for the involved engine.
1157   Currently all PSEA calls are "traced", and the data cannot be
1158   changed in a way other than issuing a PSEA call. Important:
1159   unless this invariant is preserved the server will not know that
1160   a transaction in a given engine is read-write and will not
1161   involve the two-phase commit protocol!
1162 
1163   At the end of a statement, the server function trans_commit_stmt() is
1164   invoked. This call in turn invokes handlerton::prepare()
1165   for every involved engine. Prepare is followed by a call
1166   to handlerton::commit_one_phase(). If a one-phase commit
1167   suffices, handlerton::prepare() is not invoked and
1168   the server only calls handlerton::commit_one_phase().
1169   At statement commit, the statement-related read-write
1170   engine flag is propagated to the corresponding flag in the
1171   normal transaction.  When the commit is complete, the list
1172   of registered engines is cleared.
1173 
1174   Rollback is handled in a similar fashion.
1175 
1176   Additional notes on DDL and the normal transaction.
1177   ---------------------------------------------------
1178 
1179   DDLs and operations with non-transactional engines
1180   do not "register" in thd->transaction lists, and thus do not
1181   modify the transaction state. Besides, each DDL in
1182   MySQL is prefixed with an implicit normal transaction commit
1183   (a call to trans_commit_implicit()), and thus leaves nothing
1184   to modify.
1185   However, as it has been pointed out with CREATE TABLE .. SELECT,
1186   some DDL statements can start a *new* transaction.
1187 
1188   Behaviour of the server in this case is currently badly
1189   defined.
1190   DDL statements use a form of "semantic" logging
1191   to maintain atomicity: if CREATE TABLE .. SELECT failed,
1192   the newly created table is deleted.
1193   In addition, some DDL statements issue interim transaction
1194   commits: e.g. ALTER TABLE issues a commit after data is copied
1195   from the original table to the internal temporary table. Other
1196   statements, e.g. CREATE TABLE ... SELECT, do not always commit
1197   after themselves.
1198   And finally there is a group of DDL statements such as
1199   RENAME/DROP TABLE that doesn't start a new transaction
1200   and doesn't commit.
1201 
1202   This diversity makes it hard to say what will happen if
1203   by chance a stored function is invoked during a DDL --
1204   whether any modifications it makes will be committed or not
1205   is not clear. Fortunately, the SQL grammar of only a few DDLs allows
1206   invocation of a stored function.
1207 
1208   A consistent behaviour is perhaps to always commit the normal
1209   transaction after all DDLs, just like the statement transaction
1210   is always committed at the end of all statements.
1211 */
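
/*
  Illustrative registration sketch (ha_example is a hypothetical engine): a
  transactional engine typically registers itself from
  handler::external_lock(), once for the statement transaction and, when not
  in autocommit mode, once more for the normal transaction:

    int ha_example::external_lock(THD *thd, int lock_type)
    {
      if (lock_type != F_UNLCK)
      {
        trans_register_ha(thd, FALSE, ht);        // statement transaction
        if (thd_test_options(thd, OPTION_NOT_AUTOCOMMIT | OPTION_BEGIN))
          trans_register_ha(thd, TRUE, ht);       // normal transaction
      }
      return 0;
    }

  Repeated calls are harmless: trans_register_ha() returns early if the
  engine is already registered in the requested list.
*/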
1212 
1213 /**
1214   Register a storage engine for a transaction.
1215 
1216   Every storage engine MUST call this function when it starts
1217   a transaction or a statement (that is, it must be called both for the
1218   "beginning of transaction" and "beginning of statement").
1219   Only storage engines registered for the transaction/statement
1220   will know when to commit/rollback it.
1221 
1222   @note
1223     trans_register_ha is idempotent - a storage engine may register many
1224     times per transaction.
1225 
1226 */
1227 void trans_register_ha(THD *thd, bool all, handlerton *ht_arg)
1228 {
1229   THD_TRANS *trans;
1230   Ha_trx_info *ha_info;
1231   DBUG_ENTER("trans_register_ha");
1232   DBUG_PRINT("enter",("%s", all ? "all" : "stmt"));
1233 
1234   if (all)
1235   {
1236     trans= &thd->transaction.all;
1237     thd->server_status|= SERVER_STATUS_IN_TRANS;
1238     if (thd->tx_read_only)
1239       thd->server_status|= SERVER_STATUS_IN_TRANS_READONLY;
1240     DBUG_PRINT("info", ("setting SERVER_STATUS_IN_TRANS"));
1241   }
1242   else
1243     trans= &thd->transaction.stmt;
1244 
1245   ha_info= thd->ha_data[ht_arg->slot].ha_info + (all ? 1 : 0);
1246 
1247   if (ha_info->is_started())
1248     DBUG_VOID_RETURN; /* already registered, return */
1249 
1250   ha_info->register_ha(trans, ht_arg);
1251 
1252   trans->no_2pc|=(ht_arg->prepare==0);
1253   if (thd->transaction.xid_state.xid.is_null())
1254     thd->transaction.xid_state.xid.set(thd->query_id);
1255   DBUG_VOID_RETURN;
1256 }
1257 
1258 /**
1259   @retval
1260     0   ok
1261   @retval
1262     1   error, transaction was rolled back
1263 */
1264 int ha_prepare(THD *thd)
1265 {
1266   int error=0, all=1;
1267   THD_TRANS *trans=all ? &thd->transaction.all : &thd->transaction.stmt;
1268   Ha_trx_info *ha_info= trans->ha_list;
1269   DBUG_ENTER("ha_prepare");
1270 
1271   if (ha_info)
1272   {
1273     for (; ha_info; ha_info= ha_info->next())
1274     {
1275       int err;
1276       handlerton *ht= ha_info->ht();
1277       DBUG_ASSERT(!thd->status_var_aggregated);
1278       status_var_increment(thd->status_var.ha_prepare_count);
1279       if (ht->prepare)
1280       {
1281         if ((err= ht->prepare(ht, thd, all)))
1282         {
1283           my_error(ER_ERROR_DURING_COMMIT, MYF(0), err);
1284           ha_rollback_trans(thd, all);
1285           error=1;
1286           break;
1287         }
1288       }
1289       else
1290       {
1291         push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN,
1292                             ER_ILLEGAL_HA, ER(ER_ILLEGAL_HA),
1293                             ha_resolve_storage_engine_name(ht));
1294       }
1295     }
1296   }
1297 
1298   DBUG_RETURN(error);
1299 }
1300 
1301 /**
1302   Check if we can skip the two-phase commit.
1303 
1304   A helper function to evaluate if two-phase commit is mandatory.
1305   As a side effect, propagates the read-only/read-write flags
1306   of the statement transaction to its enclosing normal transaction.
1307 
1308   If we have at least two engines with read-write changes we must
1309   run a two-phase commit. Otherwise we can run several independent
1310   commits as the only transactional engine has read-write changes
1311   and others are read-only.
1312 
1313   @retval   0   All engines are read-only.
1314   @retval   1   Only one engine has read-write changes.
1315   @retval   >1  More than one engine have read-write changes.
1316                 Note: return value might NOT be the exact number of
1317                 engines with read-write changes.
1318 */
1319 
1320 static
1321 uint
1322 ha_check_and_coalesce_trx_read_only(THD *thd, Ha_trx_info *ha_list,
1323                                     bool all)
1324 {
1325   /* The number of storage engines that have actual changes. */
1326   unsigned rw_ha_count= 0;
1327   Ha_trx_info *ha_info;
1328 
1329   for (ha_info= ha_list; ha_info; ha_info= ha_info->next())
1330   {
1331     if (ha_info->is_trx_read_write())
1332       ++rw_ha_count;
1333     else
1334     {
1335       /*
1336         If we have any fake changes handlertons, they will not be marked as
1337         read-write, potentially skipping 2PC and causing the fake transaction
1338         to be binlogged.  Force using 2PC in this case by bumping rw_ha_count
1339         for each fake changes handlerton.
1340        */
1341       handlerton *ht= ha_info->ht();
1342       if (unlikely(ht->is_fake_change && ht->is_fake_change(ht, thd)))
1343         ++rw_ha_count;
1344     }
1345 
1346     if (! all)
1347     {
1348       Ha_trx_info *ha_info_all= &thd->ha_data[ha_info->ht()->slot].ha_info[1];
1349       DBUG_ASSERT(ha_info != ha_info_all);
1350       /*
1351         Merge read-only/read-write information about statement
1352         transaction to its enclosing normal transaction. Do this
1353         only if in a real transaction -- that is, if we know
1354         that ha_info_all is registered in thd->transaction.all.
1355         Since otherwise we only clutter the normal transaction flags.
1356       */
1357       if (ha_info_all->is_started()) /* FALSE if autocommit. */
1358         ha_info_all->coalesce_trx_with(ha_info);
1359     }
1360     else if (rw_ha_count > 1)
1361     {
1362       /*
1363         It is a normal transaction, so we don't need to merge read/write
1364         information up, and the need for two-phase commit has been
1365         already established. Break the loop prematurely.
1366       */
1367       break;
1368     }
1369   }
1370   return rw_ha_count;
1371 }
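/*
  Illustrative example of the decision above: a transaction with read-write
  changes in two participants (say, InnoDB plus the binary log) yields
  rw_ha_count == 2, so ha_commit_trans() below goes through tc_log->prepare()
  before committing; with at most one read-write participant each engine is
  committed in a single phase.
*/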
1372 
1373 
1374 /**
1375   @param[in] ignore_global_read_lock   Allow commit to complete even if a
1376                                        global read lock is active. This can be
1377                                        used to allow changes to internal tables
1378                                        (e.g. slave status tables).
1379 
1380   @retval
1381     0   ok
1382   @retval
1383     1   transaction was rolled back
1384   @retval
1385     2   error during commit, data may be inconsistent
1386 
1387   @todo
1388     Since we don't support nested statement transactions in 5.0,
1389     we can't commit or rollback stmt transactions while we are inside
1390     stored functions or triggers. So we simply do nothing now.
1391     TODO: This should be fixed in later ( >= 5.1) releases.
1392 */
1393 
1394 int ha_commit_trans(THD *thd, bool all, bool ignore_global_read_lock)
1395 {
1396   int error= 0;
1397   /*
1398     'all' means that this is either an explicit commit issued by
1399     the user, or an implicit commit issued by a DDL.
1400   */
1401   THD_TRANS *trans= all ? &thd->transaction.all : &thd->transaction.stmt;
1402   /*
1403     "real" is a nickname for a transaction for which a commit will
1404     make persistent changes. E.g. a 'stmt' transaction inside an 'all'
1405     transaction is not 'real': even though it's possible to commit it,
1406     the changes are not durable as they might be rolled back if the
1407     enclosing 'all' transaction is rolled back.
1408   */
1409   bool is_real_trans= all || thd->transaction.all.ha_list == 0;
1410   Ha_trx_info *ha_info= trans->ha_list;
1411   DBUG_ENTER("ha_commit_trans");
1412 
1413   DBUG_PRINT("info", ("all=%d thd->in_sub_stmt=%d ha_info=%p is_real_trans=%d",
1414                       all, thd->in_sub_stmt, ha_info, is_real_trans));
1415   /*
1416     We must not commit the normal transaction if a statement
1417     transaction is pending. Otherwise statement transaction
1418     flags will not get propagated to its normal transaction's
1419     counterpart.
1420   */
1421   DBUG_ASSERT(thd->transaction.stmt.ha_list == NULL ||
1422               trans == &thd->transaction.stmt);
1423 
1424   if (thd->in_sub_stmt)
1425   {
1426     DBUG_ASSERT(0);
1427     /*
1428       Since we don't support nested statement transactions in 5.0,
1429       we can't commit or rollback stmt transactions while we are inside
1430       stored functions or triggers. So we simply do nothing now.
1431       TODO: This should be fixed in later ( >= 5.1) releases.
1432     */
1433     if (!all)
1434       DBUG_RETURN(0);
1435     /*
1436       We assume that all statements which commit or roll back the main
1437       transaction are prohibited inside stored functions or triggers. So they
1438       should bail out with an error even before the ha_commit_trans() call.
1439       To be 100% safe let us throw an error in non-debug builds.
1440     */
1441     my_error(ER_COMMIT_NOT_ALLOWED_IN_SF_OR_TRG, MYF(0));
1442     DBUG_RETURN(2);
1443   }
1444 
1445   MDL_request mdl_request;
1446   bool release_mdl= false;
1447 
1448   if (ha_info)
1449   {
1450     uint rw_ha_count;
1451     bool rw_trans;
1452 
1453     DBUG_EXECUTE_IF("crash_commit_before", DBUG_SUICIDE(););
1454 
1455     /* Close all cursors that can not survive COMMIT */
1456     if (is_real_trans)                          /* not a statement commit */
1457       thd->stmt_map.close_transient_cursors();
1458 
1459     rw_ha_count= ha_check_and_coalesce_trx_read_only(thd, ha_info, all);
1460     trans->rw_ha_count= rw_ha_count;
1461     /* rw_trans is TRUE when we are in a transaction that changes data */
1462     rw_trans= is_real_trans && (rw_ha_count > 0);
1463 
1464     DBUG_EXECUTE_IF("dbug.enabled_commit",
1465                     {
1466                       const char act[]= "now signal Reached wait_for signal.commit_continue";
1467                       DBUG_ASSERT(!debug_sync_set_action(current_thd,
1468                                                          STRING_WITH_LEN(act)));
1469                     };);
1470     if (rw_trans && !ignore_global_read_lock)
1471     {
1472       /*
1473         Acquire a metadata lock which will ensure that COMMIT is blocked
1474         by an active FLUSH TABLES WITH READ LOCK (and vice versa:
1475         COMMIT in progress blocks FTWRL).
1476 
1477         We allow the owner of FTWRL to COMMIT; we assume that it knows
1478         what it does.
1479       */
1480       mdl_request.init(MDL_key::COMMIT, "", "", MDL_INTENTION_EXCLUSIVE,
1481                        MDL_EXPLICIT);
1482 
1483       DBUG_PRINT("debug", ("Acquire MDL commit lock"));
1484       if (thd->mdl_context.acquire_lock(&mdl_request,
1485                                         thd->variables.lock_wait_timeout))
1486       {
1487         ha_rollback_trans(thd, all);
1488         DBUG_RETURN(1);
1489       }
1490       release_mdl= true;
1491 
1492       DEBUG_SYNC(thd, "ha_commit_trans_after_acquire_commit_lock");
1493     }
1494 
1495     bool enforce_ro= true;
1496     if (!opt_super_readonly)
1497       enforce_ro= !(thd->security_ctx->master_access & SUPER_ACL);
1498     /*
1499       Ignore super_read_only when ignore_global_read_lock is set.
1500       ignore_global_read_lock is set for transactions on replication
1501       repository tables.
1502     */
1503     if (ignore_global_read_lock)
1504       enforce_ro= false;
1505     if (rw_trans && stmt_has_updated_trans_table(ha_info) &&
1506         opt_readonly &&
1507         enforce_ro &&
1508         !thd->slave_thread)
1509     {
1510       my_error(ER_OPTION_PREVENTS_STATEMENT, MYF(0),
1511                opt_super_readonly ? "--read-only (super)" : "--read-only");
1512       ha_rollback_trans(thd, all);
1513       error= 1;
1514       goto end;
1515     }
1516 
1517     if (!trans->no_2pc && (rw_ha_count > 1))
1518       error= tc_log->prepare(thd, all);
1519   }
1520   if (error || (error= tc_log->commit(thd, all)))
1521   {
1522     ha_rollback_trans(thd, all);
1523     error= 1;
1524     goto end;
1525   }
1526   DBUG_EXECUTE_IF("crash_commit_after", DBUG_SUICIDE(););
1527 end:
1528   if (release_mdl && mdl_request.ticket)
1529   {
1530     /*
1531       We do not always immediately release transactional locks
1532       after ha_commit_trans() (see uses of ha_enable_transaction()),
1533       thus we release the commit blocker lock as soon as it's
1534       not needed.
1535     */
1536     DBUG_PRINT("debug", ("Releasing MDL commit lock"));
1537     thd->mdl_context.release_lock(mdl_request.ticket);
1538   }
1539   /* Free resources and perform other cleanup even for 'empty' transactions. */
1540   if (is_real_trans)
1541     thd->transaction.cleanup();
1542 
1543   if (!error)
1544     thd->diff_commit_trans++;
1545 
1546   DBUG_RETURN(error);
1547 }
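/*
  Editorial sketch (not part of the original source): the commit path in
  ha_commit_trans() above can be summarized roughly as follows, assuming
  tc_log is the active transaction coordinator:

    read-only transaction                  -> no prepare, tc_log->commit() only
    exactly one read-write engine          -> skip prepare, tc_log->commit()
    several read-write engines, 2pc usable -> tc_log->prepare(), then tc_log->commit()
    any failure along the way              -> ha_rollback_trans()
*/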
1548 
1549 /**
1550   Commit the session's outstanding transaction.
1551 
1552   @pre thd->transaction.flags.commit_low == true
1553   @post thd->transaction.flags.commit_low == false
1554 
1555   @note This function does not care about global read lock; the caller
1556   should.
1557 
1558   @param[in]  all  Is set in case of explicit commit
1559                    (COMMIT statement), or implicit commit
1560                    issued by DDL. Is not set when called
1561                    at the end of statement, even if
1562                    autocommit=1.
1563   @param[in]  run_after_commit
1564                    True by default. When false, this function does
1565                    not execute the after_commit hook.
1566 */
1567 
1568 int ha_commit_low(THD *thd, bool all, bool run_after_commit)
1569 {
1570   int error=0;
1571   THD_TRANS *trans=all ? &thd->transaction.all : &thd->transaction.stmt;
1572   Ha_trx_info *ha_info= trans->ha_list, *ha_info_next;
1573   DBUG_ENTER("ha_commit_low");
1574 
1575   if (ha_info)
1576   {
1577     for (; ha_info; ha_info= ha_info_next)
1578     {
1579       int err;
1580       handlerton *ht= ha_info->ht();
1581       if ((err= ht->commit(ht, thd, all)))
1582       {
1583         my_error(ER_ERROR_DURING_COMMIT, MYF(0), err);
1584         error=1;
1585       }
1586       DBUG_ASSERT(!thd->status_var_aggregated);
1587       status_var_increment(thd->status_var.ha_commit_count);
1588       ha_info_next= ha_info->next();
1589       ha_info->reset(); /* keep it conveniently zero-filled */
1590     }
1591     trans->ha_list= 0;
1592     trans->no_2pc=0;
1593     trans->rw_ha_count= 0;
1594     if (all)
1595     {
1596 #ifdef HAVE_QUERY_CACHE
1597       if (thd->transaction.changed_tables)
1598         query_cache.invalidate(thd->transaction.changed_tables);
1599 #endif
1600     }
1601   }
1602   /* Free resources and perform other cleanup even for 'empty' transactions. */
1603   if (all)
1604     thd->transaction.cleanup();
1605   /*
1606     When the transaction has been committed, we clear the commit_low
1607     flag. This allows other parts of the system to check if commit_low
1608     was called.
1609   */
1610   thd->transaction.flags.commit_low= false;
1611   if (run_after_commit && thd->transaction.flags.run_hooks)
1612   {
1613     /*
1614        If commit succeeded, we call the after_commit hook.
1615 
1616        TODO: Investigate if this can be refactored so that there is
1617              only one invocation of this hook in the code (in
1618              MYSQL_LOG_BIN::finish_commit).
1619     */
1620     if (!error)
1621       (void) RUN_HOOK(transaction, after_commit, (thd, all));
1622     thd->transaction.flags.run_hooks= false;
1623   }
1624   DBUG_RETURN(error);
1625 }
1626 
1627 
1628 int ha_rollback_low(THD *thd, bool all)
1629 {
1630   THD_TRANS *trans=all ? &thd->transaction.all : &thd->transaction.stmt;
1631   Ha_trx_info *ha_info= trans->ha_list, *ha_info_next;
1632   int error= 0;
1633 
1634   if (ha_info)
1635   {
1636     /* Close all cursors that can not survive ROLLBACK */
1637     if (all)                          /* not a statement commit */
1638       thd->stmt_map.close_transient_cursors();
1639 
1640     for (; ha_info; ha_info= ha_info_next)
1641     {
1642       int err;
1643       handlerton *ht= ha_info->ht();
1644       if ((err= ht->rollback(ht, thd, all)))
1645       { // cannot happen
1646         my_error(ER_ERROR_DURING_ROLLBACK, MYF(0), err);
1647         error= 1;
1648       }
1649       DBUG_ASSERT(!thd->status_var_aggregated);
1650       status_var_increment(thd->status_var.ha_rollback_count);
1651       ha_info_next= ha_info->next();
1652       ha_info->reset(); /* keep it conveniently zero-filled */
1653     }
1654     trans->ha_list= 0;
1655     trans->no_2pc=0;
1656     trans->rw_ha_count= 0;
1657   }
1658 
1659   /*
1660     Because of an MDL deadlock, a rollback request can arrive even if the
1661     transaction hasn't been started in any transactional storage engine.
1662   */
1663   if (all && thd->transaction_rollback_request &&
1664       thd->transaction.xid_state.xa_state != XA_NOTR)
1665     thd->transaction.xid_state.rm_error= thd->get_stmt_da()->sql_errno();
1666 
1667   (void) RUN_HOOK(transaction, after_rollback, (thd, all));
1668   return error;
1669 }
1670 
1671 
1672 int ha_rollback_trans(THD *thd, bool all)
1673 {
1674   int error=0;
1675 #ifndef DBUG_OFF
1676   THD_TRANS *trans=all ? &thd->transaction.all : &thd->transaction.stmt;
1677 #endif
1678   /*
1679     "real" is a nickname for a transaction for which a commit will
1680     make persistent changes. E.g. a 'stmt' transaction inside an 'all'
1681     transaction is not 'real': even though it's possible to commit it,
1682     the changes are not durable as they might be rolled back if the
1683     enclosing 'all' transaction is rolled back.
1684     We establish the value of 'is_real_trans' by checking
1685     if it's an explicit COMMIT or BEGIN statement, or implicit
1686     commit issued by DDL (in these cases all == TRUE),
1687     or if we're running in autocommit mode (it's only in the autocommit mode
1688     ha_commit_one_phase() is called with an empty
1689     transaction.all.ha_list, see why in trans_register_ha()).
1690   */
1691   bool is_real_trans= all || thd->transaction.all.ha_list == NULL;
1692   DBUG_ENTER("ha_rollback_trans");
1693 
1694   /*
1695     We must not rollback the normal transaction if a statement
1696     transaction is pending.
1697   */
1698   DBUG_ASSERT(thd->transaction.stmt.ha_list == NULL ||
1699               trans == &thd->transaction.stmt);
1700 
1701   if (thd->in_sub_stmt)
1702   {
1703     DBUG_ASSERT(0);
1704     /*
1705       If we are inside stored function or trigger we should not commit or
1706       rollback current statement transaction. See comment in ha_commit_trans()
1707       call for more information.
1708     */
1709     if (!all)
1710       DBUG_RETURN(0);
1711     my_error(ER_COMMIT_NOT_ALLOWED_IN_SF_OR_TRG, MYF(0));
1712     DBUG_RETURN(1);
1713   }
1714 
1715   if (tc_log)
1716     tc_log->rollback(thd, all);
1717 
1718   /* Always cleanup. Even if nht==0. There may be savepoints. */
1719   if (is_real_trans)
1720     thd->transaction.cleanup();
1721 
1722   thd->diff_rollback_trans++;
1723   if (all)
1724     thd->transaction_rollback_request= FALSE;
1725 
1726   /*
1727     Only call gtid_rollback(THD*), which will purge thd->owned_gtid, if
1728     the complete transaction is being rolled back or autocommit=1.
1729   */
1730   if (is_real_trans)
1731     gtid_rollback(thd);
1732 
1733   /*
1734     If the transaction cannot be rolled back safely, warn; don't warn if this
1735     is a slave thread (because when a slave thread executes a ROLLBACK, it has
1736     been read from the binary log, so it's 100% sure and normal to produce
1737     error ER_WARNING_NOT_COMPLETE_ROLLBACK). If we sent the warning to the
1738     slave SQL thread, it would not stop the thread but just be printed in
1739     the error log; but we don't want users to wonder why they have this
1740     message in the error log, so we don't send it.
1741   */
1742 #ifndef DBUG_OFF
1743   thd->transaction.stmt.dbug_unsafe_rollback_flags("stmt");
1744   thd->transaction.all.dbug_unsafe_rollback_flags("all");
1745 #endif
1746   if (is_real_trans && thd->transaction.all.cannot_safely_rollback() &&
1747       !thd->slave_thread && thd->killed != THD::KILL_CONNECTION)
1748     thd->transaction.push_unsafe_rollback_warnings(thd);
1749   DBUG_RETURN(error);
1750 }
1751 
1752 
1753 struct xahton_st {
1754   XID *xid;
1755   int result;
1756 };
1757 
1758 static my_bool xacommit_handlerton(THD *unused1, plugin_ref plugin,
1759                                    void *arg)
1760 {
1761   handlerton *hton= plugin_data(plugin, handlerton *);
1762   if (hton->state == SHOW_OPTION_YES && hton->recover)
1763   {
1764     hton->commit_by_xid(hton, ((struct xahton_st *)arg)->xid);
1765     ((struct xahton_st *)arg)->result= 0;
1766   }
1767   return FALSE;
1768 }
1769 
1770 static my_bool xarollback_handlerton(THD *unused1, plugin_ref plugin,
1771                                      void *arg)
1772 {
1773   handlerton *hton= plugin_data(plugin, handlerton *);
1774   if (hton->state == SHOW_OPTION_YES && hton->recover)
1775   {
1776     hton->rollback_by_xid(hton, ((struct xahton_st *)arg)->xid);
1777     ((struct xahton_st *)arg)->result= 0;
1778   }
1779   return FALSE;
1780 }
1781 
1782 
1783 int ha_commit_or_rollback_by_xid(THD *thd, XID *xid, bool commit)
1784 {
1785   struct xahton_st xaop;
1786   xaop.xid= xid;
1787   xaop.result= 1;
1788 
1789   plugin_foreach(NULL, commit ? xacommit_handlerton : xarollback_handlerton,
1790                  MYSQL_STORAGE_ENGINE_PLUGIN, &xaop);
1791 
1792   gtid_rollback(thd);
1793 
1794   return xaop.result;
1795 }
1796 
1797 
1798 #ifndef DBUG_OFF
1799 /**
1800   @note
1801     This does not need to be multi-byte safe or anything
1802 */
1803 static char* xid_to_str(char *buf, XID *xid)
1804 {
1805   int i;
1806   char *s=buf;
1807   *s++='\'';
1808   for (i=0; i < xid->gtrid_length+xid->bqual_length; i++)
1809   {
1810     uchar c=(uchar)xid->data[i];
1811     /* is_next_dig is set if next character is a number */
1812     bool is_next_dig= FALSE;
1813     if (i < XIDDATASIZE)
1814     {
1815       char ch= xid->data[i+1];
1816       is_next_dig= (ch >= '0' && ch <='9');
1817     }
1818     if (i == xid->gtrid_length)
1819     {
1820       *s++='\'';
1821       if (xid->bqual_length)
1822       {
1823         *s++='.';
1824         *s++='\'';
1825       }
1826     }
1827     if (c < 32 || c > 126)
1828     {
1829       *s++='\\';
1830       /*
1831         If next character is a number, write current character with
1832         3 octal numbers to ensure that the next number is not seen
1833         as part of the octal number
1834       */
1835       if (c > 077 || is_next_dig)
1836         *s++=_dig_vec_lower[c >> 6];
1837       if (c > 007 || is_next_dig)
1838         *s++=_dig_vec_lower[(c >> 3) & 7];
1839       *s++=_dig_vec_lower[c & 7];
1840     }
1841     else
1842     {
1843       if (c == '\'' || c == '\\')
1844         *s++='\\';
1845       *s++=c;
1846     }
1847   }
1848   *s++='\'';
1849   *s=0;
1850   return buf;
1851 }
1852 #endif
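/*
  Illustrative example (not part of the original source) of the quoting
  performed by xid_to_str() above, assuming a hypothetical XID with
  gtrid "ab" and bqual "cd":

    xid_to_str(buf, xid)  ->  'ab'.'cd'

  Non-printable bytes are emitted as backslash-escaped octal; all three
  octal digits are written whenever the following character is a decimal
  digit, so the escape cannot be misread as a longer octal number.
*/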
1853 
1854 /**
1855   recover() step of xa.
1856 
1857   @note
1858     there are three modes of operation:
1859     - automatic recover after a crash
1860     in this case commit_list != 0, tc_heuristic_recover==0
1861     all xids from commit_list are committed, others are rolled back
1862     - manual (heuristic) recover
1863     in this case commit_list==0, tc_heuristic_recover != 0
1864     DBA has explicitly specified that all prepared transactions should
1865     be committed (or rolled back).
1866     - no recovery (MySQL did not detect a crash)
1867     in this case commit_list==0, tc_heuristic_recover == 0
1868     there should be no prepared transactions in this case.
1869 */
1870 struct xarecover_st
1871 {
1872   int len, found_foreign_xids, found_my_xids;
1873   XID *list;
1874   HASH *commit_list;
1875   bool dry_run;
1876 };
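/*
  Editorial summary (not part of the original source) of the three recovery
  modes described above, as seen by xarecover_handlerton():

    commit_list != 0, tc_heuristic_recover == 0 -> crash recovery: commit the
                                                   listed xids, roll back the rest
    commit_list == 0, tc_heuristic_recover != 0 -> heuristic recovery: commit or
                                                   roll back everything, as the DBA chose
    commit_list == 0, tc_heuristic_recover == 0 -> dry run: only count prepared xids
*/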
1877 
1878 static my_bool xarecover_handlerton(THD *unused, plugin_ref plugin,
1879                                     void *arg)
1880 {
1881   handlerton *hton= plugin_data(plugin, handlerton *);
1882   struct xarecover_st *info= (struct xarecover_st *) arg;
1883   int got;
1884 
1885   if (hton->state == SHOW_OPTION_YES && hton->recover)
1886   {
1887     while ((got= hton->recover(hton, info->list, info->len)) > 0 )
1888     {
1889       sql_print_information("Found %d prepared transaction(s) in %s",
1890                             got, ha_resolve_storage_engine_name(hton));
1891       for (int i=0; i < got; i ++)
1892       {
1893         my_xid x=info->list[i].get_my_xid();
1894         if (!x) // not "mine" - that is generated by external TM
1895         {
1896 #ifndef DBUG_OFF
1897           char buf[XIDDATASIZE*4+6]; // see xid_to_str
1898           sql_print_information("ignore xid %s", xid_to_str(buf, info->list+i));
1899 #endif
1900           xid_cache_insert(info->list+i, XA_PREPARED);
1901           info->found_foreign_xids++;
1902           continue;
1903         }
1904         if (info->dry_run)
1905         {
1906           info->found_my_xids++;
1907           continue;
1908         }
1909         // recovery mode
1910         if (info->commit_list ?
1911             my_hash_search(info->commit_list, (uchar *)&x, sizeof(x)) != 0 :
1912             tc_heuristic_recover == TC_HEURISTIC_RECOVER_COMMIT)
1913         {
1914 #ifndef DBUG_OFF
1915           char buf[XIDDATASIZE*4+6]; // see xid_to_str
1916           sql_print_information("commit xid %s", xid_to_str(buf, info->list+i));
1917 #endif
1918           hton->commit_by_xid(hton, info->list+i);
1919         }
1920         else
1921         {
1922 #ifndef DBUG_OFF
1923           char buf[XIDDATASIZE*4+6]; // see xid_to_str
1924           sql_print_information("rollback xid %s",
1925                                 xid_to_str(buf, info->list+i));
1926 #endif
1927           hton->rollback_by_xid(hton, info->list+i);
1928         }
1929       }
1930       if (got < info->len)
1931         break;
1932     }
1933   }
1934   return FALSE;
1935 }
1936 
1937 int ha_recover(HASH *commit_list)
1938 {
1939   struct xarecover_st info;
1940   DBUG_ENTER("ha_recover");
1941   info.found_foreign_xids= info.found_my_xids= 0;
1942   info.commit_list= commit_list;
1943   info.dry_run= (info.commit_list==0 && tc_heuristic_recover==0);
1944   info.list= NULL;
1945 
1946   /* commit_list and tc_heuristic_recover cannot both be set */
1947   DBUG_ASSERT(info.commit_list==0 || tc_heuristic_recover==0);
1948   /* if either is set, total_ha_2pc must be set too */
1949   DBUG_ASSERT(info.dry_run || total_ha_2pc>(ulong)opt_bin_log);
1950 
1951   if (total_ha_2pc <= (ulong)opt_bin_log)
1952     DBUG_RETURN(0);
1953 
1954   if (info.commit_list)
1955     sql_print_information("Starting crash recovery...");
1956 
1957 #if 0
1958   /*
1959     for now, only InnoDB supports 2pc. It means we can always safely
1960     rollback all pending transactions, without risking inconsistent data
1961   */
1962   DBUG_ASSERT(total_ha_2pc == (ulong) opt_bin_log+1); // only InnoDB and binlog
1963   tc_heuristic_recover= TC_HEURISTIC_RECOVER_ROLLBACK; // forcing ROLLBACK
1964   info.dry_run=FALSE;
1965 #endif
1966 
1967   for (info.len= MAX_XID_LIST_SIZE ;
1968        info.list==0 && info.len > MIN_XID_LIST_SIZE; info.len/=2)
1969   {
1970     info.list=(XID *)my_malloc(info.len*sizeof(XID), MYF(0));
1971   }
1972   if (!info.list)
1973   {
1974     sql_print_error(ER(ER_OUTOFMEMORY),
1975                     static_cast<int>(info.len*sizeof(XID)));
1976     DBUG_RETURN(1);
1977   }
1978 
1979   plugin_foreach(NULL, xarecover_handlerton,
1980                  MYSQL_STORAGE_ENGINE_PLUGIN, &info);
1981 
1982   my_free(info.list);
1983   if (info.found_foreign_xids)
1984     sql_print_warning("Found %d prepared XA transactions",
1985                       info.found_foreign_xids);
1986   if (info.dry_run && info.found_my_xids)
1987   {
1988     sql_print_error("Found %d prepared transactions! It means that mysqld was "
1989                     "not shut down properly last time and critical recovery "
1990                     "information (last binlog or %s file) was manually deleted "
1991                     "after a crash. You have to start mysqld with "
1992                     "--tc-heuristic-recover switch to commit or rollback "
1993                     "pending transactions.",
1994                     info.found_my_xids, opt_tc_log_file);
1995     DBUG_RETURN(1);
1996   }
1997   if (info.commit_list)
1998     sql_print_information("Crash recovery finished.");
1999   DBUG_RETURN(0);
2000 }
2001 
2002 /**
2003   Return the list of XIDs to a client, the same way SHOW commands do.
2004 
2005   @note
2006     I didn't find in XA specs that an RM cannot return the same XID twice,
2007     so mysql_xa_recover does not filter XIDs to ensure uniqueness.
2008     It can be easily fixed later, if necessary.
2009 */
2010 bool mysql_xa_recover(THD *thd)
2011 {
2012   List<Item> field_list;
2013   Protocol *protocol= thd->protocol;
2014   int i=0;
2015   XID_STATE *xs;
2016   DBUG_ENTER("mysql_xa_recover");
2017 
2018   field_list.push_back(new Item_int(NAME_STRING("formatID"), 0, MY_INT32_NUM_DECIMAL_DIGITS));
2019   field_list.push_back(new Item_int(NAME_STRING("gtrid_length"), 0, MY_INT32_NUM_DECIMAL_DIGITS));
2020   field_list.push_back(new Item_int(NAME_STRING("bqual_length"), 0, MY_INT32_NUM_DECIMAL_DIGITS));
2021   field_list.push_back(new Item_empty_string("data",XIDDATASIZE));
2022 
2023   if (protocol->send_result_set_metadata(&field_list,
2024                             Protocol::SEND_NUM_ROWS | Protocol::SEND_EOF))
2025     DBUG_RETURN(1);
2026 
2027   mysql_mutex_lock(&LOCK_xid_cache);
2028   while ((xs= (XID_STATE*) my_hash_element(&xid_cache, i++)))
2029   {
2030     if (xs->xa_state==XA_PREPARED)
2031     {
2032       protocol->prepare_for_resend();
2033       protocol->store_longlong((longlong)xs->xid.formatID, FALSE);
2034       protocol->store_longlong((longlong)xs->xid.gtrid_length, FALSE);
2035       protocol->store_longlong((longlong)xs->xid.bqual_length, FALSE);
2036       protocol->store(xs->xid.data, xs->xid.gtrid_length+xs->xid.bqual_length,
2037                       &my_charset_bin);
2038       if (protocol->write())
2039       {
2040         mysql_mutex_unlock(&LOCK_xid_cache);
2041         DBUG_RETURN(1);
2042       }
2043     }
2044   }
2045 
2046   mysql_mutex_unlock(&LOCK_xid_cache);
2047   my_eof(thd);
2048   DBUG_RETURN(0);
2049 }
2050 
2051 /**
2052   @details
2053   This function should be called when MySQL sends rows of a SELECT result set
2054   or the EOF mark to the client. It releases a possible adaptive hash index
2055   S-latch held by thd in InnoDB and also releases a possible InnoDB query
2056   FIFO ticket to enter InnoDB. To save CPU time, InnoDB allows a thd to
2057   keep them over several calls of the InnoDB handler interface when a join
2058   is executed. But when we let control pass to the client they have
2059   to be released because if the application program uses mysql_use_result(),
2060   it may deadlock on the S-latch if the application on another connection
2061   performs another SQL query. In MySQL-4.1 this is even more important because
2062   there a connection can have several SELECT queries open at the same time.
2063 
2064   @param thd           the thread handle of the current connection
2065 
2066   @return
2067     always 0
2068 */
2069 
2070 int ha_release_temporary_latches(THD *thd)
2071 {
2072   Ha_trx_info *info;
2073 
2074   /*
2075     Note that below we assume that only transactional storage engines
2076     may need release_temporary_latches(). If this ever becomes false,
2077     we could iterate on thd->open_tables instead (and remove duplicates
2078     as if (!seen[hton->slot]) { seen[hton->slot]=1; ... }).
2079   */
2080   for (info= thd->transaction.stmt.ha_list; info; info= info->next())
2081   {
2082     handlerton *hton= info->ht();
2083     if (hton && hton->release_temporary_latches)
2084         hton->release_temporary_latches(hton, thd);
2085   }
2086   return 0;
2087 }
2088 
2089 /**
2090   Check if all storage engines used in transaction agree that after
2091   rollback to savepoint it is safe to release MDL locks acquired after
2092   savepoint creation.
2093 
2094   @param thd   The client thread that executes the transaction.
2095 
2096   @return true  - It is safe to release MDL locks.
2097           false - If it is not.
2098 */
2099 bool ha_rollback_to_savepoint_can_release_mdl(THD *thd)
2100 {
2101   Ha_trx_info *ha_info;
2102   THD_TRANS *trans= (thd->in_sub_stmt ? &thd->transaction.stmt :
2103                                         &thd->transaction.all);
2104 
2105   DBUG_ENTER("ha_rollback_to_savepoint_can_release_mdl");
2106 
2107   /*
2108     Check whether it is safe to release metadata locks after rollback to
2109     savepoint in all the storage engines that are part of the transaction.
2110   */
2111   for (ha_info= trans->ha_list; ha_info; ha_info= ha_info->next())
2112   {
2113     handlerton *ht= ha_info->ht();
2114     DBUG_ASSERT(ht);
2115 
2116     if (ht->savepoint_rollback_can_release_mdl == 0 ||
2117         ht->savepoint_rollback_can_release_mdl(ht, thd) == false)
2118       DBUG_RETURN(false);
2119   }
2120 
2121   DBUG_RETURN(true);
2122 }
2123 
2124 int ha_rollback_to_savepoint(THD *thd, SAVEPOINT *sv)
2125 {
2126   int error=0;
2127   THD_TRANS *trans= (thd->in_sub_stmt ? &thd->transaction.stmt :
2128                                         &thd->transaction.all);
2129   Ha_trx_info *ha_info, *ha_info_next;
2130 
2131   DBUG_ENTER("ha_rollback_to_savepoint");
2132 
2133   trans->no_2pc=0;
2134   trans->rw_ha_count= 0;
2135   /*
2136     rolling back to savepoint in all storage engines that were part of the
2137     transaction when the savepoint was set
2138   */
2139   for (ha_info= sv->ha_list; ha_info; ha_info= ha_info->next())
2140   {
2141     int err;
2142     handlerton *ht= ha_info->ht();
2143     DBUG_ASSERT(ht);
2144     DBUG_ASSERT(ht->savepoint_set != 0);
2145     if ((err= ht->savepoint_rollback(ht, thd,
2146                                      (uchar *)(sv+1)+ht->savepoint_offset)))
2147     { // cannot happen
2148       my_error(ER_ERROR_DURING_ROLLBACK, MYF(0), err);
2149       error=1;
2150     }
2151     DBUG_ASSERT(!thd->status_var_aggregated);
2152     status_var_increment(thd->status_var.ha_savepoint_rollback_count);
2153     trans->no_2pc|= ht->prepare == 0;
2154   }
2155   /*
2156     rolling back the transaction in all storage engines that were not part of
2157     the transaction when the savepoint was set
2158   */
2159   for (ha_info= trans->ha_list; ha_info != sv->ha_list;
2160        ha_info= ha_info_next)
2161   {
2162     int err;
2163     handlerton *ht= ha_info->ht();
2164     if ((err= ht->rollback(ht, thd, !thd->in_sub_stmt)))
2165     { // cannot happen
2166       my_error(ER_ERROR_DURING_ROLLBACK, MYF(0), err);
2167       error=1;
2168     }
2169     DBUG_ASSERT(!thd->status_var_aggregated);
2170     status_var_increment(thd->status_var.ha_rollback_count);
2171     ha_info_next= ha_info->next();
2172     ha_info->reset(); /* keep it conveniently zero-filled */
2173   }
2174   trans->ha_list= sv->ha_list;
2175   thd->diff_rollback_trans++;
2176   DBUG_RETURN(error);
2177 }
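/*
  Editorial note (not part of the original source): after ROLLBACK TO
  SAVEPOINT, engines that were already registered when the savepoint was set
  get savepoint_rollback(), engines that joined the transaction later get a
  full rollback(), and trans->ha_list is truncated back to sv->ha_list so the
  later engines are no longer considered part of the transaction.
*/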
2178 
2179 int ha_prepare_low(THD *thd, bool all)
2180 {
2181   int error= 0;
2182   THD_TRANS *trans=all ? &thd->transaction.all : &thd->transaction.stmt;
2183   Ha_trx_info *ha_info= trans->ha_list;
2184   DBUG_ENTER("ha_prepare_low");
2185 
2186   if (ha_info)
2187   {
2188     for (; ha_info && !error; ha_info= ha_info->next())
2189     {
2190       int err= 0;
2191       handlerton *ht= ha_info->ht();
2192       /*
2193         Do not call two-phase commit if this particular
2194         transaction is read-only. This allows for simpler
2195         implementation in engines that are always read-only.
2196       */
2197       /*
2198         But do call two-phase commit if the handlerton has fake changes
2199         enabled even if it's not marked as read-write.  This will ensure that
2200         the fake changes handlerton prepare will fail, preventing binlogging
2201         and committing the transaction in other engines.
2202       */
2203       if (!ha_info->is_trx_read_write()
2204           && likely(!(ht->is_fake_change && ht->is_fake_change(ht, thd))))
2205         continue;
2206       if ((err= ht->prepare(ht, thd, all)))
2207       {
2208         my_error(ER_ERROR_DURING_COMMIT, MYF(0), err);
2209         error= 1;
2210       }
2211       DBUG_ASSERT(!thd->status_var_aggregated);
2212       status_var_increment(thd->status_var.ha_prepare_count);
2213     }
2214     DBUG_EXECUTE_IF("crash_commit_after_prepare", DBUG_SUICIDE(););
2215   }
2216 
2217   DBUG_RETURN(error);
2218 }
2219 
2220 /**
2221   @note
2222   according to the SQL standard (ISO/IEC 9075-2:2003),
2223   section "4.33.4 SQL-statements and transaction states",
2224   SAVEPOINT is *not* a transaction-initiating SQL-statement
2225 */
2226 int ha_savepoint(THD *thd, SAVEPOINT *sv)
2227 {
2228   int error=0;
2229   THD_TRANS *trans= (thd->in_sub_stmt ? &thd->transaction.stmt :
2230                                         &thd->transaction.all);
2231   Ha_trx_info *ha_info= trans->ha_list;
2232   DBUG_ENTER("ha_savepoint");
2233 
2234   for (; ha_info; ha_info= ha_info->next())
2235   {
2236     int err;
2237     handlerton *ht= ha_info->ht();
2238     DBUG_ASSERT(ht);
2239     if (! ht->savepoint_set)
2240     {
2241       my_error(ER_CHECK_NOT_IMPLEMENTED, MYF(0), "SAVEPOINT");
2242       error=1;
2243       break;
2244     }
2245     if ((err= ht->savepoint_set(ht, thd, (uchar *)(sv+1)+ht->savepoint_offset)))
2246     { // cannot happen
2247       my_error(ER_GET_ERRNO, MYF(0), err);
2248       error=1;
2249     }
2250     DBUG_ASSERT(!thd->status_var_aggregated);
2251     status_var_increment(thd->status_var.ha_savepoint_count);
2252   }
2253   /*
2254     Remember the list of registered storage engines. All new
2255     engines are prepended to the beginning of the list.
2256   */
2257   sv->ha_list= trans->ha_list;
2258 
2259   DBUG_RETURN(error);
2260 }
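/*
  Editorial note (not part of the original source): each engine keeps its
  per-savepoint data directly after the SAVEPOINT structure, at its own
  savepoint_offset; that is why every savepoint_set/savepoint_rollback/
  savepoint_release call in this file passes (uchar *)(sv+1) + ht->savepoint_offset.
*/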
2261 
2262 int ha_release_savepoint(THD *thd, SAVEPOINT *sv)
2263 {
2264   int error=0;
2265   Ha_trx_info *ha_info= sv->ha_list;
2266   DBUG_ENTER("ha_release_savepoint");
2267 
2268   for (; ha_info; ha_info= ha_info->next())
2269   {
2270     int err;
2271     handlerton *ht= ha_info->ht();
2272     /* Savepoint lifetime is enclosed within transaction lifetime. */
2273     DBUG_ASSERT(ht);
2274     if (!ht->savepoint_release)
2275       continue;
2276     if ((err= ht->savepoint_release(ht, thd,
2277                                     (uchar *)(sv+1) + ht->savepoint_offset)))
2278     { // cannot happen
2279       my_error(ER_GET_ERRNO, MYF(0), err);
2280       error=1;
2281     }
2282   }
2283   DBUG_RETURN(error);
2284 }
2285 
2286 
2287 static my_bool clone_snapshot_handlerton(THD *thd, plugin_ref plugin,
2288                                          void *arg)
2289 {
2290   handlerton *hton= plugin_data(plugin, handlerton *);
2291 
2292   if (hton->state == SHOW_OPTION_YES &&
2293       hton->clone_consistent_snapshot)
2294     hton->clone_consistent_snapshot(hton, thd, (THD *) arg);
2295 
2296   return FALSE;
2297 }
2298 
2299 
2300 static int ha_clone_consistent_snapshot(THD *thd)
2301 {
2302   std::set<THD*> global_thread_list_copy;
2303   THD *from_thd;
2304   ulong id;
2305   Item *val;
2306   Thread_iterator it;
2307   Thread_iterator end;
2308 
2309   DBUG_ASSERT(!thd->lex->value_list.is_empty());
2310 
2311   val= (Item *) thd->lex->value_list.head();
2312 
2313   if (thd->lex->table_or_sp_used())
2314   {
2315     my_error(ER_NOT_SUPPORTED_YET, MYF(0), "Usage of subqueries or stored "
2316              "function calls as part of this statement");
2317     goto error;
2318   }
2319 
2320   if ((!val->fixed && val->fix_fields(thd, &val)) || val->check_cols(1))
2321   {
2322     my_error(ER_SET_CONSTANTS_ONLY, MYF(0));
2323     goto error;
2324   }
2325 
2326   id= val->val_int();
2327 
2328   mysql_mutex_lock(&LOCK_thd_remove);
2329   copy_global_thread_list(&global_thread_list_copy);
2330 
2331   it= global_thread_list_copy.begin();
2332   end= global_thread_list_copy.end();
2333   from_thd= NULL;
2334 
2335   for (; it != end; ++it)
2336   {
2337     if ((*it)->thread_id == id && *it != thd)
2338     {
2339       from_thd= *it;
2340       mysql_mutex_lock(&from_thd->LOCK_thd_data);
2341       break;
2342     }
2343   }
2344 
2345   mysql_mutex_unlock(&LOCK_thd_remove);
2346 
2347   if (!from_thd)
2348   {
2349     my_error(ER_NO_SUCH_THREAD, MYF(0), id);
2350     goto error;
2351   }
2352 
2353   /*
2354     Blocking commits and binlog updates ensures that we get the same snapshot
2355     for all engines (including the binary log). This allows us among other
2356     things to do backups with START TRANSACTION WITH CONSISTENT SNAPSHOT and
2357     have a consistent binlog position.
2358   */
2359   tc_log->xlock();
2360 
2361   plugin_foreach(thd, clone_snapshot_handlerton, MYSQL_STORAGE_ENGINE_PLUGIN,
2362                  from_thd);
2363 
2364   tc_log->xunlock();
2365 
2366   mysql_mutex_unlock(&from_thd->LOCK_thd_data);
2367 
2368   return 0;
2369 
2370 error:
2371 
2372   return 1;
2373 }
2374 
2375 
2376 static my_bool start_snapshot_handlerton(THD *thd, plugin_ref plugin,
2377                                          void *arg)
2378 {
2379   handlerton *hton= plugin_data(plugin, handlerton *);
2380   if (hton->state == SHOW_OPTION_YES &&
2381       hton->start_consistent_snapshot)
2382   {
2383     hton->start_consistent_snapshot(hton, thd);
2384     *((bool *)arg)= false;
2385   }
2386   return FALSE;
2387 }
2388 
2389 int ha_start_consistent_snapshot(THD *thd)
2390 {
2391 
2392   if (!thd->lex->value_list.is_empty())
2393       return ha_clone_consistent_snapshot(thd);
2394 
2395   bool warn= true;
2396 
2397   /*
2398     Blocking commits and binlog updates ensures that we get the same snapshot
2399     for all engines (including the binary log). This allows us among other
2400     things to do backups with START TRANSACTION WITH CONSISTENT SNAPSHOT and
2401     have a consistent binlog position.
2402   */
2403   tc_log->xlock();
2404 
2405   plugin_foreach(thd, start_snapshot_handlerton, MYSQL_STORAGE_ENGINE_PLUGIN,
2406                  &warn);
2407 
2408   tc_log->xunlock();
2409 
2410   /*
2411     Same idea as when one wants to CREATE TABLE in an engine which does not
2412     exist:
2413   */
2414   if (warn)
2415     push_warning(thd, Sql_condition::WARN_LEVEL_WARN, ER_UNKNOWN_ERROR,
2416                  "This MySQL server does not support any "
2417                  "consistent-read capable storage engine");
2418   return 0;
2419 }
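/*
  Usage sketch (not part of the original source): this function appears to
  back the statement

    START TRANSACTION WITH CONSISTENT SNAPSHOT;

  and, when a session id is supplied in thd->lex->value_list, it delegates to
  ha_clone_consistent_snapshot() above to share that other session's snapshot.
*/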
2420 
2421 
2422 static my_bool store_binlog_info_handlerton(THD *thd, plugin_ref plugin,
2423                                             void *arg)
2424 {
2425   handlerton *hton= plugin_data(plugin, handlerton *);
2426 
2427   if (hton->state == SHOW_OPTION_YES &&
2428       hton->store_binlog_info)
2429   {
2430     hton->store_binlog_info(hton, thd);
2431     *((bool *)arg)= false;
2432   }
2433 
2434   return FALSE;
2435 }
2436 
2437 
2438 int ha_store_binlog_info(THD *thd)
2439 {
2440   LOG_INFO li;
2441   bool warn= true;
2442 
2443   if (!mysql_bin_log.is_open())
2444     return 0;
2445 
2446   DBUG_ASSERT(tc_log == &mysql_bin_log);
2447 
2448   /* Block commits to get consistent binlog coordinates */
2449   tc_log->xlock();
2450 
2451   mysql_bin_log.raw_get_current_log(&li);
2452   thd->set_trans_pos(li.log_file_name, li.pos);
2453 
2454   plugin_foreach(thd, store_binlog_info_handlerton, MYSQL_STORAGE_ENGINE_PLUGIN,
2455                  &warn);
2456 
2457   tc_log->xunlock();
2458 
2459   if (warn)
2460     push_warning(thd, Sql_condition::WARN_LEVEL_WARN, ER_UNKNOWN_ERROR,
2461                  "No support for storing binlog coordinates in any storage");
2462   return 0;
2463 }
2464 
2465 
2466 static my_bool flush_handlerton(THD *thd, plugin_ref plugin,
2467                                 void *arg)
2468 {
2469   handlerton *hton= plugin_data(plugin, handlerton *);
2470   if (hton->state == SHOW_OPTION_YES && hton->flush_logs &&
2471       hton->flush_logs(hton))
2472     return TRUE;
2473   return FALSE;
2474 }
2475 
2476 
2477 bool ha_flush_logs(handlerton *db_type)
2478 {
2479   if (db_type == NULL)
2480   {
2481     if (plugin_foreach(NULL, flush_handlerton,
2482                           MYSQL_STORAGE_ENGINE_PLUGIN, 0))
2483       return TRUE;
2484   }
2485   else
2486   {
2487     if (db_type->state != SHOW_OPTION_YES ||
2488         (db_type->flush_logs && db_type->flush_logs(db_type)))
2489       return TRUE;
2490   }
2491   return FALSE;
2492 }
2493 
2494 
2495 /**
2496   @brief make canonical filename
2497 
2498   @param[in]  file     table handler
2499   @param[in]  path     original path
2500   @param[out] tmp_path buffer for canonized path
2501 
2502   @details Lower case db name and table name path parts for
2503            non file based tables when lower_case_table_names
2504            is 2 (store as is, compare in lower case).
2505            Filesystem path prefix (mysql_data_home or tmpdir)
2506            is left intact.
2507 
2508   @note tmp_path may be left intact if no conversion was
2509         performed.
2510 
2511   @retval canonized path
2512 
2513   @todo This may be done more efficiently when table path
2514         gets built. Convert this function to something like
2515         ASSERT_CANONICAL_FILENAME.
2516 */
2517 const char *get_canonical_filename(handler *file, const char *path,
2518                                    char *tmp_path)
2519 {
2520   uint i;
2521   if (lower_case_table_names != 2 || (file->ha_table_flags() & HA_FILE_BASED))
2522     return path;
2523 
2524   for (i= 0; i <= mysql_tmpdir_list.max; i++)
2525   {
2526     if (is_prefix(path, mysql_tmpdir_list.list[i]))
2527       return path;
2528   }
2529 
2530   /* Ensure that table handler gets path in lower case */
2531   if (tmp_path != path)
2532     strmov(tmp_path, path);
2533 
2534   /*
2535     we should only lowercase the database/table part,
2536     so start the process after the home directory
2537   */
2538   my_casedn_str(files_charset_info, tmp_path + mysql_data_home_len);
2539   return tmp_path;
2540 }
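/*
  Illustrative example (not part of the original source), assuming
  lower_case_table_names == 2, a non file-based engine, and a hypothetical
  data directory /data/mysql:

    get_canonical_filename(file, "/data/mysql/TestDB/MyTable", tmp_path)
      -> "/data/mysql/testdb/mytable"

  Only the database/table part after mysql_data_home is lowercased; paths
  under any tmpdir are returned unchanged.
*/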
2541 
2542 
2543 /**
2544   An interceptor to hijack the text of the error message without
2545   setting an error in the thread. We need the text to present it
2546   in the form of a warning to the user.
2547 */
2548 
2549 struct Ha_delete_table_error_handler: public Internal_error_handler
2550 {
2551 public:
2552   virtual bool handle_condition(THD *thd,
2553                                 uint sql_errno,
2554                                 const char* sqlstate,
2555                                 Sql_condition::enum_warning_level level,
2556                                 const char* msg,
2557                                 Sql_condition ** cond_hdl);
2558   char buff[MYSQL_ERRMSG_SIZE];
2559 };
2560 
2561 
2562 bool
2563 Ha_delete_table_error_handler::
2564 handle_condition(THD *,
2565                  uint,
2566                  const char*,
2567                  Sql_condition::enum_warning_level,
2568                  const char* msg,
2569                  Sql_condition ** cond_hdl)
2570 {
2571   *cond_hdl= NULL;
2572   /* Grab the error message */
2573   strmake(buff, msg, sizeof(buff)-1);
2574   return TRUE;
2575 }
2576 
2577 
2578 /** @brief
2579   This should return ENOENT if the file doesn't exist.
2580   The .frm file will be deleted only if we return 0 or ENOENT
2581 */
2582 int ha_delete_table(THD *thd, handlerton *table_type, const char *path,
2583                     const char *db, const char *alias, bool generate_warning)
2584 {
2585   handler *file;
2586   char tmp_path[FN_REFLEN];
2587   int error;
2588   TABLE dummy_table;
2589   TABLE_SHARE dummy_share;
2590   DBUG_ENTER("ha_delete_table");
2591 
2592   memset(static_cast<void*>(&dummy_table), 0, sizeof(dummy_table));
2593   memset(static_cast<void*>(&dummy_share), 0, sizeof(dummy_share));
2594   dummy_table.s= &dummy_share;
2595 
2596   /* DB_TYPE_UNKNOWN is used in ALTER TABLE when renaming only .frm files */
2597   if (table_type == NULL ||
2598       ! (file=get_new_handler((TABLE_SHARE*)0, thd->mem_root, table_type)))
2599     DBUG_RETURN(ENOENT);
2600 
2601   path= get_canonical_filename(file, path, tmp_path);
2602   if ((error= file->ha_delete_table(path)) && generate_warning)
2603   {
2604     /*
2605       Because file->print_error() uses my_error() to generate the error message
2606       we use an internal error handler to intercept it and store the text
2607       in a temporary buffer. Later the message will be presented to user
2608       as a warning.
2609     */
2610     Ha_delete_table_error_handler ha_delete_table_error_handler;
2611 
2612     /* Fill up structures that print_error may need */
2613     dummy_share.path.str= (char*) path;
2614     dummy_share.path.length= strlen(path);
2615     dummy_share.db.str= (char*) db;
2616     dummy_share.db.length= strlen(db);
2617     dummy_share.table_name.str= (char*) alias;
2618     dummy_share.table_name.length= strlen(alias);
2619     dummy_table.alias= alias;
2620 
2621     file->change_table_ptr(&dummy_table, &dummy_share);
2622 
2623     thd->push_internal_handler(&ha_delete_table_error_handler);
2624     file->print_error(error, 0);
2625 
2626     thd->pop_internal_handler();
2627 
2628     /*
2629       XXX: should we convert *all* errors to warnings here?
2630       What if the error is fatal?
2631     */
2632     push_warning(thd, Sql_condition::WARN_LEVEL_WARN, error,
2633                 ha_delete_table_error_handler.buff);
2634   }
2635   delete file;
2636 
2637 #ifdef HAVE_PSI_TABLE_INTERFACE
2638   if (likely(error == 0))
2639   {
2640     /* Table share not available, so check path for temp table prefix. */
2641     bool temp_table = (strstr(path, tmp_file_prefix) != NULL);
2642     PSI_TABLE_CALL(drop_table_share)
2643       (temp_table, db, strlen(db), alias, strlen(alias));
2644   }
2645 #endif
2646 
2647   DBUG_RETURN(error);
2648 }
2649 
2650 /****************************************************************************
2651 ** General handler functions
2652 ****************************************************************************/
2653 handler *handler::clone(const char *name, MEM_ROOT *mem_root)
2654 {
2655   handler *new_handler= table ? get_new_handler(table->s, mem_root, ht) : NULL;
2656 
2657   if (!new_handler)
2658     return NULL;
2659   if (new_handler->set_ha_share_ref(ha_share))
2660     goto err;
2661 
2662   /*
2663     Allocate handler->ref here because otherwise ha_open will allocate it
2664     on this->table->mem_root and we will not be able to reclaim that memory
2665     when the clone handler object is destroyed.
2666   */
2667   if (!(new_handler->ref= (uchar*) alloc_root(mem_root,
2668                                               ALIGN_SIZE(ref_length)*2)))
2669     goto err;
2670 
2671   new_handler->cloned= true;
2672 
2673   /*
2674     TODO: Implement a more efficient way to have more than one index open for
2675     the same table instance. The ha_open call is not cacheable for clone.
2676   */
2677   if (new_handler->ha_open(table, name, table->db_stat,
2678                            HA_OPEN_IGNORE_IF_LOCKED))
2679     goto err;
2680 
2681   return new_handler;
2682 
2683 err:
2684   delete new_handler;
2685   return NULL;
2686 }
2687 
2688 
2689 
2690 void handler::ha_statistic_increment(ulonglong SSV::*offset) const
2691 {
2692   if (table && table->in_use)
2693     status_var_increment(table->in_use->status_var.*offset);
2694 }
2695 
2696 void **handler::ha_data(THD *thd) const
2697 {
2698   return thd_ha_data(thd, ht);
2699 }
2700 
2701 THD *handler::ha_thd(void) const
2702 {
2703   if (unlikely(cloned))
2704     return current_thd;
2705   DBUG_ASSERT(!table || !table->in_use || table->in_use == current_thd);
2706   return (table && table->in_use) ? table->in_use : current_thd;
2707 }
2708 
2709 void handler::unbind_psi()
2710 {
2711 #ifdef HAVE_PSI_TABLE_INTERFACE
2712   DBUG_ASSERT(m_lock_type == F_UNLCK);
2713   DBUG_ASSERT(inited == NONE);
2714   /*
2715     Notify the instrumentation that this table is not owned
2716     by this thread any more.
2717   */
2718   PSI_TABLE_CALL(unbind_table)(m_psi);
2719 #endif
2720 }
2721 
2722 void handler::rebind_psi()
2723 {
2724 #ifdef HAVE_PSI_TABLE_INTERFACE
2725   DBUG_ASSERT(m_lock_type == F_UNLCK);
2726   DBUG_ASSERT(inited == NONE);
2727   /*
2728     Notify the instrumentation that this table is now owned
2729     by this thread.
2730   */
2731   PSI_table_share *share_psi= ha_table_share_psi(table_share);
2732   m_psi= PSI_TABLE_CALL(rebind_table)(share_psi, this, m_psi);
2733 #endif
2734 }
2735 
2736 PSI_table_share *handler::ha_table_share_psi(const TABLE_SHARE *share) const
2737 {
2738   return share->m_psi;
2739 }
2740 
2741 /** @brief
2742   Open database-handler.
2743 
2744   IMPLEMENTATION
2745     Try O_RDONLY if the table cannot be opened as O_RDWR
2746     Don't wait for locks if HA_OPEN_WAIT_IF_LOCKED is not set
2747 */
2748 int handler::ha_open(TABLE *table_arg, const char *name, int mode,
2749                      int test_if_locked)
2750 {
2751   int error;
2752   DBUG_ENTER("handler::ha_open");
2753   DBUG_PRINT("enter",
2754              ("name: %s  db_type: %d  db_stat: %d  mode: %d  lock_test: %d",
2755               name, ht->db_type, table_arg->db_stat, mode,
2756               test_if_locked));
2757 
2758   table= table_arg;
2759   DBUG_ASSERT(table->s == table_share);
2760   DBUG_ASSERT(m_lock_type == F_UNLCK);
2761   DBUG_PRINT("info", ("old m_lock_type: %d F_UNLCK %d", m_lock_type, F_UNLCK));
2762   DBUG_ASSERT(alloc_root_inited(&table->mem_root));
2763 
2764   if (cloned) {
2765     DEBUG_SYNC(ha_thd(), "start_handler_ha_open_cloned");
2766   }
2767 
2768   if ((error=open(name,mode,test_if_locked)))
2769   {
2770     if ((error == EACCES || error == EROFS) && mode == O_RDWR &&
2771 	(table->db_stat & HA_TRY_READ_ONLY))
2772     {
2773       table->db_stat|=HA_READ_ONLY;
2774       error=open(name,O_RDONLY,test_if_locked);
2775     }
2776   }
2777   if (error)
2778   {
2779     my_errno= error;                            /* Safeguard */
2780     DBUG_PRINT("error",("error: %d  errno: %d",error,errno));
2781   }
2782   else
2783   {
2784     DBUG_ASSERT(m_psi == NULL);
2785     DBUG_ASSERT(table_share != NULL);
2786 #ifdef HAVE_PSI_TABLE_INTERFACE
2787     /*
2788       Do not call this for partition handlers, since it may take too many
2789       resources.
2790       So only use the m_psi on table level, not for individual partitions.
2791     */
2792     if (!(test_if_locked & HA_OPEN_NO_PSI_CALL))
2793     {
2794       PSI_table_share *share_psi= ha_table_share_psi(table_share);
2795       m_psi= PSI_TABLE_CALL(open_table)(share_psi, this);
2796     }
2797 #endif
2798 
2799     if (table->s->db_options_in_use & HA_OPTION_READ_ONLY_DATA)
2800       table->db_stat|=HA_READ_ONLY;
2801     (void) extra(HA_EXTRA_NO_READCHECK);	// Not needed in SQL
2802 
2803     /* ref is already allocated for us if we're called from handler::clone() */
2804     if (!ref && !(ref= (uchar*) alloc_root(&table->mem_root,
2805                                           ALIGN_SIZE(ref_length)*2)))
2806     {
2807       ha_close();
2808       error=HA_ERR_OUT_OF_MEM;
2809     }
2810     else
2811       dup_ref=ref+ALIGN_SIZE(ref_length);
2812     cached_table_flags= table_flags();
2813   }
2814 
2815   if (unlikely(opt_userstat))
2816   {
2817     rows_read= rows_changed= 0;
2818     memset(index_rows_read, 0, sizeof(index_rows_read));
2819   }
2820 
2821   DBUG_RETURN(error);
2822 }
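/*
  Editorial note (not part of the original source): if open(name, O_RDWR)
  fails with EACCES or EROFS (for example, a table on a read-only filesystem)
  and the table allows HA_TRY_READ_ONLY, ha_open() above retries with
  O_RDONLY and marks the table HA_READ_ONLY instead of failing outright.
*/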
2823 
2824 
2825 /**
2826   Close handler.
2827 */
2828 
2829 int handler::ha_close(void)
2830 {
2831   DBUG_ENTER("handler::ha_close");
2832 #ifdef HAVE_PSI_TABLE_INTERFACE
2833   PSI_TABLE_CALL(close_table)(m_psi);
2834   m_psi= NULL; /* instrumentation handle, invalid after close_table() */
2835 #endif
2836   // TODO: set table= NULL to mark the handler as closed?
2837   DBUG_ASSERT(m_psi == NULL);
2838   DBUG_ASSERT(m_lock_type == F_UNLCK);
2839   DBUG_ASSERT(inited == NONE);
2840   DBUG_RETURN(close());
2841 }
2842 
2843 
2844 /**
2845   Initialize use of index.
2846 
2847   @param idx     Index to use
2848   @param sorted  Use sorted order
2849 
2850   @return Operation status
2851     @retval 0     Success
2852     @retval != 0  Error (error code returned)
2853 */
2854 
2855 int handler::ha_index_init(uint idx, bool sorted)
2856 {
2857   DBUG_EXECUTE_IF("ha_index_init_fail", return HA_ERR_TABLE_DEF_CHANGED;);
2858   int result;
2859   DBUG_ENTER("ha_index_init");
2860   DBUG_ASSERT(table_share->tmp_table != NO_TMP_TABLE ||
2861               m_lock_type != F_UNLCK);
2862   DBUG_ASSERT(inited == NONE);
2863   if (!(result= index_init(idx, sorted)))
2864     inited= INDEX;
2865   end_range= NULL;
2866   DBUG_RETURN(result);
2867 }
2868 
2869 
2870 /**
2871   End use of index.
2872 
2873   @return Operation status
2874     @retval 0     Success
2875     @retval != 0  Error (error code returned)
2876 */
2877 
2878 int handler::ha_index_end()
2879 {
2880   DBUG_ENTER("ha_index_end");
2881   /* SQL HANDLER function can call this without having it locked. */
2882   DBUG_ASSERT(table->open_by_handler ||
2883               table_share->tmp_table != NO_TMP_TABLE ||
2884               m_lock_type != F_UNLCK);
2885   DBUG_ASSERT(inited == INDEX);
2886   inited= NONE;
2887   end_range= NULL;
2888   DBUG_RETURN(index_end());
2889 }
2890 
2891 
2892 /**
2893   Initialize table for random read or scan.
2894 
2895   @param scan  if true: Initialize for random scans through rnd_next()
2896                if false: Initialize for random reads through rnd_pos()
2897 
2898   @return Operation status
2899     @retval 0     Success
2900     @retval != 0  Error (error code returned)
2901 */
2902 
2903 int handler::ha_rnd_init(bool scan)
2904 {
2905   DBUG_EXECUTE_IF("ha_rnd_init_fail", return HA_ERR_TABLE_DEF_CHANGED;);
2906   int result;
2907   DBUG_ENTER("ha_rnd_init");
2908   DBUG_ASSERT(table_share->tmp_table != NO_TMP_TABLE ||
2909               m_lock_type != F_UNLCK);
2910   DBUG_ASSERT(inited == NONE || (inited == RND && scan));
2911   if (scan && is_using_prohibited_gap_locks(table, false))
2912   {
2913     DBUG_RETURN(HA_ERR_LOCK_DEADLOCK);
2914   }
2915 
2916   inited= (result= rnd_init(scan)) ? NONE : RND;
2917   end_range= NULL;
2918   DBUG_RETURN(result);
2919 }
2920 
2921 
2922 /**
2923   End use of random access.
2924 
2925   @return Operation status
2926     @retval 0     Success
2927     @retval != 0  Error (error code returned)
2928 */
2929 
2930 int handler::ha_rnd_end()
2931 {
2932   DBUG_ENTER("ha_rnd_end");
2933   /* SQL HANDLER function can call this without having it locked. */
2934   DBUG_ASSERT(table->open_by_handler ||
2935               table_share->tmp_table != NO_TMP_TABLE ||
2936               m_lock_type != F_UNLCK);
2937   DBUG_ASSERT(inited == RND);
2938   inited= NONE;
2939   end_range= NULL;
2940   DBUG_RETURN(rnd_end());
2941 }
2942 
2943 
2944 /**
2945   Read next row via random scan.
2946 
2947   @param buf  Buffer to read the row into
2948 
2949   @return Operation status
2950     @retval 0     Success
2951     @retval != 0  Error (error code returned)
2952 */
2953 
2954 int handler::ha_rnd_next(uchar *buf)
2955 {
2956   int result;
2957   DBUG_ENTER("handler::ha_rnd_next");
2958   DBUG_ASSERT(table_share->tmp_table != NO_TMP_TABLE ||
2959               m_lock_type != F_UNLCK);
2960   DBUG_ASSERT(inited == RND);
2961 
2962   MYSQL_TABLE_IO_WAIT(m_psi, PSI_TABLE_FETCH_ROW, MAX_KEY, 0,
2963     { result= rnd_next(buf); })
2964 
2965   if (likely(!result))
2966   {
2967     update_index_stats(MAX_KEY);
2968   }
2969 
2970   DBUG_RETURN(result);
2971 }
2972 
2973 
2974 /**
2975   Read row via random scan from position.
2976 
2977   @param[out] buf  Buffer to read the row into
2978   @param      pos  Position from position() call
2979 
2980   @return Operation status
2981     @retval 0     Success
2982     @retval != 0  Error (error code returned)
2983 */
2984 
2985 int handler::ha_rnd_pos(uchar *buf, uchar *pos)
2986 {
2987   int result;
2988   DBUG_ENTER("handler::ha_rnd_pos");
2989   DBUG_ASSERT(table_share->tmp_table != NO_TMP_TABLE ||
2990               m_lock_type != F_UNLCK);
2991   /* TODO: Find out how to solve ha_rnd_pos when finding duplicate update. */
2992   /* DBUG_ASSERT(inited == RND); */
2993 
2994   MYSQL_TABLE_IO_WAIT(m_psi, PSI_TABLE_FETCH_ROW, MAX_KEY, 0,
2995     { result= rnd_pos(buf, pos); })
2996 
2997   if (likely(!result))
2998   {
2999     update_index_stats(MAX_KEY);
3000   }
3001 
3002   DBUG_RETURN(result);
3003 }
3004 
3005 
3006 /**
3007   Read [part of] row via [part of] index.
3008   @param[out] buf          buffer where store the data
3009   @param      key          Key to search for
3010   @param      keypart_map  Which part of key to use
3011   @param      find_flag    Direction/condition on key usage
3012 
3013   @returns Operation status
3014     @retval  0                   Success (found a record, and function has
3015                                  set table->status to 0)
3016     @retval  HA_ERR_END_OF_FILE  Row not found (function has set table->status
3017                                  to STATUS_NOT_FOUND)
3018     @retval  != 0                Error
3019 
3020   @note Positions an index cursor to the index specified in the handle.
3021   Fetches the row if available. If the key value is null,
3022   begin at the first key of the index.
3023   ha_index_read_map can be restarted without calling index_end on the previous
3024   index scan and without calling ha_index_init. In this case the
3025   ha_index_read_map is on the same index as the previous ha_index_scan.
3026   This is particularly used in conjunction with multi read ranges.
3027 */
3028 
3029 int handler::ha_index_read_map(uchar *buf, const uchar *key,
3030                                key_part_map keypart_map,
3031                                enum ha_rkey_function find_flag)
3032 {
3033   int result;
3034   DBUG_ENTER("handler::ha_index_read_map");
3035   DBUG_ASSERT(table_share->tmp_table != NO_TMP_TABLE ||
3036               m_lock_type != F_UNLCK);
3037   DBUG_ASSERT(inited == INDEX);
3038   if (is_using_prohibited_gap_locks(table, is_using_full_unique_key(
3039                                     active_index, keypart_map, find_flag)))
3040   {
3041     DBUG_RETURN(HA_ERR_LOCK_DEADLOCK);
3042   }
3043 
3044   MYSQL_TABLE_IO_WAIT(m_psi, PSI_TABLE_FETCH_ROW, active_index, 0,
3045     { result= index_read_map(buf, key, keypart_map, find_flag); })
3046 
3047   if (likely(!result))
3048   {
3049     update_index_stats(active_index);
3050   }
3051 
3052   DBUG_RETURN(result);
3053 }
3054 
3055 int handler::ha_index_read_last_map(uchar *buf, const uchar *key,
3056                                     key_part_map keypart_map)
3057 {
3058   int result;
3059   DBUG_ENTER("handler::ha_index_read_last_map");
3060   DBUG_ASSERT(table_share->tmp_table != NO_TMP_TABLE ||
3061               m_lock_type != F_UNLCK);
3062   DBUG_ASSERT(inited == INDEX);
3063   if (is_using_prohibited_gap_locks(table, false))
3064   {
3065     DBUG_RETURN(HA_ERR_LOCK_DEADLOCK);
3066   }
3067 
3068   MYSQL_TABLE_IO_WAIT(m_psi, PSI_TABLE_FETCH_ROW, active_index, 0,
3069     { result= index_read_last_map(buf, key, keypart_map); })
3070 
3071   if (likely(!result))
3072   {
3073     update_index_stats(active_index);
3074   }
3075 
3076   DBUG_RETURN(result);
3077 }
3078 
3079 
3080 /**
3081   Initializes an index and read it.
3082 
3083   @see handler::ha_index_read_map.
3084 */
3085 
3086 int handler::ha_index_read_idx_map(uchar *buf, uint index, const uchar *key,
3087                                    key_part_map keypart_map,
3088                                    enum ha_rkey_function find_flag)
3089 {
3090   int result;
3091   DBUG_ASSERT(table_share->tmp_table != NO_TMP_TABLE ||
3092               m_lock_type != F_UNLCK);
3093   DBUG_ASSERT(end_range == NULL);
3094 
3095   if (is_using_prohibited_gap_locks(table, is_using_full_unique_key(
3096                                     index, keypart_map, find_flag)))
3097   {
3098     return HA_ERR_LOCK_DEADLOCK;
3099   }
3100 
3101   MYSQL_TABLE_IO_WAIT(m_psi, PSI_TABLE_FETCH_ROW, index, 0,
3102     { result= index_read_idx_map(buf, index, key, keypart_map, find_flag); })
3103 
3104   if (likely(!result))
3105   {
3106     update_index_stats(index);
3107   }
3108   return result;
3109 }
3110 
3111 
3112 /**
3113   Reads the next row via index.
3114 
3115   @param[out] buf  Row data
3116 
3117   @return Operation status.
3118     @retval  0                   Success
3119     @retval  HA_ERR_END_OF_FILE  Row not found
3120     @retval  != 0                Error
3121 */
3122 
ha_index_next(uchar * buf)3123 int handler::ha_index_next(uchar * buf)
3124 {
3125   int result;
3126   DBUG_ENTER("handler::ha_index_next");
3127   DBUG_ASSERT(table_share->tmp_table != NO_TMP_TABLE ||
3128               m_lock_type != F_UNLCK);
3129   DBUG_ASSERT(inited == INDEX);
3130 
3131   MYSQL_TABLE_IO_WAIT(m_psi, PSI_TABLE_FETCH_ROW, active_index, 0,
3132     { result= index_next(buf); })
3133 
3134   if (likely(!result))
3135   {
3136     update_index_stats(active_index);
3137   }
3138 
3139   DEBUG_SYNC(ha_thd(), "handler_ha_index_next_end");
3140 
3141   DBUG_RETURN(result);
3142 }
3143 
3144 
3145 /**
3146   Reads the previous row via index.
3147 
3148   @param[out] buf  Row data
3149 
3150   @return Operation status.
3151     @retval  0                   Success
3152     @retval  HA_ERR_END_OF_FILE  Row not found
3153     @retval  != 0                Error
3154 */
3155 
ha_index_prev(uchar * buf)3156 int handler::ha_index_prev(uchar * buf)
3157 {
3158   int result;
3159   DBUG_ENTER("handler::ha_index_prev");
3160   DBUG_ASSERT(table_share->tmp_table != NO_TMP_TABLE ||
3161               m_lock_type != F_UNLCK);
3162   DBUG_ASSERT(inited == INDEX);
3163 
3164   MYSQL_TABLE_IO_WAIT(m_psi, PSI_TABLE_FETCH_ROW, active_index, 0,
3165     { result= index_prev(buf); })
3166 
3167   if (likely(!result))
3168   {
3169     update_index_stats(active_index);
3170   }
3171 
3172   DBUG_RETURN(result);
3173 }
3174 
3175 
3176 /**
3177   Reads the first row via index.
3178 
3179   @param[out] buf  Row data
3180 
3181   @return Operation status.
3182     @retval  0                   Success
3183     @retval  HA_ERR_END_OF_FILE  Row not found
3184     @retval  != 0                Error
3185 */
3186 
ha_index_first(uchar * buf)3187 int handler::ha_index_first(uchar * buf)
3188 {
3189   int result;
3190   DBUG_ASSERT(table_share->tmp_table != NO_TMP_TABLE ||
3191               m_lock_type != F_UNLCK);
3192   DBUG_ASSERT(inited == INDEX);
3193 
3194   if (is_using_prohibited_gap_locks(table, false))
3195   {
3196     return HA_ERR_LOCK_DEADLOCK;
3197   }
3198 
3199   MYSQL_TABLE_IO_WAIT(m_psi, PSI_TABLE_FETCH_ROW, active_index, 0,
3200     { result= index_first(buf); })
3201 
3202   if (likely(!result))
3203   {
3204     update_index_stats(active_index);
3205   }
3206 
3207   return result;
3208 }
3209 
is_using_full_key(key_part_map keypart_map,uint actual_key_parts)3210 bool handler::is_using_full_key(key_part_map keypart_map,
3211                                 uint actual_key_parts)
3212 {
3213   return (keypart_map == HA_WHOLE_KEY) ||
3214          (keypart_map == ((key_part_map(1) << actual_key_parts)
3215                         - 1));
3216 }
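
/*
  For illustration (assuming a hypothetical three-part key): keypart_map has
  one bit per key part, counted from the leftmost part, so the check above
  computes

    (key_part_map(1) << 3) - 1  ==  0b0111

  and a map with the three low bits set means "all key parts are bound",
  which is the same condition HA_WHOLE_KEY expresses.
*/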
3217 
is_using_full_unique_key(uint index,key_part_map keypart_map,enum ha_rkey_function find_flag) const3218 bool handler::is_using_full_unique_key(uint index,
3219                                        key_part_map keypart_map,
3220                                        enum ha_rkey_function find_flag) const
3221 {
3222   return (is_using_full_key(keypart_map,
3223                             table->key_info[index].actual_key_parts)
3224           && find_flag == HA_READ_KEY_EXACT
3225           && (index == table->s->primary_key
3226               || (table->key_info[index].flags & HA_NOSAME)));
3227 }
3228 
3229 /**
3230   Reads the last row via index.
3231 
3232   @param[out] buf  Row data
3233 
3234   @return Operation status.
3235     @retval  0                   Success
3236     @retval  HA_ERR_END_OF_FILE  Row not found
3237     @retval  != 0                Error
3238 */
3239 
ha_index_last(uchar * buf)3240 int handler::ha_index_last(uchar * buf)
3241 {
3242   int result;
3243   DBUG_ASSERT(table_share->tmp_table != NO_TMP_TABLE ||
3244               m_lock_type != F_UNLCK);
3245   DBUG_ASSERT(inited == INDEX);
3246 
3247   if (is_using_prohibited_gap_locks(table, false))
3248   {
3249     return HA_ERR_LOCK_DEADLOCK;
3250   }
3251 
3252   MYSQL_TABLE_IO_WAIT(m_psi, PSI_TABLE_FETCH_ROW, active_index, 0,
3253     { result= index_last(buf); })
3254 
3255   if (likely(!result))
3256   {
3257     update_index_stats(active_index);
3258   }
3259 
3260   return result;
3261 }
3262 
3263 
3264 /**
3265   Reads the next same row via index.
3266 
3267   @param[out] buf     Row data
3268   @param      key     Key to search for
3269   @param      keylen  Length of key
3270 
3271   @return Operation status.
3272     @retval  0                   Success
3273     @retval  HA_ERR_END_OF_FILE  Row not found
3274     @retval  != 0                Error
3275 */
3276 
ha_index_next_same(uchar * buf,const uchar * key,uint keylen)3277 int handler::ha_index_next_same(uchar *buf, const uchar *key, uint keylen)
3278 {
3279   int result;
3280   DBUG_ASSERT(table_share->tmp_table != NO_TMP_TABLE ||
3281               m_lock_type != F_UNLCK);
3282   DBUG_ASSERT(inited == INDEX);
3283 
3284   MYSQL_TABLE_IO_WAIT(m_psi, PSI_TABLE_FETCH_ROW, active_index, 0,
3285     { result= index_next_same(buf, key, keylen); })
3286 
3287   if (likely(!result))
3288   {
3289     update_index_stats(active_index);
3290   }
3291 
3292   return result;
3293 }
3294 
3295 
3296 /**
3297   Read one row via index.
3298 
3299   @param[out] buf        Row data
3300   @param      key        Key to search for
3301   @param      keylen     Length of key
3302   @param      find_flag  Direction/condition on key usage
3303 
3304   @return Operation status.
3305     @retval  0                   Success
3306     @retval  HA_ERR_END_OF_FILE  Row not found
3307     @retval  != 0                Error
3308 */
3309 
ha_index_read(uchar * buf,const uchar * key,uint key_len,enum ha_rkey_function find_flag)3310 int handler::ha_index_read(uchar *buf, const uchar *key, uint key_len,
3311                            enum ha_rkey_function find_flag)
3312 {
3313   int result;
3314   DBUG_ASSERT(table_share->tmp_table != NO_TMP_TABLE ||
3315               m_lock_type != F_UNLCK);
3316   DBUG_ASSERT(inited == INDEX);
3317 
3318   MYSQL_TABLE_IO_WAIT(m_psi, PSI_TABLE_FETCH_ROW, active_index, 0,
3319     { result= index_read(buf, key, key_len, find_flag); })
3320 
3321   if (likely(!result))
3322   {
3323     update_index_stats(active_index);
3324   }
3325 
3326   return result;
3327 }
3328 
3329 
3330 /**
3331   Reads the last row via index.
3332 
3333   @param[out] buf        Row data
3334   @param      key        Key to search for
3335   @param      keylen     Length of key
3336 
3337   @return Operation status.
3338     @retval  0                   Success
3339     @retval  HA_ERR_END_OF_FILE  Row not found
3340     @retval  != 0                Error
3341 */
3342 
ha_index_read_last(uchar * buf,const uchar * key,uint key_len)3343 int handler::ha_index_read_last(uchar *buf, const uchar *key, uint key_len)
3344 {
3345   int result;
3346   DBUG_ASSERT(table_share->tmp_table != NO_TMP_TABLE ||
3347               m_lock_type != F_UNLCK);
3348   DBUG_ASSERT(inited == INDEX);
3349 
3350   MYSQL_TABLE_IO_WAIT(m_psi, PSI_TABLE_FETCH_ROW, active_index, 0,
3351     { result= index_read_last(buf, key, key_len); })
3352 
3353   if (likely(!result))
3354   {
3355     update_index_stats(active_index);
3356   }
3357 
3358   return result;
3359 }
3360 
3361 
3362 /**
3363   Read first row (only) from a table.
3364 
3365   This is never called for InnoDB tables, as these table types
3366   have the HA_STATS_RECORDS_IS_EXACT flag set.
3367 */
read_first_row(uchar * buf,uint primary_key)3368 int handler::read_first_row(uchar * buf, uint primary_key)
3369 {
3370   int error;
3371   DBUG_ENTER("handler::read_first_row");
3372 
3373   ha_statistic_increment(&SSV::ha_read_first_count);
3374 
3375   /*
3376     If there are very few deleted rows in the table, find the first row by
3377     scanning the table.
3378     TODO remove the test for HA_READ_ORDER
3379   */
3380   if (stats.deleted < 10 || primary_key >= MAX_KEY ||
3381       !(index_flags(primary_key, 0, 0) & HA_READ_ORDER))
3382   {
3383     if (!(error= ha_rnd_init(1)))
3384     {
3385       while ((error= rnd_next(buf)) == HA_ERR_RECORD_DELETED)
3386         /* skip deleted row */;
3387       const int end_error= ha_rnd_end();
3388       if (!error)
3389         error= end_error;
3390     }
3391   }
3392   else
3393   {
3394     /* Find the first row through the primary key */
3395     if (!(error= ha_index_init(primary_key, 0)))
3396     {
3397       error= ha_index_first(buf);
3398       const int end_error= ha_index_end();
3399       if (!error)
3400         error= end_error;
3401     }
3402   }
3403   DBUG_RETURN(error);
3404 }
3405 
3406 /**
3407   Generate the next auto-increment number based on increment and offset.
3408   Computes the lowest number
3409   - strictly greater than "nr"
3410   - of the form: auto_increment_offset + N * auto_increment_increment
3411   If an overflow happened then return ULONGLONG_MAX as an
3412   indication of overflow.
3413   In most cases increment= offset= 1, in which case we get:
3414   @verbatim 1,2,3,4,5,... @endverbatim
3415   If increment=10 and offset=5 and the previous number is 1, we get:
3416   @verbatim 5,15,25,35,... @endverbatim
3417 */
3418 inline ulonglong
compute_next_insert_id(ulonglong nr,struct system_variables * variables)3419 compute_next_insert_id(ulonglong nr,struct system_variables *variables)
3420 {
3421   const ulonglong save_nr= nr;
3422 
3423   if (variables->auto_increment_increment == 1)
3424     nr= nr + 1; // optimization of the formula below
3425   else
3426   {
3427     nr= (((nr+ variables->auto_increment_increment -
3428            variables->auto_increment_offset)) /
3429          (ulonglong) variables->auto_increment_increment);
3430     nr= (nr* (ulonglong) variables->auto_increment_increment +
3431          variables->auto_increment_offset);
3432   }
3433 
3434   if (unlikely(nr <= save_nr))
3435     return ULONGLONG_MAX;
3436 
3437   return nr;
3438 }
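
/*
  Worked example with hypothetical values: with auto_increment_increment= 10
  and auto_increment_offset= 5 the valid sequence is 5, 15, 25, 35, ...
  For nr= 7 the formula above gives ((7 + 10 - 5) / 10) * 10 + 5 = 15,
  i.e. the lowest value of the form offset + N * increment that is strictly
  greater than 7.
*/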
3439 
3440 
adjust_next_insert_id_after_explicit_value(ulonglong nr)3441 void handler::adjust_next_insert_id_after_explicit_value(ulonglong nr)
3442 {
3443   /*
3444     If we have set THD::next_insert_id previously and plan to insert an
3445     explicitly specified value larger than or equal to it, we need to
3446     increase THD::next_insert_id to be greater than the explicit value.
3447   */
3448   if ((next_insert_id > 0) && (nr >= next_insert_id))
3449     set_next_insert_id(compute_next_insert_id(nr, &table->in_use->variables));
3450 }
3451 
3452 
3453 /** @brief
3454   Computes the largest number X:
3455   - smaller than or equal to "nr"
3456   - of the form: auto_increment_offset + N * auto_increment_increment
3457   where N>=0.
3458 
3459   SYNOPSIS
3460     prev_insert_id
3461       nr            Number to "round down"
3462       variables     variables struct containing auto_increment_increment and
3463                     auto_increment_offset
3464 
3465   RETURN
3466     The number X if it exists, "nr" otherwise.
3467 */
3468 inline ulonglong
prev_insert_id(ulonglong nr,struct system_variables * variables)3469 prev_insert_id(ulonglong nr, struct system_variables *variables)
3470 {
3471   if (unlikely(nr < variables->auto_increment_offset))
3472   {
3473     /*
3474       There's nothing good we can do here. That is a pathological case, where
3475       the offset is larger than the column's max possible value, i.e. not even
3476       the first sequence value may be inserted. User will receive warning.
3477     */
3478     DBUG_PRINT("info",("auto_increment: nr: %lu cannot honour "
3479                        "auto_increment_offset: %lu",
3480                        (ulong) nr, variables->auto_increment_offset));
3481     return nr;
3482   }
3483   if (variables->auto_increment_increment == 1)
3484     return nr; // optimization of the formula below
3485   nr= (((nr - variables->auto_increment_offset)) /
3486        (ulonglong) variables->auto_increment_increment);
3487   return (nr * (ulonglong) variables->auto_increment_increment +
3488           variables->auto_increment_offset);
3489 }
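
/*
  Worked example with hypothetical values: with auto_increment_increment= 10
  and auto_increment_offset= 5, rounding nr= 23 down gives
  ((23 - 5) / 10) * 10 + 5 = 15, the largest value of the form
  offset + N * increment that does not exceed 23.
*/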
3490 
3491 
3492 /**
3493   Update the auto_increment field if necessary.
3494 
3495   Updates columns with type NEXT_NUMBER if:
3496 
3497   - the column value is set to NULL (in which case
3498     auto_increment_field_not_null is 0), or
3499   - the column is set to 0 and (sql_mode & MODE_NO_AUTO_VALUE_ON_ZERO) is not
3500     set. In the future we will only set NEXT_NUMBER fields if one sets them
3501     to NULL (or they are not included in the insert list).
3502 
3503     In those cases, we check if the currently reserved interval still has
3504     values we have not used. If yes, we pick the smallest one and use it.
3505     Otherwise:
3506 
3507   - If a list of intervals has been provided to the statement via SET
3508     INSERT_ID or via an Intvar_log_event (in a replication slave), we pick the
3509     first unused interval from this list, consider it as reserved.
3510 
3511   - Otherwise we set the column for the first row to the value
3512     next_insert_id(get_auto_increment(column))) which is usually
3513     max-used-column-value+1.
3514     We call get_auto_increment() for the first row in a multi-row
3515     statement. get_auto_increment() will tell us the interval of values it
3516     reserved for us.
3517 
3518   - In both cases, for the following rows we use those reserved values without
3519     calling the handler again (we just progress in the interval, computing
3520     each new value from the previous one). Until we have exhausted them, then
3521     we either take the next provided interval or call get_auto_increment()
3522     again to reserve a new interval.
3523 
3524   - In both cases, the reserved intervals are remembered in
3525     thd->auto_inc_intervals_in_cur_stmt_for_binlog if statement-based
3526     binlogging; the last reserved interval is remembered in
3527     auto_inc_interval_for_cur_row. The number of reserved intervals is
3528     remembered in auto_inc_intervals_count. It differs from the number of
3529     elements in thd->auto_inc_intervals_in_cur_stmt_for_binlog() because the
3530     latter list is cumulative over all statements forming one binlog event
3531     (when stored functions and triggers are used), and collapses two
3532     contiguous intervals in one (see its append() method).
3533 
3534     The idea is that generated auto_increment values are predictable and
3535     independent of the column values in the table.  This is needed to be
3536     able to replicate into a table that already has rows with a higher
3537     auto-increment value than the one that is inserted.
3538 
3539     After we have already generated an auto-increment number and the user
3540     inserts a row with a higher value in that column than the last used one,
3541     we will start counting from the inserted value.
3542 
3543     This function's "outputs" are: the table's auto_increment field is filled
3544     with a value, thd->next_insert_id is filled with the value to use for the
3545     next row, if a value was autogenerated for the current row it is stored in
3546     thd->insert_id_for_cur_row, if get_auto_increment() was called
3547     thd->auto_inc_interval_for_cur_row is modified, if that interval is not
3548     present in thd->auto_inc_intervals_in_cur_stmt_for_binlog it is added to
3549     this list.
3550 
3551   @todo
3552     Replace all references to "next number" or NEXT_NUMBER with
3553     "auto_increment", everywhere (see below: there is
3554     table->auto_increment_field_not_null, and there also exists
3555     table->next_number_field; the naming is not consistent).
3556 
3557   @retval
3558     0	ok
3559   @retval
3560     HA_ERR_AUTOINC_READ_FAILED  get_auto_increment() was called and
3561     returned ~(ulonglong) 0
3562   @retval
3563     HA_ERR_AUTOINC_ERANGE storing value in field caused strict mode
3564     failure.
3565 */
3566 
3567 #define AUTO_INC_DEFAULT_NB_ROWS 1 // Some prefer 1024 here
3568 #define AUTO_INC_DEFAULT_NB_MAX_BITS 16
3569 #define AUTO_INC_DEFAULT_NB_MAX ((1 << AUTO_INC_DEFAULT_NB_MAX_BITS) - 1)
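
/*
  Illustrative sketch of the "increasing defaults" path below, assuming no
  row estimate is available: successive get_auto_increment() calls within one
  statement ask for AUTO_INC_DEFAULT_NB_ROWS * 2^auto_inc_intervals_count
  values, i.e. 1, 2, 4, 8, ... capped at AUTO_INC_DEFAULT_NB_MAX (65535), so
  a long multi-row insert quickly reserves large blocks without ever
  reserving an unbounded amount up front.
*/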
3570 
update_auto_increment()3571 int handler::update_auto_increment()
3572 {
3573   ulonglong nr, nb_reserved_values;
3574   bool append= FALSE;
3575   THD *thd= table->in_use;
3576   struct system_variables *variables= &thd->variables;
3577   DBUG_ASSERT(table_share->tmp_table != NO_TMP_TABLE ||
3578               m_lock_type != F_UNLCK);
3579   DBUG_ENTER("handler::update_auto_increment");
3580 
3581   /*
3582     next_insert_id is a "cursor" into the reserved interval: it may move past
3583     the interval's upper bound, but never below its lower bound.
3584   */
3585   DBUG_ASSERT(next_insert_id >= auto_inc_interval_for_cur_row.minimum());
3586 
3587   if ((nr= table->next_number_field->val_int()) != 0 ||
3588       (table->auto_increment_field_not_null &&
3589       thd->variables.sql_mode & MODE_NO_AUTO_VALUE_ON_ZERO))
3590   {
3591     /*
3592       Update next_insert_id if we had already generated a value in this
3593       statement (case of INSERT VALUES(null),(3763),(null):
3594       the last NULL needs to insert 3764, not the value of the first NULL plus
3595       1).
3596     */
3597     adjust_next_insert_id_after_explicit_value(nr);
3598     insert_id_for_cur_row= 0; // didn't generate anything
3599     DBUG_RETURN(0);
3600   }
3601 
3602   if ((nr= next_insert_id) >= auto_inc_interval_for_cur_row.maximum())
3603   {
3604     /* next_insert_id is beyond what is reserved, so we reserve more. */
3605     const Discrete_interval *forced=
3606       thd->auto_inc_intervals_forced.get_next();
3607     if (forced != NULL)
3608     {
3609       nr= forced->minimum();
3610       /*
3611         In a multi-row insert statement, when the number of affected rows is
3612         known, reserve that many auto-increment values, so that the interval
3613         spans from the starting value up to the starting value plus the number
3614         of affected rows times the auto_increment increment.
3615        */
3616       nb_reserved_values= (estimation_rows_to_insert > 0) ?
3617         estimation_rows_to_insert : forced->values();
3618     }
3619     else
3620     {
3621       /*
3622         handler::estimation_rows_to_insert was set by
3623         handler::ha_start_bulk_insert(); if 0 it means "unknown".
3624       */
3625       ulonglong nb_desired_values;
3626       /*
3627         If an estimation was given to the engine:
3628         - use it.
3629         - if we have already reserved numbers, the estimation was not
3630         accurate, so we'll reserve 2*AUTO_INC_DEFAULT_NB_ROWS the 2nd
3631         time, twice that the 3rd time, etc.
3632         If no estimation was given, use those increasing defaults from the
3633         start, starting from AUTO_INC_DEFAULT_NB_ROWS.
3634         Don't go beyond a max, so as not to reserve "way too much" (because
3635         reservation means potentially losing unused values).
3636         Note that in prelocked mode no estimation is given.
3637       */
3638 
3639       if ((auto_inc_intervals_count == 0) && (estimation_rows_to_insert > 0))
3640         nb_desired_values= estimation_rows_to_insert;
3641       else if ((auto_inc_intervals_count == 0) &&
3642                (thd->lex->many_values.elements > 0))
3643       {
3644         /*
3645           For multi-row inserts, if the bulk inserts cannot be started, the
3646           handler::estimation_rows_to_insert will not be set. But we still
3647           want to reserve the autoinc values.
3648         */
3649         nb_desired_values= thd->lex->many_values.elements;
3650       }
3651       else /* go with the increasing defaults */
3652       {
3653         /* avoid overflow in formula, with this if() */
3654         if (auto_inc_intervals_count <= AUTO_INC_DEFAULT_NB_MAX_BITS)
3655         {
3656           nb_desired_values= AUTO_INC_DEFAULT_NB_ROWS *
3657             (1 << auto_inc_intervals_count);
3658           set_if_smaller(nb_desired_values, AUTO_INC_DEFAULT_NB_MAX);
3659         }
3660         else
3661           nb_desired_values= AUTO_INC_DEFAULT_NB_MAX;
3662       }
3663       /* This call ignores all its parameters but nr, currently */
3664       get_auto_increment(variables->auto_increment_offset,
3665                          variables->auto_increment_increment,
3666                          nb_desired_values, &nr,
3667                          &nb_reserved_values);
3668       if (nr == ULONGLONG_MAX)
3669         DBUG_RETURN(HA_ERR_AUTOINC_READ_FAILED);  // Mark failure
3670 
3671       /*
3672         That rounding below should not be needed when all engines actually
3673         respect offset and increment in get_auto_increment(). But they don't
3674         so we still do it. Wonder if for the not-first-in-index we should do
3675         it. Hope that this rounding didn't push us out of the interval; even
3676         if it did we cannot do anything about it (calling the engine again
3677         will not help as we inserted no row).
3678       */
3679       nr= compute_next_insert_id(nr-1, variables);
3680     }
3681 
3682     if (table->s->next_number_keypart == 0)
3683     {
3684       /* We must defer the appending until "nr" has been possibly truncated */
3685       append= TRUE;
3686     }
3687     else
3688     {
3689       /*
3690         For such auto_increment there is no notion of interval, just a
3691         singleton. The interval is not even stored in
3692         thd->auto_inc_interval_for_cur_row, so we are sure to call the engine
3693         for next row.
3694       */
3695       DBUG_PRINT("info",("auto_increment: special not-first-in-index"));
3696     }
3697   }
3698 
3699   if (unlikely(nr == ULONGLONG_MAX))
3700       DBUG_RETURN(HA_ERR_AUTOINC_ERANGE);
3701 
3702   DBUG_PRINT("info",("auto_increment: %lu", (ulong) nr));
3703 
3704   if (unlikely(table->next_number_field->store((longlong) nr, TRUE)))
3705   {
3706     /*
3707       first test if the query was aborted due to strict mode constraints
3708     */
3709     if (thd->killed == THD::KILL_BAD_DATA)
3710       DBUG_RETURN(HA_ERR_AUTOINC_ERANGE);
3711 
3712     /*
3713       field refused this value (overflow) and truncated it, use the result of
3714       the truncation (which is going to be inserted); however we try to
3715       decrease it to honour auto_increment_* variables.
3716       That will shift the left bound of the reserved interval, we don't
3717       bother shifting the right bound (anyway any other value from this
3718       interval will cause a duplicate key).
3719     */
3720     nr= prev_insert_id(table->next_number_field->val_int(), variables);
3721     if (unlikely(table->next_number_field->store((longlong) nr, TRUE)))
3722       nr= table->next_number_field->val_int();
3723   }
3724   if (append)
3725   {
3726     auto_inc_interval_for_cur_row.replace(nr, nb_reserved_values,
3727                                           variables->auto_increment_increment);
3728     auto_inc_intervals_count++;
3729     /* Row-based replication does not need to store intervals in binlog */
3730     if (mysql_bin_log.is_open() && !thd->is_current_stmt_binlog_format_row())
3731         thd->auto_inc_intervals_in_cur_stmt_for_binlog.append(auto_inc_interval_for_cur_row.minimum(),
3732                                                               auto_inc_interval_for_cur_row.values(),
3733                                                               variables->auto_increment_increment);
3734   }
3735 
3736   /*
3737     Record this autogenerated value. If the caller then
3738     succeeds to insert this value, it will call
3739     record_first_successful_insert_id_in_cur_stmt()
3740     which will set first_successful_insert_id_in_cur_stmt if it's not
3741     already set.
3742   */
3743   insert_id_for_cur_row= nr;
3744   /*
3745     Set next insert id to point to next auto-increment value to be able to
3746     handle multi-row statements.
3747   */
3748   set_next_insert_id(compute_next_insert_id(nr, variables));
3749 
3750   DBUG_RETURN(0);
3751 }
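
/*
  Illustrative walk-through with hypothetical values (increment= offset= 1,
  empty table): for INSERT ... VALUES (NULL),(3763),(NULL) the first row gets
  1 and next_insert_id becomes 2; the explicit 3763 takes the early-return
  branch above and adjust_next_insert_id_after_explicit_value() bumps
  next_insert_id to 3764; the last NULL therefore gets 3764, as described in
  the comment on the explicit-value branch.
*/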
3752 
3753 
3754 /** @brief
3755   MySQL signals that it changed the column bitmaps
3756 
3757   USAGE
3758     This is for handlers that need to set up their own column bitmaps.
3759     Normally the handler should set up its own column bitmaps in
3760     index_init() or rnd_init() and in any column_bitmaps_signal() call after
3761     this.
3762 
3763     The handler is allowed to change the bitmaps after an index_init() or
3764     rnd_init() call is made, since after that point MySQL will not use the
3765     bitmaps for any program logic checking.
3766 */
column_bitmaps_signal()3767 void handler::column_bitmaps_signal()
3768 {
3769   DBUG_ENTER("column_bitmaps_signal");
3770   DBUG_PRINT("info", ("read_set: 0x%lx  write_set: 0x%lx", (long) table->read_set,
3771                       (long)table->write_set));
3772   DBUG_VOID_RETURN;
3773 }
3774 
3775 
3776 /**
3777   Reserves an interval of auto_increment values from the handler.
3778 
3779   @param       offset              offset (modulus increment)
3780   @param       increment           increment between calls
3781   @param       nb_desired_values   how many values we want
3782   @param[out]  first_value         the first value reserved by the handler
3783   @param[out]  nb_reserved_values  how many values the handler reserved
3784 
3785   offset and increment mean that we want values to be of the form
3786   offset + N * increment, where N >= 0 is an integer.
3787   If the function sets *first_value to ULONGLONG_MAX it means an error.
3788   If the function sets *nb_reserved_values to ULONGLONG_MAX it means it has
3789   reserved to "positive infinite".
3790 */
3791 
get_auto_increment(ulonglong offset,ulonglong increment,ulonglong nb_desired_values,ulonglong * first_value,ulonglong * nb_reserved_values)3792 void handler::get_auto_increment(ulonglong offset, ulonglong increment,
3793                                  ulonglong nb_desired_values,
3794                                  ulonglong *first_value,
3795                                  ulonglong *nb_reserved_values)
3796 {
3797   ulonglong nr;
3798   int error;
3799   DBUG_ENTER("handler::get_auto_increment");
3800 
3801   (void) extra(HA_EXTRA_KEYREAD);
3802   table->mark_columns_used_by_index_no_reset(table->s->next_number_index,
3803                                         table->read_set);
3804   column_bitmaps_signal();
3805 
3806   if (ha_index_init(table->s->next_number_index, 1))
3807   {
3808     /* This should never happen, assert in debug, and fail in release build */
3809     DBUG_ASSERT(0);
3810     *first_value= ULONGLONG_MAX;
3811     DBUG_VOID_RETURN;
3812   }
3813 
3814   if (table->s->next_number_keypart == 0)
3815   {						// Autoincrement at key-start
3816     error= ha_index_last(table->record[1]);
3817     /*
3818       MySQL implicitly assumes that such a method does locking (as MySQL
3819       decides to use nr+increment without checking again with the handler, in
3820       handler::update_auto_increment()), so reserve to infinity.
3821     */
3822     *nb_reserved_values= ULONGLONG_MAX;
3823   }
3824   else
3825   {
3826     uchar key[MAX_KEY_LENGTH];
3827     key_copy(key, table->record[0],
3828              table->key_info + table->s->next_number_index,
3829              table->s->next_number_key_offset);
3830     error= ha_index_read_map(table->record[1], key,
3831                              make_prev_keypart_map(table->s->next_number_keypart),
3832                              HA_READ_PREFIX_LAST);
3833     /*
3834       MySQL needs to call us for the next row: assume we are inserting
3835       ("a",null) here and return 3; next, this statement will want to insert
3836       ("b",null), and there is no reason why ("b",3+1) would be the right row
3837       to insert: maybe it already exists, maybe 3+1 is too large...
3838     */
3839     *nb_reserved_values= 1;
3840   }
3841 
3842   if (error)
3843   {
3844     if (error == HA_ERR_END_OF_FILE || error == HA_ERR_KEY_NOT_FOUND)
3845     {
3846       /* No entry found, start with 1. */
3847       nr= 1;
3848     }
3849     else
3850     {
3851       DBUG_ASSERT(0);
3852       nr= ULONGLONG_MAX;
3853     }
3854   }
3855   else
3856     nr= ((ulonglong) table->next_number_field->
3857          val_int_offset(table->s->rec_buff_length)+1);
3858   ha_index_end();
3859   (void) extra(HA_EXTRA_NO_KEYREAD);
3860   *first_value= nr;
3861   DBUG_VOID_RETURN;
3862 }
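
/*
  Illustrative example with hypothetical values: if the last row in the
  auto-increment index holds 10 and increment= offset= 1, the default
  implementation above returns *first_value= 11 and, in the key-start case,
  *nb_reserved_values= ULONGLONG_MAX, so handler::update_auto_increment()
  can hand out 11, 12, 13, ... for the remaining rows of the statement
  without calling the engine again.
*/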
3863 
3864 
ha_release_auto_increment()3865 void handler::ha_release_auto_increment()
3866 {
3867   DBUG_ASSERT(table_share->tmp_table != NO_TMP_TABLE ||
3868               m_lock_type != F_UNLCK ||
3869               (!next_insert_id && !insert_id_for_cur_row));
3870   release_auto_increment();
3871   insert_id_for_cur_row= 0;
3872   auto_inc_interval_for_cur_row.replace(0, 0, 0);
3873   auto_inc_intervals_count= 0;
3874   if (next_insert_id > 0)
3875   {
3876     next_insert_id= 0;
3877     /*
3878       This statement used forced auto_increment values if there were any;
3879       wipe them away so they do not leak into other statements.
3880     */
3881     table->in_use->auto_inc_intervals_forced.empty();
3882   }
3883 }
3884 
3885 
3886 /**
3887   Construct and emit duplicate key error message using information
3888   from table's record buffer.
3889 
3890   @param table    TABLE object which record buffer should be used as
3891                   source for column values.
3892   @param key      Key description.
3893   @param msg      Error message template to which key value should be
3894                   added.
3895   @param errflag  Flags for my_error() call.
3896 */
3897 
print_keydup_error(TABLE * table,KEY * key,const char * msg,myf errflag)3898 void print_keydup_error(TABLE *table, KEY *key, const char *msg, myf errflag)
3899 {
3900   /* Write the duplicated key in the error message */
3901   char key_buff[MAX_KEY_LENGTH];
3902   String str(key_buff,sizeof(key_buff),system_charset_info);
3903 
3904   if (key == NULL)
3905   {
3906     /* Key is unknown */
3907     str.copy("", 0, system_charset_info);
3908     my_printf_error(ER_DUP_ENTRY, msg, errflag, str.c_ptr(), "*UNKNOWN*");
3909   }
3910   else
3911   {
3912     /* Table is opened and defined at this point */
3913     key_unpack(&str,table, key);
3914     uint max_length=MYSQL_ERRMSG_SIZE-(uint) strlen(msg);
3915     if (str.length() >= max_length)
3916     {
3917       str.length(max_length-4);
3918       str.append(STRING_WITH_LEN("..."));
3919     }
3920     my_printf_error(ER_DUP_ENTRY, msg, errflag, str.c_ptr_safe(), key->name);
3921   }
3922 }
3923 
3924 
3925 /**
3926   Construct and emit duplicate key error message using information
3927   from table's record buffer.
3928 
3929   @sa print_keydup_error(table, key, msg, errflag).
3930 */
3931 
print_keydup_error(TABLE * table,KEY * key,myf errflag)3932 void print_keydup_error(TABLE *table, KEY *key, myf errflag)
3933 {
3934   print_keydup_error(table, key, ER(ER_DUP_ENTRY_WITH_KEY_NAME), errflag);
3935 }
3936 
3937 
3938 /**
3939   Print error that we got from handler function.
3940 
3941   @note
3942     In case of delete table it's only safe to use the following parts of
3943     the 'table' structure:
3944     - table->s->path
3945     - table->alias
3946 */
print_error(int error,myf errflag)3947 void handler::print_error(int error, myf errflag)
3948 {
3949   DBUG_ENTER("handler::print_error");
3950   DBUG_PRINT("enter",("error: %d",error));
3951 
3952   int textno=ER_GET_ERRNO;
3953   switch (error) {
3954   case EACCES:
3955     textno=ER_OPEN_AS_READONLY;
3956     break;
3957   case EAGAIN:
3958     textno=ER_FILE_USED;
3959     break;
3960   case ENOENT:
3961     {
3962       char errbuf[MYSYS_STRERROR_SIZE];
3963       textno=ER_FILE_NOT_FOUND;
3964       my_error(textno, errflag, table_share->table_name.str,
3965                error, my_strerror(errbuf, sizeof(errbuf), error));
3966     }
3967     break;
3968   case HA_ERR_KEY_NOT_FOUND:
3969   case HA_ERR_NO_ACTIVE_RECORD:
3970   case HA_ERR_RECORD_DELETED:
3971   case HA_ERR_END_OF_FILE:
3972     textno=ER_KEY_NOT_FOUND;
3973     break;
3974   case HA_ERR_WRONG_MRG_TABLE_DEF:
3975     textno=ER_WRONG_MRG_TABLE;
3976     break;
3977   case HA_ERR_FOUND_DUPP_KEY:
3978   {
3979     uint key_nr= table ? get_dup_key(error) : -1;
3980     if ((int) key_nr >= 0)
3981     {
3982       print_keydup_error(table,
3983                          key_nr == MAX_KEY ? NULL : &table->key_info[key_nr],
3984                          errflag);
3985       DBUG_VOID_RETURN;
3986     }
3987     textno=ER_DUP_KEY;
3988     break;
3989   }
3990   case HA_ERR_FOREIGN_DUPLICATE_KEY:
3991   {
3992     DBUG_ASSERT(table_share->tmp_table != NO_TMP_TABLE ||
3993                 m_lock_type != F_UNLCK);
3994 
3995     char rec_buf[MAX_KEY_LENGTH];
3996     String rec(rec_buf, sizeof(rec_buf), system_charset_info);
3997     /* Table is opened and defined at this point */
3998 
3999     /*
4000       Just print the subset of fields that are part of the first index,
4001       printing the whole row from there is not easy.
4002     */
4003     key_unpack(&rec, table, &table->key_info[0]);
4004 
4005     char child_table_name[NAME_LEN + 1];
4006     char child_key_name[NAME_LEN + 1];
4007     if (get_foreign_dup_key(child_table_name, sizeof(child_table_name),
4008                             child_key_name, sizeof(child_key_name)))
4009     {
4010       my_error(ER_FOREIGN_DUPLICATE_KEY_WITH_CHILD_INFO, errflag,
4011                table_share->table_name.str, rec.c_ptr_safe(),
4012                child_table_name, child_key_name);
4013     }
4014     else
4015     {
4016       my_error(ER_FOREIGN_DUPLICATE_KEY_WITHOUT_CHILD_INFO, errflag,
4017                table_share->table_name.str, rec.c_ptr_safe());
4018     }
4019     DBUG_VOID_RETURN;
4020   }
4021   case HA_ERR_NULL_IN_SPATIAL:
4022     my_error(ER_CANT_CREATE_GEOMETRY_OBJECT, errflag);
4023     DBUG_VOID_RETURN;
4024   case HA_ERR_FOUND_DUPP_UNIQUE:
4025     textno=ER_DUP_UNIQUE;
4026     break;
4027   case HA_ERR_RECORD_CHANGED:
4028     textno=ER_CHECKREAD;
4029     break;
4030   case HA_ERR_CRASHED:
4031     textno=ER_NOT_KEYFILE;
4032     break;
4033   case HA_ERR_WRONG_IN_RECORD:
4034     textno= ER_CRASHED_ON_USAGE;
4035     break;
4036   case HA_ERR_CRASHED_ON_USAGE:
4037     textno=ER_CRASHED_ON_USAGE;
4038     break;
4039   case HA_ERR_NOT_A_TABLE:
4040     textno= error;
4041     break;
4042   case HA_ERR_CRASHED_ON_REPAIR:
4043     textno=ER_CRASHED_ON_REPAIR;
4044     break;
4045   case HA_ERR_OUT_OF_MEM:
4046     textno=ER_OUT_OF_RESOURCES;
4047     break;
4048   case HA_ERR_WRONG_COMMAND:
4049     textno=ER_ILLEGAL_HA;
4050     break;
4051   case HA_ERR_OLD_FILE:
4052     textno=ER_OLD_KEYFILE;
4053     break;
4054   case HA_ERR_UNSUPPORTED:
4055     textno=ER_UNSUPPORTED_EXTENSION;
4056     break;
4057   case HA_ERR_RECORD_FILE_FULL:
4058   case HA_ERR_INDEX_FILE_FULL:
4059   {
4060     textno=ER_RECORD_FILE_FULL;
4061     /* Write the error message to error log */
4062     errflag|= ME_NOREFRESH;
4063     break;
4064   }
4065   case HA_ERR_LOCK_WAIT_TIMEOUT:
4066     textno=ER_LOCK_WAIT_TIMEOUT;
4067     break;
4068   case HA_ERR_LOCK_TABLE_FULL:
4069     textno=ER_LOCK_TABLE_FULL;
4070     break;
4071   case HA_ERR_LOCK_DEADLOCK:
4072     textno=ER_LOCK_DEADLOCK;
4073     break;
4074   case HA_ERR_READ_ONLY_TRANSACTION:
4075     textno=ER_READ_ONLY_TRANSACTION;
4076     break;
4077   case HA_ERR_CANNOT_ADD_FOREIGN:
4078     textno=ER_CANNOT_ADD_FOREIGN;
4079     break;
4080   case HA_ERR_ROW_IS_REFERENCED:
4081   {
4082     String str;
4083     get_error_message(error, &str);
4084     my_error(ER_ROW_IS_REFERENCED_2, errflag, str.c_ptr_safe());
4085     DBUG_VOID_RETURN;
4086   }
4087   case HA_ERR_NO_REFERENCED_ROW:
4088   {
4089     String str;
4090     get_error_message(error, &str);
4091     my_error(ER_NO_REFERENCED_ROW_2, errflag, str.c_ptr_safe());
4092     DBUG_VOID_RETURN;
4093   }
4094   case HA_ERR_TABLE_DEF_CHANGED:
4095     textno=ER_TABLE_DEF_CHANGED;
4096     break;
4097   case HA_ERR_NO_SUCH_TABLE:
4098     my_error(ER_NO_SUCH_TABLE, errflag, table_share->db.str,
4099              table_share->table_name.str);
4100     DBUG_VOID_RETURN;
4101   case HA_ERR_RBR_LOGGING_FAILED:
4102     textno= ER_BINLOG_ROW_LOGGING_FAILED;
4103     break;
4104   case HA_ERR_DROP_INDEX_FK:
4105   {
4106     const char *ptr= "???";
4107     uint key_nr= table ? get_dup_key(error) : -1;
4108     if ((int) key_nr >= 0)
4109       ptr= table->key_info[key_nr].name;
4110     my_error(ER_DROP_INDEX_FK, errflag, ptr);
4111     DBUG_VOID_RETURN;
4112   }
4113   case HA_ERR_TABLE_NEEDS_UPGRADE:
4114     textno=ER_TABLE_NEEDS_UPGRADE;
4115     break;
4116   case HA_ERR_NO_PARTITION_FOUND:
4117     textno=ER_WRONG_PARTITION_NAME;
4118     break;
4119   case HA_ERR_TABLE_READONLY:
4120     textno= ER_OPEN_AS_READONLY;
4121     break;
4122   case HA_ERR_AUTOINC_READ_FAILED:
4123     textno= ER_AUTOINC_READ_FAILED;
4124     break;
4125   case HA_ERR_AUTOINC_ERANGE:
4126     textno= ER_WARN_DATA_OUT_OF_RANGE;
4127     break;
4128   case HA_ERR_TOO_MANY_CONCURRENT_TRXS:
4129     textno= ER_TOO_MANY_CONCURRENT_TRXS;
4130     break;
4131   case HA_ERR_INDEX_COL_TOO_LONG:
4132     textno= ER_INDEX_COLUMN_TOO_LONG;
4133     break;
4134   case HA_ERR_NOT_IN_LOCK_PARTITIONS:
4135     textno=ER_ROW_DOES_NOT_MATCH_GIVEN_PARTITION_SET;
4136     break;
4137   case HA_ERR_INDEX_CORRUPT:
4138     textno= ER_INDEX_CORRUPT;
4139     break;
4140   case HA_ERR_UNDO_REC_TOO_BIG:
4141     textno= ER_UNDO_RECORD_TOO_BIG;
4142     break;
4143   case HA_ERR_TABLE_IN_FK_CHECK:
4144     textno= ER_TABLE_IN_FK_CHECK;
4145     break;
4146   case HA_WRONG_CREATE_OPTION:
4147     textno= ER_ILLEGAL_HA;
4148     break;
4149   case HA_ERR_TOO_MANY_FIELDS:
4150     textno= ER_TOO_MANY_FIELDS;
4151     break;
4152   case HA_ERR_INNODB_READ_ONLY:
4153     textno= ER_INNODB_READ_ONLY;
4154     break;
4155   case HA_ERR_TEMP_FILE_WRITE_FAILURE:
4156     textno= ER_TEMP_FILE_WRITE_FAILURE;
4157     break;
4158   case HA_ERR_INNODB_FORCED_RECOVERY:
4159     textno= ER_INNODB_FORCED_RECOVERY;
4160     break;
4161   default:
4162     {
4163       /* The error was "unknown" to this function.
4164 	 Ask handler if it has got a message for this error */
4165       bool temporary= FALSE;
4166       String str;
4167       temporary= get_error_message(error, &str);
4168       if (!str.is_empty())
4169       {
4170 	const char* engine= table_type();
4171 	if (temporary)
4172 	  my_error(ER_GET_TEMPORARY_ERRMSG, errflag, error, str.ptr(), engine);
4173 	else
4174 	  my_error(ER_GET_ERRMSG, errflag, error, str.ptr(), engine);
4175       }
4176       else
4177 	my_error(ER_GET_ERRNO,errflag,error);
4178       DBUG_VOID_RETURN;
4179     }
4180   }
4181   if (textno != ER_FILE_NOT_FOUND)
4182     my_error(textno, errflag, table_share->table_name.str, error);
4183   DBUG_VOID_RETURN;
4184 }
4185 
4186 
4187 /**
4188   Return an error message specific to this handler.
4189 
4190   @param error  error code previously returned by handler
4191   @param buf    pointer to String where to add error message
4192 
4193   @return
4194     Returns true if this is a temporary error
4195 */
get_error_message(int error,String * buf)4196 bool handler::get_error_message(int error, String* buf)
4197 {
4198   return FALSE;
4199 }
4200 
4201 
4202 /**
4203   Check for incompatible collation changes.
4204 
4205   @retval
4206     HA_ADMIN_NEEDS_UPGRADE   Table may have data requiring upgrade.
4207   @retval
4208     0                        No upgrade required.
4209 */
4210 
check_collation_compatibility()4211 int handler::check_collation_compatibility()
4212 {
4213   ulong mysql_version= table->s->mysql_version;
4214 
4215   if (mysql_version < 50124)
4216   {
4217     KEY *key= table->key_info;
4218     KEY *key_end= key + table->s->keys;
4219     for (; key < key_end; key++)
4220     {
4221       KEY_PART_INFO *key_part= key->key_part;
4222       KEY_PART_INFO *key_part_end= key_part + key->user_defined_key_parts;
4223       for (; key_part < key_part_end; key_part++)
4224       {
4225         if (!key_part->fieldnr)
4226           continue;
4227         Field *field= table->field[key_part->fieldnr - 1];
4228         uint cs_number= field->charset()->number;
4229         if ((mysql_version < 50048 &&
4230              (cs_number == 11 || /* ascii_general_ci - bug #29499, bug #27562 */
4231               cs_number == 41 || /* latin7_general_ci - bug #29461 */
4232               cs_number == 42 || /* latin7_general_cs - bug #29461 */
4233               cs_number == 20 || /* latin7_estonian_cs - bug #29461 */
4234               cs_number == 21 || /* latin2_hungarian_ci - bug #29461 */
4235               cs_number == 22 || /* koi8u_general_ci - bug #29461 */
4236               cs_number == 23 || /* cp1251_ukrainian_ci - bug #29461 */
4237               cs_number == 26)) || /* cp1250_general_ci - bug #29461 */
4238              (mysql_version < 50124 &&
4239              (cs_number == 33 || /* utf8_general_ci - bug #27877 */
4240               cs_number == 35))) /* ucs2_general_ci - bug #27877 */
4241           return HA_ADMIN_NEEDS_UPGRADE;
4242       }
4243     }
4244   }
4245   return 0;
4246 }
4247 
4248 
ha_check_for_upgrade(HA_CHECK_OPT * check_opt)4249 int handler::ha_check_for_upgrade(HA_CHECK_OPT *check_opt)
4250 {
4251   int error;
4252   KEY *keyinfo, *keyend;
4253   KEY_PART_INFO *keypart, *keypartend;
4254 
4255   if (!table->s->mysql_version)
4256   {
4257     /* check for blob-in-key error */
4258     keyinfo= table->key_info;
4259     keyend= table->key_info + table->s->keys;
4260     for (; keyinfo < keyend; keyinfo++)
4261     {
4262       keypart= keyinfo->key_part;
4263       keypartend= keypart + keyinfo->user_defined_key_parts;
4264       for (; keypart < keypartend; keypart++)
4265       {
4266         if (!keypart->fieldnr)
4267           continue;
4268         Field *field= table->field[keypart->fieldnr-1];
4269         if (field->type() == MYSQL_TYPE_BLOB)
4270         {
4271           if (check_opt->sql_flags & TT_FOR_UPGRADE)
4272             check_opt->flags= T_MEDIUM;
4273           return HA_ADMIN_NEEDS_CHECK;
4274         }
4275       }
4276     }
4277   }
4278   if (table->s->frm_version != FRM_VER_TRUE_VARCHAR)
4279     return HA_ADMIN_NEEDS_ALTER;
4280 
4281   if ((error= check_collation_compatibility()))
4282     return error;
4283 
4284   return check_for_upgrade(check_opt);
4285 }
4286 
4287 
check_old_types()4288 int handler::check_old_types()
4289 {
4290   Field** field;
4291 
4292   for (field= table->field; (*field); field++)
4293   {
4294     if (table->s->mysql_version == 0) // prior to MySQL 5.0
4295     {
4296       /* check for bad DECIMAL field */
4297       if ((*field)->type() == MYSQL_TYPE_NEWDECIMAL) // TODO: error? MYSQL_TYPE_DECIMAL?
4298       {
4299         return HA_ADMIN_NEEDS_ALTER;
4300       }
4301       if ((*field)->type() == MYSQL_TYPE_VAR_STRING)
4302       {
4303         return HA_ADMIN_NEEDS_ALTER;
4304       }
4305     }
4306     if ((*field)->type() == MYSQL_TYPE_YEAR && (*field)->field_length == 2)
4307       return HA_ADMIN_NEEDS_ALTER; // obsolete YEAR(2) type
4308   }
4309   return 0;
4310 }
4311 
4312 
update_frm_version(TABLE * table)4313 static bool update_frm_version(TABLE *table)
4314 {
4315   char path[FN_REFLEN];
4316   File file;
4317   int result= 1;
4318   DBUG_ENTER("update_frm_version");
4319 
4320   /*
4321     No need to update frm version in case table was created or checked
4322     by server with the same version. This also ensures that we do not
4323     update frm version for temporary tables as this code doesn't support
4324     temporary tables.
4325   */
4326   if (table->s->mysql_version == MYSQL_VERSION_ID)
4327     DBUG_RETURN(0);
4328 
4329   strxmov(path, table->s->normalized_path.str, reg_ext, NullS);
4330 
4331   if ((file= mysql_file_open(key_file_frm,
4332                              path, O_RDWR|O_BINARY, MYF(MY_WME))) >= 0)
4333   {
4334     uchar version[4];
4335 
4336     int4store(version, MYSQL_VERSION_ID);
4337 
4338     if ((result= mysql_file_pwrite(file, (uchar*) version, 4, 51L, MYF_RW)))
4339       goto err;
4340 
4341     table->s->mysql_version= MYSQL_VERSION_ID;
4342   }
4343 err:
4344   if (file >= 0)
4345     (void) mysql_file_close(file, MYF(MY_WME));
4346   DBUG_RETURN(result);
4347 }
4348 
4349 
4350 
4351 /**
4352   @return
4353     key if error because of duplicated keys
4354 */
get_dup_key(int error)4355 uint handler::get_dup_key(int error)
4356 {
4357   DBUG_ASSERT(table_share->tmp_table != NO_TMP_TABLE ||
4358               m_lock_type != F_UNLCK);
4359   DBUG_ENTER("handler::get_dup_key");
4360   table->file->errkey  = (uint) -1;
4361   if (error == HA_ERR_FOUND_DUPP_KEY ||
4362       error == HA_ERR_FOUND_DUPP_UNIQUE || error == HA_ERR_NULL_IN_SPATIAL ||
4363       error == HA_ERR_DROP_INDEX_FK)
4364     table->file->info(HA_STATUS_ERRKEY | HA_STATUS_NO_LOCK);
4365   DBUG_RETURN(table->file->errkey);
4366 }
4367 
4368 
4369 /**
4370   Delete all files with extension from bas_ext().
4371 
4372   @param name		Base name of table
4373 
4374   @note
4375     We assume that the handler may return more extensions than
4376     were actually used for the file.
4377 
4378   @retval
4379     0   If we successfully deleted at least one file from bas_ext() and
4380     didn't get any error other than ENOENT
4381   @retval
4382     !0  Error
4383 */
delete_table(const char * name)4384 int handler::delete_table(const char *name)
4385 {
4386   int saved_error= 0;
4387   int error= 0;
4388   int enoent_or_zero= ENOENT;                   // Error if no file was deleted
4389   char buff[FN_REFLEN];
4390   DBUG_ASSERT(m_lock_type == F_UNLCK);
4391 
4392   for (const char **ext=bas_ext(); *ext ; ext++)
4393   {
4394     fn_format(buff, name, "", *ext, MY_UNPACK_FILENAME|MY_APPEND_EXT);
4395     if (mysql_file_delete_with_symlink(key_file_misc, buff, MYF(0)))
4396     {
4397       if (my_errno != ENOENT)
4398       {
4399         /*
4400           If error on the first existing file, return the error.
4401           Otherwise delete as much as possible.
4402         */
4403         if (enoent_or_zero)
4404           return my_errno;
4405 	saved_error= my_errno;
4406       }
4407     }
4408     else
4409       enoent_or_zero= 0;                        // No error for ENOENT
4410     error= enoent_or_zero;
4411   }
4412   return saved_error ? saved_error : error;
4413 }
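
/*
  Illustrative sketch for a hypothetical engine: if bas_ext() returns
  { ".MYD", ".MYI", NullS } and name is "./test/t1", the loop above attempts
  to delete "./test/t1.MYD" and "./test/t1.MYI".  A missing file (ENOENT) is
  not treated as an error as long as at least one extension was actually
  deleted; if nothing was deleted at all, ENOENT is returned.
*/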
4414 
4415 
rename_table(const char * from,const char * to)4416 int handler::rename_table(const char * from, const char * to)
4417 {
4418   int error= 0;
4419   const char **ext, **start_ext;
4420   start_ext= bas_ext();
4421   for (ext= start_ext; *ext ; ext++)
4422   {
4423     if (rename_file_ext(from, to, *ext))
4424     {
4425       if ((error=my_errno) != ENOENT)
4426 	break;
4427       error= 0;
4428     }
4429   }
4430   if (error)
4431   {
4432     /* Try to revert the rename. Ignore errors. */
4433     for (; ext >= start_ext; ext--)
4434       rename_file_ext(to, from, *ext);
4435   }
4436   return error;
4437 }
4438 
4439 
drop_table(const char * name)4440 void handler::drop_table(const char *name)
4441 {
4442   close();
4443   delete_table(name);
4444 }
4445 
4446 
4447 /**
4448   Performs checks upon the table.
4449 
4450   @param thd                thread doing CHECK TABLE operation
4451   @param check_opt          options from the parser
4452 
4453   @retval
4454     HA_ADMIN_OK               Successful upgrade
4455   @retval
4456     HA_ADMIN_NEEDS_UPGRADE    Table has structures requiring upgrade
4457   @retval
4458     HA_ADMIN_NEEDS_ALTER      Table has structures requiring ALTER TABLE
4459   @retval
4460     HA_ADMIN_NOT_IMPLEMENTED
4461 */
ha_check(THD * thd,HA_CHECK_OPT * check_opt)4462 int handler::ha_check(THD *thd, HA_CHECK_OPT *check_opt)
4463 {
4464   int error;
4465   DBUG_ASSERT(table_share->tmp_table != NO_TMP_TABLE ||
4466               m_lock_type != F_UNLCK);
4467 
4468   if ((table->s->mysql_version >= MYSQL_VERSION_ID) &&
4469       (check_opt->sql_flags & TT_FOR_UPGRADE))
4470     return 0;
4471 
4472   if (table->s->mysql_version < MYSQL_VERSION_ID)
4473   {
4474     if ((error= check_old_types()))
4475       return error;
4476     error= ha_check_for_upgrade(check_opt);
4477     if (error && (error != HA_ADMIN_NEEDS_CHECK))
4478       return error;
4479     if (!error && (check_opt->sql_flags & TT_FOR_UPGRADE))
4480       return 0;
4481   }
4482   if ((error= check(thd, check_opt)))
4483     return error;
4484   /* Skip updating frm version if not main handler. */
4485   if (table->file != this)
4486     return error;
4487   return update_frm_version(table);
4488 }
4489 
4490 /**
4491   A helper function to mark a transaction read-write,
4492   if it is started.
4493 */
4494 
4495 inline
4496 void
mark_trx_read_write()4497 handler::mark_trx_read_write()
4498 {
4499   Ha_trx_info *ha_info= &ha_thd()->ha_data[ht->slot].ha_info[0];
4500   /*
4501     When a storage engine method is called, the transaction must
4502     have been started, unless it's a DDL call, for which the
4503     storage engine starts the transaction internally, and commits
4504     it internally, without registering in the ha_list.
4505     Unfortunately here we can't know for sure whether the engine
4506     has registered the transaction or not, so we must check.
4507   */
4508   if (ha_info->is_started())
4509   {
4510     DBUG_ASSERT(has_transactions());
4511     /*
4512       table_share can be NULL in ha_delete_table(). See implementation
4513       of standalone function ha_delete_table() in sql_base.cc.
4514     */
4515     if (table_share == NULL || table_share->tmp_table == NO_TMP_TABLE)
4516       ha_info->set_trx_read_write();
4517   }
4518 }
4519 
4520 
4521 /**
4522   Repair table: public interface.
4523 
4524   @sa handler::repair()
4525 */
4526 
ha_repair(THD * thd,HA_CHECK_OPT * check_opt)4527 int handler::ha_repair(THD* thd, HA_CHECK_OPT* check_opt)
4528 {
4529   int result;
4530   mark_trx_read_write();
4531 
4532   result= repair(thd, check_opt);
4533   DBUG_ASSERT(result == HA_ADMIN_NOT_IMPLEMENTED ||
4534               ha_table_flags() & HA_CAN_REPAIR);
4535 
4536   if (result == HA_ADMIN_OK)
4537     result= update_frm_version(table);
4538   return result;
4539 }
4540 
4541 
4542 /**
4543   Start bulk insert.
4544 
4545   Allow the handler to optimize for multiple row insert.
4546 
4547   @param rows  Estimated rows to insert
4548 */
4549 
ha_start_bulk_insert(ha_rows rows)4550 void handler::ha_start_bulk_insert(ha_rows rows)
4551 {
4552   DBUG_ENTER("handler::ha_start_bulk_insert");
4553   DBUG_ASSERT(table_share->tmp_table != NO_TMP_TABLE ||
4554               m_lock_type == F_WRLCK);
4555   estimation_rows_to_insert= rows;
4556   start_bulk_insert(rows);
4557   DBUG_VOID_RETURN;
4558 }
4559 
4560 
4561 /**
4562   End bulk insert.
4563 
4564   @return Operation status
4565     @retval 0     Success
4566     @retval != 0  Failure (error code returned)
4567 */
4568 
ha_end_bulk_insert()4569 int handler::ha_end_bulk_insert()
4570 {
4571   DBUG_ENTER("handler::ha_end_bulk_insert");
4572   DBUG_ASSERT(table_share->tmp_table != NO_TMP_TABLE ||
4573               m_lock_type == F_WRLCK);
4574   estimation_rows_to_insert= 0;
4575   DBUG_RETURN(end_bulk_insert());
4576 }
4577 
4578 
4579 /**
4580   Bulk update row: public interface.
4581 
4582   @sa handler::bulk_update_row()
4583 */
4584 
4585 int
ha_bulk_update_row(const uchar * old_data,uchar * new_data,uint * dup_key_found)4586 handler::ha_bulk_update_row(const uchar *old_data, uchar *new_data,
4587                             uint *dup_key_found)
4588 {
4589   DBUG_ASSERT(table_share->tmp_table != NO_TMP_TABLE ||
4590               m_lock_type == F_WRLCK);
4591   mark_trx_read_write();
4592 
4593   return bulk_update_row(old_data, new_data, dup_key_found);
4594 }
4595 
4596 
4597 /**
4598   Delete all rows: public interface.
4599 
4600   @sa handler::delete_all_rows()
4601 */
4602 
4603 int
ha_delete_all_rows()4604 handler::ha_delete_all_rows()
4605 {
4606   DBUG_ASSERT(table_share->tmp_table != NO_TMP_TABLE ||
4607               m_lock_type == F_WRLCK);
4608   mark_trx_read_write();
4609 
4610   return delete_all_rows();
4611 }
4612 
4613 
4614 /**
4615   Truncate table: public interface.
4616 
4617   @sa handler::truncate()
4618 */
4619 
4620 int
ha_truncate()4621 handler::ha_truncate()
4622 {
4623   DBUG_ASSERT(table_share->tmp_table != NO_TMP_TABLE ||
4624               m_lock_type == F_WRLCK);
4625   mark_trx_read_write();
4626 
4627   return truncate();
4628 }
4629 
4630 
4631 /**
4632   Reset auto increment: public interface.
4633 
4634   @sa handler::reset_auto_increment()
4635 */
4636 
4637 int
ha_reset_auto_increment(ulonglong value)4638 handler::ha_reset_auto_increment(ulonglong value)
4639 {
4640   DBUG_ASSERT(table_share->tmp_table != NO_TMP_TABLE ||
4641               m_lock_type == F_WRLCK);
4642   mark_trx_read_write();
4643 
4644   return reset_auto_increment(value);
4645 }
4646 
4647 
4648 /**
4649   Optimize table: public interface.
4650 
4651   @sa handler::optimize()
4652 */
4653 
4654 int
ha_optimize(THD * thd,HA_CHECK_OPT * check_opt)4655 handler::ha_optimize(THD* thd, HA_CHECK_OPT* check_opt)
4656 {
4657   DBUG_ASSERT(table_share->tmp_table != NO_TMP_TABLE ||
4658               m_lock_type == F_WRLCK);
4659   mark_trx_read_write();
4660 
4661   return optimize(thd, check_opt);
4662 }
4663 
4664 
4665 /**
4666   Analyze table: public interface.
4667 
4668   @sa handler::analyze()
4669 */
4670 
4671 int
ha_analyze(THD * thd,HA_CHECK_OPT * check_opt)4672 handler::ha_analyze(THD* thd, HA_CHECK_OPT* check_opt)
4673 {
4674   DBUG_ASSERT(table_share->tmp_table != NO_TMP_TABLE ||
4675               m_lock_type != F_UNLCK);
4676   mark_trx_read_write();
4677 
4678   return analyze(thd, check_opt);
4679 }
4680 
4681 
4682 /**
4683   Check and repair table: public interface.
4684 
4685   @sa handler::check_and_repair()
4686 */
4687 
4688 bool
ha_check_and_repair(THD * thd)4689 handler::ha_check_and_repair(THD *thd)
4690 {
4691   DBUG_ASSERT(table_share->tmp_table != NO_TMP_TABLE ||
4692               m_lock_type == F_UNLCK);
4693   mark_trx_read_write();
4694 
4695   return check_and_repair(thd);
4696 }
4697 
4698 
4699 /**
4700   Disable indexes: public interface.
4701 
4702   @sa handler::disable_indexes()
4703 */
4704 
4705 int
ha_disable_indexes(uint mode)4706 handler::ha_disable_indexes(uint mode)
4707 {
4708   DBUG_ASSERT(table_share->tmp_table != NO_TMP_TABLE ||
4709               m_lock_type != F_UNLCK);
4710   mark_trx_read_write();
4711 
4712   return disable_indexes(mode);
4713 }
4714 
4715 
4716 /**
4717   Enable indexes: public interface.
4718 
4719   @sa handler::enable_indexes()
4720 */
4721 
4722 int
ha_enable_indexes(uint mode)4723 handler::ha_enable_indexes(uint mode)
4724 {
4725   DBUG_ASSERT(table_share->tmp_table != NO_TMP_TABLE ||
4726               m_lock_type != F_UNLCK);
4727   mark_trx_read_write();
4728 
4729   return enable_indexes(mode);
4730 }
4731 
4732 
4733 /**
4734   Discard or import tablespace: public interface.
4735 
4736   @sa handler::discard_or_import_tablespace()
4737 */
4738 
4739 int
ha_discard_or_import_tablespace(my_bool discard)4740 handler::ha_discard_or_import_tablespace(my_bool discard)
4741 {
4742   DBUG_ASSERT(table_share->tmp_table != NO_TMP_TABLE ||
4743               m_lock_type == F_WRLCK);
4744   mark_trx_read_write();
4745 
4746   return discard_or_import_tablespace(discard);
4747 }
4748 
4749 
ha_prepare_inplace_alter_table(TABLE * altered_table,Alter_inplace_info * ha_alter_info)4750 bool handler::ha_prepare_inplace_alter_table(TABLE *altered_table,
4751                                              Alter_inplace_info *ha_alter_info)
4752 {
4753   DBUG_ASSERT(table_share->tmp_table != NO_TMP_TABLE ||
4754               m_lock_type != F_UNLCK);
4755   mark_trx_read_write();
4756 
4757   return prepare_inplace_alter_table(altered_table, ha_alter_info);
4758 }
4759 
4760 
ha_commit_inplace_alter_table(TABLE * altered_table,Alter_inplace_info * ha_alter_info,bool commit)4761 bool handler::ha_commit_inplace_alter_table(TABLE *altered_table,
4762                                             Alter_inplace_info *ha_alter_info,
4763                                             bool commit)
4764 {
4765    /*
4766      At this point we should have an exclusive metadata lock on the table.
4767      The exception is if we're about to roll back changes (commit= false).
4768      In this case, we might be rolling back after a failed lock upgrade,
4769      so we could be holding the same lock level as for inplace_alter_table().
4770      TABLE::mdl_ticket is 0 for temporary tables.
4771    */
4772    DBUG_ASSERT((table->s->tmp_table != NO_TMP_TABLE && !table->mdl_ticket) ||
4773                (ha_thd()->mdl_context.is_lock_owner(MDL_key::TABLE,
4774                                                    table->s->db.str,
4775                                                    table->s->table_name.str,
4776                                                    MDL_EXCLUSIVE) ||
4777                !commit));
4778 
4779    return commit_inplace_alter_table(altered_table, ha_alter_info, commit);
4780 }
4781 
4782 
4783 /*
4784    Default implementation to support in-place alter table
4785    and old online add/drop index API
4786 */
4787 
4788 enum_alter_inplace_result
check_if_supported_inplace_alter(TABLE * altered_table,Alter_inplace_info * ha_alter_info)4789 handler::check_if_supported_inplace_alter(TABLE *altered_table,
4790                                           Alter_inplace_info *ha_alter_info)
4791 {
4792   DBUG_ENTER("check_if_supported_alter");
4793 
4794   HA_CREATE_INFO *create_info= ha_alter_info->create_info;
4795 
4796   Alter_inplace_info::HA_ALTER_FLAGS inplace_offline_operations=
4797     Alter_inplace_info::ALTER_COLUMN_EQUAL_PACK_LENGTH |
4798     Alter_inplace_info::ALTER_COLUMN_NAME |
4799     Alter_inplace_info::ALTER_COLUMN_DEFAULT |
4800     Alter_inplace_info::CHANGE_CREATE_OPTION |
4801     Alter_inplace_info::ALTER_RENAME;
4802 
4803   /* Is there at least one operation that requires copy algorithm? */
4804   if (ha_alter_info->handler_flags & ~inplace_offline_operations)
4805     DBUG_RETURN(HA_ALTER_INPLACE_NOT_SUPPORTED);
4806 
4807   /*
4808     ALTER TABLE tbl_name CONVERT TO CHARACTER SET .. and
4809     ALTER TABLE table_name DEFAULT CHARSET = .. most likely
4810     change column charsets, and so are not supported in-place through
4811     the old API.
4812 
4813     Changing the PACK_KEYS, MAX_ROWS and ROW_FORMAT options was
4814     not supported as an in-place operation in the old API either.
4815   */
4816   if (create_info->used_fields & (HA_CREATE_USED_CHARSET |
4817                                   HA_CREATE_USED_DEFAULT_CHARSET |
4818                                   HA_CREATE_USED_PACK_KEYS |
4819                                   HA_CREATE_USED_MAX_ROWS) ||
4820       (table->s->row_type != create_info->row_type))
4821     DBUG_RETURN(HA_ALTER_INPLACE_NOT_SUPPORTED);
4822 
4823   uint table_changes= (ha_alter_info->handler_flags &
4824                        Alter_inplace_info::ALTER_COLUMN_EQUAL_PACK_LENGTH) ?
4825     IS_EQUAL_PACK_LENGTH : IS_EQUAL_YES;
4826   if (table->file->check_if_incompatible_data(create_info, table_changes)
4827       == COMPATIBLE_DATA_YES)
4828     DBUG_RETURN(HA_ALTER_INPLACE_EXCLUSIVE_LOCK);
4829 
4830   DBUG_RETURN(HA_ALTER_INPLACE_NOT_SUPPORTED);
4831 }
4832 
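/*
  Editor's note, an illustrative sketch only (not part of the server sources):
  an engine that can apply a column rename in place without copying might
  override check_if_supported_inplace_alter() roughly as below. "ha_foo" is a
  hypothetical handler class; HA_ALTER_INPLACE_NO_LOCK is assumed to be one of
  the enum_alter_inplace_result values alongside those used above.

    enum_alter_inplace_result
    ha_foo::check_if_supported_inplace_alter(TABLE *altered_table,
                                             Alter_inplace_info *ha_alter_info)
    {
      // Pure column rename: handle it in place without locking out readers.
      if (!(ha_alter_info->handler_flags &
            ~Alter_inplace_info::ALTER_COLUMN_NAME))
        return HA_ALTER_INPLACE_NO_LOCK;
      // Fall back to the default (copy or exclusive-lock) behaviour.
      return handler::check_if_supported_inplace_alter(altered_table,
                                                       ha_alter_info);
    }
*/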
4833 
4834 /*
4835    Default implementation to support in-place alter table
4836    and old online add/drop index API
4837 */
4838 
notify_table_changed()4839 void handler::notify_table_changed()
4840 {
4841   ha_create_handler_files(table->s->path.str, NULL, CHF_INDEX_FLAG, NULL);
4842 }
4843 
4844 
report_unsupported_error(const char * not_supported,const char * try_instead)4845 void Alter_inplace_info::report_unsupported_error(const char *not_supported,
4846                                                   const char *try_instead)
4847 {
4848   if (unsupported_reason == NULL)
4849     my_error(ER_ALTER_OPERATION_NOT_SUPPORTED, MYF(0),
4850              not_supported, try_instead);
4851   else
4852     my_error(ER_ALTER_OPERATION_NOT_SUPPORTED_REASON, MYF(0),
4853              not_supported, unsupported_reason, try_instead);
4854 }
4855 
4856 
4857 /**
4858   Rename table: public interface.
4859 
4860   @sa handler::rename_table()
4861 */
4862 
4863 int
ha_rename_table(const char * from,const char * to)4864 handler::ha_rename_table(const char *from, const char *to)
4865 {
4866   DBUG_ASSERT(m_lock_type == F_UNLCK);
4867   mark_trx_read_write();
4868 
4869   return rename_table(from, to);
4870 }
4871 
4872 
4873 /**
4874   Delete table: public interface.
4875 
4876   @sa handler::delete_table()
4877 */
4878 
4879 int
ha_delete_table(const char * name)4880 handler::ha_delete_table(const char *name)
4881 {
4882   DBUG_ASSERT(m_lock_type == F_UNLCK);
4883   mark_trx_read_write();
4884 
4885   return delete_table(name);
4886 }
4887 
4888 
4889 /**
4890   Drop table in the engine: public interface.
4891 
4892   @sa handler::drop_table()
4893 */
4894 
4895 void
ha_drop_table(const char * name)4896 handler::ha_drop_table(const char *name)
4897 {
4898   DBUG_ASSERT(m_lock_type == F_UNLCK);
4899   mark_trx_read_write();
4900 
4901   return drop_table(name);
4902 }
4903 
4904 
4905 /**
4906   Create a table in the engine: public interface.
4907 
4908   @sa handler::create()
4909 */
4910 
4911 int
ha_create(const char * name,TABLE * form,HA_CREATE_INFO * info)4912 handler::ha_create(const char *name, TABLE *form, HA_CREATE_INFO *info)
4913 {
4914   DBUG_ASSERT(m_lock_type == F_UNLCK);
4915   mark_trx_read_write();
4916 
4917   return create(name, form, info);
4918 }
4919 
4920 
4921 /**
4922   Create handler files for CREATE TABLE: public interface.
4923 
4924   @sa handler::create_handler_files()
4925 */
4926 
4927 int
ha_create_handler_files(const char * name,const char * old_name,int action_flag,HA_CREATE_INFO * info)4928 handler::ha_create_handler_files(const char *name, const char *old_name,
4929                         int action_flag, HA_CREATE_INFO *info)
4930 {
4931   /*
4932     Normally this is done when unlocked, but in fast_alter_partition_table,
4933     it is done on an already locked handler when preparing to alter/rename
4934     partitions.
4935   */
4936   DBUG_ASSERT(m_lock_type == F_UNLCK ||
4937               (!old_name && strcmp(name, table_share->path.str)));
4938   mark_trx_read_write();
4939 
4940   return create_handler_files(name, old_name, action_flag, info);
4941 }
4942 
4943 
4944 /**
4945   Change partitions: public interface.
4946 
4947   @sa handler::change_partitions()
4948 */
4949 
4950 int
ha_change_partitions(HA_CREATE_INFO * create_info,const char * path,ulonglong * const copied,ulonglong * const deleted,const uchar * pack_frm_data,size_t pack_frm_len)4951 handler::ha_change_partitions(HA_CREATE_INFO *create_info,
4952                      const char *path,
4953                      ulonglong * const copied,
4954                      ulonglong * const deleted,
4955                      const uchar *pack_frm_data,
4956                      size_t pack_frm_len)
4957 {
4958   /*
4959     Must hold at least a read lock (RDLCK) or be a TMP table. A read lock
4960     is needed to read current partitions; a write lock is taken on new ones.
4961   */
4962   DBUG_ASSERT(table_share->tmp_table != NO_TMP_TABLE ||
4963               m_lock_type != F_UNLCK);
4964   mark_trx_read_write();
4965 
4966   return change_partitions(create_info, path, copied, deleted,
4967                            pack_frm_data, pack_frm_len);
4968 }
4969 
4970 
4971 /**
4972   Drop partitions: public interface.
4973 
4974   @sa handler::drop_partitions()
4975 */
4976 
4977 int
ha_drop_partitions(const char * path)4978 handler::ha_drop_partitions(const char *path)
4979 {
4980   DBUG_ASSERT(!table->db_stat);
4981 
4982   mark_trx_read_write();
4983 
4984   return drop_partitions(path);
4985 }
4986 
4987 
4988 /**
4989   Rename partitions: public interface.
4990 
4991   @sa handler::rename_partitions()
4992 */
4993 
4994 int
ha_rename_partitions(const char * path)4995 handler::ha_rename_partitions(const char *path)
4996 {
4997   DBUG_ASSERT(!table->db_stat);
4998   mark_trx_read_write();
4999 
5000   return rename_partitions(path);
5001 }
5002 
5003 
5004 /**
5005   Tell the storage engine that it is allowed to "disable transactions" in the
5006   handler. It is a hint that ACID is not required - it is used, for example,
5007   by NDB during ALTER TABLE when data are copied to a temporary table.
5008   A storage engine may treat this hint any way it likes; NDB, for example,
5009   starts to commit automatically every now and then.
5010   This hint can be safely ignored.
5011 */
ha_enable_transaction(THD * thd,bool on)5012 int ha_enable_transaction(THD *thd, bool on)
5013 {
5014   int error=0;
5015   DBUG_ENTER("ha_enable_transaction");
5016   DBUG_PRINT("enter", ("on: %d", (int) on));
5017 
5018   if ((thd->transaction.flags.enabled= on))
5019   {
5020     /*
5021       Now all storage engines should have transaction handling enabled.
5022       But some may have it enabled all the time - "disabling" transactions
5023       is an optimization hint that a storage engine is free to ignore.
5024       So, let's commit an open transaction (if any) now.
5025     */
5026     if (!(error= ha_commit_trans(thd, 0)))
5027       error= trans_commit_implicit(thd);
5028   }
5029   DBUG_RETURN(error);
5030 }
5031 
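/*
  Editor's note, an illustrative usage sketch only: the intended pattern is to
  bracket a bulk copy with this hint (error handling omitted; the surrounding
  code and variable names are hypothetical):

    ha_enable_transaction(thd, false);        // hint: ACID not needed while copying
    // ... copy rows into the new/temporary table ...
    error= ha_enable_transaction(thd, true);  // re-enable; commits open trx if any

  Whether anything actually changes is entirely up to the storage engine.
*/
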
index_next_same(uchar * buf,const uchar * key,uint keylen)5032 int handler::index_next_same(uchar *buf, const uchar *key, uint keylen)
5033 {
5034   int error;
5035   DBUG_ENTER("index_next_same");
5036   if (!(error=index_next(buf)))
5037   {
5038     my_ptrdiff_t ptrdiff= buf - table->record[0];
5039     uchar *UNINIT_VAR(save_record_0);
5040     KEY *UNINIT_VAR(key_info);
5041     KEY_PART_INFO *UNINIT_VAR(key_part);
5042     KEY_PART_INFO *UNINIT_VAR(key_part_end);
5043 
5044     /*
5045       key_cmp_if_same() compares table->record[0] against 'key'.
5046       In parts it uses table->record[0] directly, in parts it uses
5047       field objects with their local pointers into table->record[0].
5048       If 'buf' is distinct from table->record[0], we need to move
5049       all record references. This is table->record[0] itself and
5050       the field pointers of the fields used in this key.
5051     */
5052     if (ptrdiff)
5053     {
5054       save_record_0= table->record[0];
5055       table->record[0]= buf;
5056       key_info= table->key_info + active_index;
5057       key_part= key_info->key_part;
5058       key_part_end= key_part + key_info->user_defined_key_parts;
5059       for (; key_part < key_part_end; key_part++)
5060       {
5061         DBUG_ASSERT(key_part->field);
5062         key_part->field->move_field_offset(ptrdiff);
5063       }
5064     }
5065 
5066     if (key_cmp_if_same(table, key, active_index, keylen))
5067     {
5068       table->status=STATUS_NOT_FOUND;
5069       error=HA_ERR_END_OF_FILE;
5070     }
5071 
5072     /* Move back if necessary. */
5073     if (ptrdiff)
5074     {
5075       table->record[0]= save_record_0;
5076       for (key_part= key_info->key_part; key_part < key_part_end; key_part++)
5077         key_part->field->move_field_offset(-ptrdiff);
5078     }
5079   }
5080   DBUG_RETURN(error);
5081 }
5082 
5083 
get_dynamic_partition_info(PARTITION_STATS * stat_info,uint part_id)5084 void handler::get_dynamic_partition_info(PARTITION_STATS *stat_info,
5085                                          uint part_id)
5086 {
5087   info(HA_STATUS_CONST | HA_STATUS_TIME | HA_STATUS_VARIABLE |
5088        HA_STATUS_NO_LOCK);
5089   stat_info->records=              stats.records;
5090   stat_info->mean_rec_length=      stats.mean_rec_length;
5091   stat_info->data_file_length=     stats.data_file_length;
5092   stat_info->max_data_file_length= stats.max_data_file_length;
5093   stat_info->index_file_length=    stats.index_file_length;
5094   stat_info->delete_length=        stats.delete_length;
5095   stat_info->create_time=          stats.create_time;
5096   stat_info->update_time=          stats.update_time;
5097   stat_info->check_time=           stats.check_time;
5098   stat_info->check_sum=            0;
5099   if (table_flags() & (ulong) HA_HAS_CHECKSUM)
5100     stat_info->check_sum= checksum();
5101   return;
5102 }
5103 
5104 // Updates the global table stats with the TABLE this handler represents.
update_global_table_stats()5105 void handler::update_global_table_stats()
5106 {
5107   if (!rows_read && !rows_changed)
5108     return;  // Nothing to update.
5109   // table_cache_key is db_name + '\0' + table_name + '\0'.
5110   if (!table->s || !table->s->table_cache_key.str || !table->s->table_name.str)
5111     return;
5112 
5113   TABLE_STATS* table_stats;
5114   char key[NAME_LEN * 2 + 2];
5115   // [db] + '.' + [table]
5116   sprintf(key, "%s.%s", table->s->table_cache_key.str, table->s->table_name.str);
5117 
5118   mysql_mutex_lock(&LOCK_global_table_stats);
5119   // Gets the global table stats, creating one if necessary.
5120   if (!(table_stats = (TABLE_STATS *) my_hash_search(&global_table_stats,
5121                                                      (uchar*)key,
5122                                                      strlen(key))))
5123   {
5124     if (!(table_stats = ((TABLE_STATS *)
5125                          my_malloc(sizeof(TABLE_STATS), MYF(MY_WME | MY_ZEROFILL)))))
5126     {
5127       // Out of memory.
5128       sql_print_error("Allocating table stats failed.");
5129       goto end;
5130     }
5131     strncpy(table_stats->table, key, sizeof(table_stats->table));
5132     table_stats->table_len=              strlen(table_stats->table);
5133     table_stats->rows_read=              0;
5134     table_stats->rows_changed=           0;
5135     table_stats->rows_changed_x_indexes= 0;
5136     table_stats->engine_type=            (int) ht->db_type;
5137 
5138     if (my_hash_insert(&global_table_stats, (uchar *) table_stats))
5139     {
5140       // Out of memory.
5141       sql_print_error("Inserting table stats failed.");
5142       my_free((char *) table_stats);
5143       goto end;
5144     }
5145   }
5146   // Updates the global table stats.
5147   table_stats->rows_read+=              rows_read;
5148   table_stats->rows_changed+=           rows_changed;
5149   table_stats->rows_changed_x_indexes+=
5150     rows_changed * (table->s->keys ? table->s->keys : 1);
5151   ha_thd()->diff_total_read_rows+=   rows_read;
5152   rows_read= rows_changed=              0;
5153 end:
5154   mysql_mutex_unlock(&LOCK_global_table_stats);
5155 }
5156 
5157 // Updates the global index stats with this handler's accumulated index reads.
update_global_index_stats()5158 void handler::update_global_index_stats()
5159 {
5160   // table_cache_key is db_name + '\0' + table_name + '\0'.
5161   if (!table || !table->s || !table->s->table_cache_key.str ||
5162       !table->s->table_name.str)
5163     return;
5164 
5165   for (uint x = 0; x < table->s->keys; ++x)
5166   {
5167     if (index_rows_read[x])
5168     {
5169       // Rows were read using this index.
5170       KEY* key_info = &table->key_info[x];
5171 
5172       if (!key_info->name) continue;
5173 
5174       INDEX_STATS* index_stats;
5175       char key[NAME_LEN * 3 + 3];
5176       // [db] + '.' + [table] + '.' + [index]
5177       sprintf(key, "%s.%s.%s",  table->s->table_cache_key.str,
5178               table->s->table_name.str, key_info->name);
5179 
5180       mysql_mutex_lock(&LOCK_global_index_stats);
5181       // Gets the global index stats, creating one if necessary.
5182       if (!(index_stats = (INDEX_STATS *) my_hash_search(&global_index_stats,
5183                                                          (uchar *) key,
5184                                                          strlen(key))))
5185       {
5186         if (!(index_stats = ((INDEX_STATS *)
5187                              my_malloc(sizeof(INDEX_STATS), MYF(MY_WME | MY_ZEROFILL)))))
5188         {
5189           // Out of memory.
5190           sql_print_error("Allocating index stats failed.");
5191           goto end;
5192         }
5193         strncpy(index_stats->index, key, sizeof(index_stats->index));
5194         index_stats->index_len= strlen(index_stats->index);
5195         index_stats->rows_read= 0;
5196 
5197         if (my_hash_insert(&global_index_stats, (uchar *) index_stats))
5198         {
5199           // Out of memory.
5200           sql_print_error("Inserting index stats failed.");
5201           my_free((char *) index_stats);
5202           goto end;
5203         }
5204       }
5205       // Updates the global index stats.
5206       index_stats->rows_read+= index_rows_read[x];
5207       index_rows_read[x]=      0;
5208   end:
5209       mysql_mutex_unlock(&LOCK_global_index_stats);
5210     }
5211   }
5212 }
5213 
5214 /****************************************************************************
5215 ** Some general functions that aren't in the handler class
5216 ****************************************************************************/
5217 
5218 /**
5219   Initializes the table file and calls the appropriate database creator.
5220 
5221   @retval
5222    0  ok
5223   @retval
5224    1  error
5225 */
ha_create_table(THD * thd,const char * path,const char * db,const char * table_name,HA_CREATE_INFO * create_info,const List<Create_field> * create_fields,bool update_create_info,bool is_temp_table)5226 int ha_create_table(THD *thd, const char *path,
5227                     const char *db, const char *table_name,
5228                     HA_CREATE_INFO *create_info,
5229                     const List<Create_field> *create_fields,
5230                     bool update_create_info,
5231                     bool is_temp_table)
5232 {
5233   int error= 1;
5234   TABLE table;
5235   char name_buff[FN_REFLEN];
5236   const char *name;
5237   TABLE_SHARE share;
5238   bool saved_abort_on_warning;
5239 #ifdef HAVE_PSI_TABLE_INTERFACE
5240   bool temp_table = is_temp_table ||
5241     (create_info->options & HA_LEX_CREATE_TMP_TABLE) ||
5242     (strstr(path, tmp_file_prefix) != NULL);
5243 #endif
5244   DBUG_ENTER("ha_create_table");
5245 
5246   init_tmp_table_share(thd, &share, db, 0, table_name, path);
5247   if (open_table_def(thd, &share, 0))
5248     goto err;
5249 
5250 #ifdef HAVE_PSI_TABLE_INTERFACE
5251   share.m_psi= PSI_TABLE_CALL(get_table_share)(temp_table, &share);
5252 #endif
5253 
5254   if (open_table_from_share(thd, &share, "", 0, (uint) READ_ALL, 0, &table,
5255                             TRUE))
5256   {
5257 #ifdef HAVE_PSI_TABLE_INTERFACE
5258     PSI_TABLE_CALL(drop_table_share)
5259       (temp_table, db, strlen(db), table_name, strlen(table_name));
5260 #endif
5261     goto err;
5262   }
5263 
5264   if (update_create_info)
5265     update_create_info_from_table(create_info, &table);
5266 
5267   /*
5268     Update field definitions in 'table' with the zip_dict_name values
5269     from 'create_fields'.
5270   */
5271   if (create_fields != 0)
5272   {
5273     table.update_compressed_columns_info(*create_fields);
5274   }
5275 
5276   name= get_canonical_filename(table.file, share.path.str, name_buff);
5277 
5278   saved_abort_on_warning = thd->abort_on_warning;
5279   thd->abort_on_warning = false;
5280   error= table.file->ha_create(name, &table, create_info);
5281   thd->abort_on_warning = saved_abort_on_warning;
5282   if (error)
5283   {
5284     table.file->print_error(error, MYF(0));
5285 #ifdef HAVE_PSI_TABLE_INTERFACE
5286     PSI_TABLE_CALL(drop_table_share)
5287       (temp_table, db, strlen(db), table_name, strlen(table_name));
5288 #endif
5289   }
5290   (void) closefrm(&table, 0);
5291 err:
5292   free_table_share(&share);
5293   DBUG_RETURN(error != 0);
5294 }
5295 
5296 /**
5297   Try to discover a table from the engine.
5298 
5299   @note
5300     If found, write the frm file to disk.
5301 
5302   @retval
5303   -1    Table did not exist
5304   @retval
5305    0    Table created ok
5306   @retval
5307    > 0  Error, table existed but could not be created
5308 */
ha_create_table_from_engine(THD * thd,const char * db,const char * name)5309 int ha_create_table_from_engine(THD* thd, const char *db, const char *name)
5310 {
5311   int error;
5312   uchar *frmblob;
5313   size_t frmlen;
5314   char path[FN_REFLEN + 1];
5315   HA_CREATE_INFO create_info;
5316   TABLE table;
5317   TABLE_SHARE share;
5318   DBUG_ENTER("ha_create_table_from_engine");
5319   DBUG_PRINT("enter", ("name '%s'.'%s'", db, name));
5320 
5321   memset(static_cast<void*>(&create_info), 0, sizeof(create_info));
5322   if ((error= ha_discover(thd, db, name, &frmblob, &frmlen)))
5323   {
5324     /* Table could not be discovered and thus not created */
5325     DBUG_RETURN(error);
5326   }
5327 
5328   /*
5329     The table exists in the engine and was discovered;
5330     frmblob and frmlen are set, so write the frm to disk.
5331   */
5332 
5333   build_table_filename(path, sizeof(path) - 1, db, name, "", 0);
5334   // Save the frm file
5335   error= writefrm(path, frmblob, frmlen);
5336   my_free(frmblob);
5337   if (error)
5338     DBUG_RETURN(2);
5339 
5340   init_tmp_table_share(thd, &share, db, 0, name, path);
5341   if (open_table_def(thd, &share, 0))
5342   {
5343     DBUG_RETURN(3);
5344   }
5345 
5346 #ifdef HAVE_PSI_TABLE_INTERFACE
5347   /*
5348     Table discovery is not instrumented.
5349     Once discovered, the table will be opened normally,
5350     and instrumented normally.
5351   */
5352 #endif
5353 
5354   if (open_table_from_share(thd, &share, "" ,0, 0, 0, &table, FALSE))
5355   {
5356     free_table_share(&share);
5357     DBUG_RETURN(3);
5358   }
5359 
5360   update_create_info_from_table(&create_info, &table);
5361   create_info.table_options|= HA_OPTION_CREATE_FROM_ENGINE;
5362 
5363   get_canonical_filename(table.file, path, path);
5364   error=table.file->ha_create(path, &table, &create_info);
5365   (void) closefrm(&table, 1);
5366 
5367   DBUG_RETURN(error != 0);
5368 }
5369 
5370 
5371 /**
5372   Try to find a table in a storage engine.
5373 
5374   @param db   Normalized table schema name
5375   @param name Normalized table name.
5376   @param[out] exists Only valid if the function succeeded.
5377 
5378   @retval TRUE   An error was found
5379   @retval FALSE  Success, check *exists
5380 */
5381 
5382 bool
ha_check_if_table_exists(THD * thd,const char * db,const char * name,bool * exists)5383 ha_check_if_table_exists(THD* thd, const char *db, const char *name,
5384                          bool *exists)
5385 {
5386   uchar *frmblob= NULL;
5387   size_t frmlen;
5388   DBUG_ENTER("ha_check_if_table_exists");
5389 
5390   *exists= ! ha_discover(thd, db, name, &frmblob, &frmlen);
5391   if (*exists)
5392     my_free(frmblob);
5393 
5394   DBUG_RETURN(FALSE);
5395 }
5396 
5397 /**
5398   @brief Check if a given table is a system table.
5399 
5400   @details The primary purpose of this function is to prevent system tables
5401   from being created in, or moved to, undesired storage engines.
5402 
5403   @todo There is another function called is_system_table_name() used by
5404         get_table_category(), which is used to set TABLE_SHARE::table_category.
5405         It checks only a subset of table names, such as proc, event and time*.
5406         We cannot use the function below in get_table_category(),
5407         as that would affect the locking mechanism. If we need to
5408         unify these functions, we must fix the locking issues this creates.
5409 
5410   @param   hton                  Handlerton of new engine.
5411   @param   db                    Database name.
5412   @param   table_name            Table name to be checked.
5413 
5414   @return Operation status
5415     @retval  true                If the table name is a valid system table
5416                                  or if it is a valid user table.
5417 
5418     @retval  false               If the table name is a system table name
5419                                  and does not belong to engine specified
5420                                  in the command.
5421 */
ha_check_if_supported_system_table(handlerton * hton,const char * db,const char * table_name)5422 bool ha_check_if_supported_system_table(handlerton *hton, const char *db,
5423                                         const char *table_name)
5424 {
5425   DBUG_ENTER("ha_check_if_supported_system_table");
5426   st_sys_tbl_chk_params check_params;
5427   bool is_system_database= false;
5428   const char **names;
5429   st_system_tablename *systab;
5430 
5431   // Check if we have a system database name in the command.
5432   DBUG_ASSERT(known_system_databases != NULL);
5433   names= known_system_databases;
5434   while (names && *names)
5435   {
5436     if (strcmp(*names, db) == 0)
5437     {
5438       /* Used to compare later, will be faster */
5439       check_params.db= *names;
5440       is_system_database= true;
5441       break;
5442     }
5443     names++;
5444   }
5445   if (!is_system_database)
5446     DBUG_RETURN(true); // It's a user table name.
5447 
5448   // Check if this is an SQL-layer system table.
5449   systab= mysqld_system_tables;
5450   check_params.is_sql_layer_system_table= false;
5451   while (systab && systab->db)
5452   {
5453     if (systab->db == check_params.db &&
5454         strcmp(systab->tablename, table_name) == 0)
5455     {
5456       check_params.is_sql_layer_system_table= true;
5457       break;
5458     }
5459     systab++;
5460   }
5461 
5462   // Check if this is a system table and if some engine supports it.
5463   check_params.status= check_params.is_sql_layer_system_table ?
5464     st_sys_tbl_chk_params::KNOWN_SYSTEM_TABLE :
5465     st_sys_tbl_chk_params::NOT_KNOWN_SYSTEM_TABLE;
5466   check_params.db_type= hton->db_type;
5467   check_params.table_name= table_name;
5468   plugin_foreach(NULL, check_engine_system_table_handlerton,
5469                  MYSQL_STORAGE_ENGINE_PLUGIN, &check_params);
5470 
5471   // SE does not support this system table.
5472   if (check_params.status == st_sys_tbl_chk_params::KNOWN_SYSTEM_TABLE)
5473     DBUG_RETURN(false);
5474 
5475   // It's a system table or a valid user table.
5476   DBUG_RETURN(true);
5477 }
5478 
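/*
  Editor's note, an illustrative sketch with a hypothetical engine (not part of
  this file): an engine that wants to host SQL-layer system tables installs a
  handlerton callback with the signature used by
  check_engine_system_table_handlerton() below, for example:

    static bool foo_is_supported_system_table(const char *db,
                                               const char *table_name,
                                               bool is_sql_layer_system_table)
    {
      // Accept only SQL-layer system tables living in the 'mysql' database.
      return is_sql_layer_system_table && strcmp(db, "mysql") == 0;
    }

    // During engine initialization:
    //   foo_hton->is_supported_system_table= foo_is_supported_system_table;
*/
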
5479 /**
5480   @brief Called for each SE to check if given db, tablename is a system table.
5481 
5482   @details The primary purpose of this function is to prevent system tables
5483   from being created in, or moved to, undesired storage engines.
5484 
5485   @param   unused  unused THD*
5486   @param   plugin  Points to specific SE.
5487   @param   arg     Is of type struct st_sys_tbl_chk_params.
5488 
5489   @note
5490     args->status   Indicates OUT param,
5491                    see struct st_sys_tbl_chk_params definition for more info.
5492 
5493   @return Operation status
5494     @retval  true  There was a match found.
5495                    This will stop doing checks with other SE's.
5496 
5497     @retval  false There was no match found.
5498                    Other SE's will be checked to find a match.
5499 */
check_engine_system_table_handlerton(THD * unused,plugin_ref plugin,void * arg)5500 static my_bool check_engine_system_table_handlerton(THD *unused,
5501                                                     plugin_ref plugin,
5502                                                     void *arg)
5503 {
5504   st_sys_tbl_chk_params *check_params= (st_sys_tbl_chk_params*) arg;
5505   handlerton *hton= plugin_data(plugin, handlerton *);
5506 
5507   // Do we already know that the table is a system table?
5508   if (check_params->status == st_sys_tbl_chk_params::KNOWN_SYSTEM_TABLE)
5509   {
5510     /*
5511       If this is the same SE specified in the command, we can simply ask
5512       the SE if it supports it, and stop the search regardless.
5513     */
5514     if (hton->db_type == check_params->db_type)
5515     {
5516       if (hton->is_supported_system_table &&
5517           hton->is_supported_system_table(check_params->db,
5518                                        check_params->table_name,
5519                                        check_params->is_sql_layer_system_table))
5520         check_params->status= st_sys_tbl_chk_params::SUPPORTED_SYSTEM_TABLE;
5521       return TRUE;
5522     }
5523     /*
5524       If this is a different SE, there is no point in asking the SE
5525       since we already know it's a system table and we don't care
5526       if it is supported or not.
5527     */
5528     return FALSE;
5529   }
5530 
5531   /*
5532     We don't yet know if the table is a system table or not.
5533     We therefore must always ask the SE.
5534   */
5535   if (hton->is_supported_system_table &&
5536       hton->is_supported_system_table(check_params->db,
5537                                       check_params->table_name,
5538                                       check_params->is_sql_layer_system_table))
5539   {
5540     /*
5541       If this is the same SE specified in the command, we know it's a
5542       supported system table and can stop the search.
5543     */
5544     if (hton->db_type == check_params->db_type)
5545     {
5546       check_params->status= st_sys_tbl_chk_params::SUPPORTED_SYSTEM_TABLE;
5547       return TRUE;
5548     }
5549     else
5550       check_params->status= st_sys_tbl_chk_params::KNOWN_SYSTEM_TABLE;
5551   }
5552 
5553   return FALSE;
5554 }
5555 
5556 /*
5557   Prepare the list of all known system database names;
5558   currently we just have 'mysql' as the system database name.
5559 
5560   Later, SEs such as ndbcluster or innodb may define new database
5561   names to store SE-specific system tables.
5562 */
ha_known_system_databases(void)5563 const char** ha_known_system_databases(void)
5564 {
5565   list<const char*> found_databases;
5566   const char **databases, **database;
5567 
5568   // Get mysqld system database name.
5569   found_databases.push_back((char*) mysqld_system_database);
5570 
5571   // Get system database names from every specific storage engine.
5572   plugin_foreach(NULL, system_databases_handlerton,
5573                  MYSQL_STORAGE_ENGINE_PLUGIN, &found_databases);
5574 
5575   databases= (const char **) my_once_alloc(sizeof(char *)*
5576                                      (found_databases.size()+1),
5577                                      MYF(MY_WME | MY_FAE));
5578   DBUG_ASSERT(databases != NULL);
5579 
5580   list<const char*>::iterator it;
5581   database= databases;
5582   for (it= found_databases.begin(); it != found_databases.end(); it++)
5583     *database++= *it;
5584   *database= 0; // Last element.
5585 
5586   return databases;
5587 }
5588 
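/*
  Editor's note, an illustrative sketch with a hypothetical engine: an engine
  exposing its own system database supplies the callback consumed by
  system_databases_handlerton() below, e.g.:

    static const char *foo_system_database(void)
    {
      return "foo_sys";   // engine-specific system database name
    }

    // During engine initialization:
    //   foo_hton->system_database= foo_system_database;
*/
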
5589 /**
5590   @brief Fetch system database name specific to SE.
5591 
5592   @details This function is invoked by plugin_foreach() from
5593            ha_known_system_databases(), for each storage engine.
5594 */
system_databases_handlerton(THD * unused,plugin_ref plugin,void * arg)5595 static my_bool system_databases_handlerton(THD *unused, plugin_ref plugin,
5596                                            void *arg)
5597 {
5598   list<const char*> *found_databases= (list<const char*> *) arg;
5599   const char *db;
5600 
5601   handlerton *hton= plugin_data(plugin, handlerton *);
5602   if (hton->system_database)
5603   {
5604     db= hton->system_database();
5605     if (db)
5606       found_databases->push_back(db);
5607   }
5608 
5609   return FALSE;
5610 }
5611 
init()5612 void st_ha_check_opt::init()
5613 {
5614   flags= sql_flags= 0;
5615 }
5616 
5617 
5618 /*****************************************************************************
5619   Key cache handling.
5620 
5621   This code is only relevant for ISAM/MyISAM tables.
5622 
5623   key_cache->cache may be 0 only in the case where a key cache is not
5624   initialized or when we were not able to init the key cache in a previous
5625   call to ha_init_key_cache() (probably out of memory).
5626 *****************************************************************************/
5627 
5628 /**
5629   Init a key cache if it has not been initialized before.
5630 */
ha_init_key_cache(const char * name,KEY_CACHE * key_cache)5631 int ha_init_key_cache(const char *name, KEY_CACHE *key_cache)
5632 {
5633   DBUG_ENTER("ha_init_key_cache");
5634 
5635   if (!key_cache->key_cache_inited)
5636   {
5637     mysql_mutex_lock(&LOCK_global_system_variables);
5638     size_t tmp_buff_size= (size_t) key_cache->param_buff_size;
5639     uint tmp_block_size= (uint) key_cache->param_block_size;
5640     uint division_limit= key_cache->param_division_limit;
5641     uint age_threshold=  key_cache->param_age_threshold;
5642     mysql_mutex_unlock(&LOCK_global_system_variables);
5643     DBUG_RETURN(!init_key_cache(key_cache,
5644 				tmp_block_size,
5645 				tmp_buff_size,
5646 				division_limit, age_threshold));
5647   }
5648   DBUG_RETURN(0);
5649 }
5650 
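/*
  Editor's note, an illustrative usage sketch only: callers initialize a named
  key cache lazily before first use, along these lines (the KEY_CACHE instance
  and its param_* members are assumed to have been set up from system
  variables already; the cache name is hypothetical):

    if (ha_init_key_cache("my_key_cache", key_cache))
      ; // initialization failed, e.g. out of memory
*/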
5651 
5652 /**
5653   Resize key cache.
5654 */
ha_resize_key_cache(KEY_CACHE * key_cache)5655 int ha_resize_key_cache(KEY_CACHE *key_cache)
5656 {
5657   DBUG_ENTER("ha_resize_key_cache");
5658 
5659   if (key_cache->key_cache_inited)
5660   {
5661     mysql_mutex_lock(&LOCK_global_system_variables);
5662     size_t tmp_buff_size= (size_t) key_cache->param_buff_size;
5663     long tmp_block_size= (long) key_cache->param_block_size;
5664     uint division_limit= key_cache->param_division_limit;
5665     uint age_threshold=  key_cache->param_age_threshold;
5666     mysql_mutex_unlock(&LOCK_global_system_variables);
5667     DBUG_RETURN(!resize_key_cache(key_cache, tmp_block_size,
5668 				  tmp_buff_size,
5669 				  division_limit, age_threshold));
5670   }
5671   DBUG_RETURN(0);
5672 }
5673 
5674 
5675 /**
5676   Change parameters for key cache (like size)
5677 */
ha_change_key_cache_param(KEY_CACHE * key_cache)5678 int ha_change_key_cache_param(KEY_CACHE *key_cache)
5679 {
5680   if (key_cache->key_cache_inited)
5681   {
5682     mysql_mutex_lock(&LOCK_global_system_variables);
5683     uint division_limit= key_cache->param_division_limit;
5684     uint age_threshold=  key_cache->param_age_threshold;
5685     mysql_mutex_unlock(&LOCK_global_system_variables);
5686     change_key_cache_param(key_cache, division_limit, age_threshold);
5687   }
5688   return 0;
5689 }
5690 
5691 /**
5692   Move all tables from one key cache to another one.
5693 */
ha_change_key_cache(KEY_CACHE * old_key_cache,KEY_CACHE * new_key_cache)5694 int ha_change_key_cache(KEY_CACHE *old_key_cache,
5695 			KEY_CACHE *new_key_cache)
5696 {
5697   mi_change_key_cache(old_key_cache, new_key_cache);
5698   return 0;
5699 }
5700 
5701 
5702 /**
5703   Try to discover one table from handler(s).
5704 
5705   @retval
5706     -1   Table did not exist
5707   @retval
5708     0    OK. In this case *frmblob and *frmlen are set
5709   @retval
5710     > 0  Error. frmblob and frmlen may not be set
5711 */
5712 struct st_discover_args
5713 {
5714   const char *db;
5715   const char *name;
5716   uchar **frmblob;
5717   size_t *frmlen;
5718 };
5719 
discover_handlerton(THD * thd,plugin_ref plugin,void * arg)5720 static my_bool discover_handlerton(THD *thd, plugin_ref plugin,
5721                                    void *arg)
5722 {
5723   st_discover_args *vargs= (st_discover_args *)arg;
5724   handlerton *hton= plugin_data(plugin, handlerton *);
5725   if (hton->state == SHOW_OPTION_YES && hton->discover &&
5726       (!(hton->discover(hton, thd, vargs->db, vargs->name,
5727                         vargs->frmblob,
5728                         vargs->frmlen))))
5729     return TRUE;
5730 
5731   return FALSE;
5732 }
5733 
ha_discover(THD * thd,const char * db,const char * name,uchar ** frmblob,size_t * frmlen)5734 int ha_discover(THD *thd, const char *db, const char *name,
5735 		uchar **frmblob, size_t *frmlen)
5736 {
5737   int error= -1; // Table does not exist in any handler
5738   DBUG_ENTER("ha_discover");
5739   DBUG_PRINT("enter", ("db: %s, name: %s", db, name));
5740   st_discover_args args= {db, name, frmblob, frmlen};
5741 
5742   if (is_prefix(name,tmp_file_prefix)) /* skip temporary tables */
5743     DBUG_RETURN(error);
5744 
5745   if (plugin_foreach(thd, discover_handlerton,
5746                  MYSQL_STORAGE_ENGINE_PLUGIN, &args))
5747     error= 0;
5748 
5749   if (!error)
5750   {
5751     DBUG_ASSERT(!thd->status_var_aggregated);
5752     status_var_increment(thd->status_var.ha_discover_count);
5753   }
5754   DBUG_RETURN(error);
5755 }
5756 
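/*
  Editor's note, an illustrative sketch with a hypothetical engine: an engine
  that participates in discovery supplies a handlerton callback of the shape
  invoked by discover_handlerton() above, returning 0 and a malloc'ed frm
  image on success:

    static int foo_discover(handlerton *hton, THD *thd,
                            const char *db, const char *name,
                            uchar **frmblob, size_t *frmlen)
    {
      // Look the table up in the engine's own dictionary; if found, allocate
      // *frmblob (the caller frees it with my_free()), set *frmlen, return 0.
      return 1;   // "not found" in this sketch
    }

    // During engine initialization:
    //   foo_hton->discover= foo_discover;
*/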
5757 
5758 /**
5759   Call this function in order to give the handler the possibility
5760   to ask the engine if there are any new tables that should be written to
5761   disk, or any dropped tables that need to be removed from disk.
5762 */
5763 struct st_find_files_args
5764 {
5765   const char *db;
5766   const char *path;
5767   const char *wild;
5768   bool dir;
5769   List<LEX_STRING> *files;
5770 };
5771 
find_files_handlerton(THD * thd,plugin_ref plugin,void * arg)5772 static my_bool find_files_handlerton(THD *thd, plugin_ref plugin,
5773                                    void *arg)
5774 {
5775   st_find_files_args *vargs= (st_find_files_args *)arg;
5776   handlerton *hton= plugin_data(plugin, handlerton *);
5777 
5778 
5779   if (hton->state == SHOW_OPTION_YES && hton->find_files)
5780       if (hton->find_files(hton, thd, vargs->db, vargs->path, vargs->wild,
5781                           vargs->dir, vargs->files))
5782         return TRUE;
5783 
5784   return FALSE;
5785 }
5786 
5787 int
ha_find_files(THD * thd,const char * db,const char * path,const char * wild,bool dir,List<LEX_STRING> * files)5788 ha_find_files(THD *thd,const char *db,const char *path,
5789 	      const char *wild, bool dir, List<LEX_STRING> *files)
5790 {
5791   int error= 0;
5792   DBUG_ENTER("ha_find_files");
5793   DBUG_PRINT("enter", ("db: '%s'  path: '%s'  wild: '%s'  dir: %d",
5794 		       db, path, wild ? wild : "NULL", dir));
5795   st_find_files_args args= {db, path, wild, dir, files};
5796 
5797   plugin_foreach(thd, find_files_handlerton,
5798                  MYSQL_STORAGE_ENGINE_PLUGIN, &args);
5799   /* The return value is not currently used */
5800   DBUG_RETURN(error);
5801 }
5802 
5803 /**
5804   Ask the handler if the table exists in the engine.
5805   @retval
5806     HA_ERR_NO_SUCH_TABLE     Table does not exist
5807   @retval
5808     HA_ERR_TABLE_EXIST       Table exists
5809   @retval
5810     \#                  Error code
5811 */
5812 struct st_table_exists_in_engine_args
5813 {
5814   const char *db;
5815   const char *name;
5816   int err;
5817 };
5818 
table_exists_in_engine_handlerton(THD * thd,plugin_ref plugin,void * arg)5819 static my_bool table_exists_in_engine_handlerton(THD *thd, plugin_ref plugin,
5820                                    void *arg)
5821 {
5822   st_table_exists_in_engine_args *vargs= (st_table_exists_in_engine_args *)arg;
5823   handlerton *hton= plugin_data(plugin, handlerton *);
5824 
5825   int err= HA_ERR_NO_SUCH_TABLE;
5826 
5827   if (hton->state == SHOW_OPTION_YES && hton->table_exists_in_engine)
5828     err = hton->table_exists_in_engine(hton, thd, vargs->db, vargs->name);
5829 
5830   vargs->err = err;
5831   if (vargs->err == HA_ERR_TABLE_EXIST)
5832     return TRUE;
5833 
5834   return FALSE;
5835 }
5836 
ha_table_exists_in_engine(THD * thd,const char * db,const char * name)5837 int ha_table_exists_in_engine(THD* thd, const char* db, const char* name)
5838 {
5839   DBUG_ENTER("ha_table_exists_in_engine");
5840   DBUG_PRINT("enter", ("db: %s, name: %s", db, name));
5841   st_table_exists_in_engine_args args= {db, name, HA_ERR_NO_SUCH_TABLE};
5842   plugin_foreach(thd, table_exists_in_engine_handlerton,
5843                  MYSQL_STORAGE_ENGINE_PLUGIN, &args);
5844   DBUG_PRINT("exit", ("error: %d", args.err));
5845   DBUG_RETURN(args.err);
5846 }
5847 
5848 /**
5849   Prepare (sub-) sequences of joins in this statement
5850   which may be pushed to each storage engine for execution.
5851 */
5852 struct st_make_pushed_join_args
5853 {
5854   const AQP::Join_plan* plan; // Query plan provided by optimizer
5855   int err;                    // Error code to return.
5856 };
5857 
make_pushed_join_handlerton(THD * thd,plugin_ref plugin,void * arg)5858 static my_bool make_pushed_join_handlerton(THD *thd, plugin_ref plugin,
5859                                    void *arg)
5860 {
5861   st_make_pushed_join_args *vargs= (st_make_pushed_join_args *)arg;
5862   handlerton *hton= plugin_data(plugin, handlerton *);
5863 
5864   if (hton && hton->make_pushed_join)
5865   {
5866     const int error= hton->make_pushed_join(hton, thd, vargs->plan);
5867     if (unlikely(error))
5868     {
5869       vargs->err = error;
5870       return TRUE;
5871     }
5872   }
5873   return FALSE;
5874 }
5875 
ha_make_pushed_joins(THD * thd,const AQP::Join_plan * plan)5876 int ha_make_pushed_joins(THD *thd, const AQP::Join_plan* plan)
5877 {
5878   DBUG_ENTER("ha_make_pushed_joins");
5879   st_make_pushed_join_args args= {plan, 0};
5880   plugin_foreach(thd, make_pushed_join_handlerton,
5881                  MYSQL_STORAGE_ENGINE_PLUGIN, &args);
5882   DBUG_PRINT("exit", ("error: %d", args.err));
5883   DBUG_RETURN(args.err);
5884 }
5885 
5886 /*
5887   TODO: change this into a dynamic struct.
5888   List<handlerton> does not work because:
5889   1. binlog_end is called when the MEM_ROOT is gone
5890   2. it cannot use the thd MEM_ROOT, as the memory should be freed
5891 */
5892 #define MAX_HTON_LIST_ST 63
5893 struct hton_list_st
5894 {
5895   handlerton *hton[MAX_HTON_LIST_ST];
5896   uint sz;
5897 };
5898 
5899 struct binlog_func_st
5900 {
5901   enum_binlog_func fn;
5902   void *arg;
5903 };
5904 
5905 /** @brief
5906   Listing handlertons first to avoid recursive calls and deadlock
5907 */
binlog_func_list(THD * thd,plugin_ref plugin,void * arg)5908 static my_bool binlog_func_list(THD *thd, plugin_ref plugin, void *arg)
5909 {
5910   hton_list_st *hton_list= (hton_list_st *)arg;
5911   handlerton *hton= plugin_data(plugin, handlerton *);
5912   if (hton->state == SHOW_OPTION_YES && hton->binlog_func)
5913   {
5914     uint sz= hton_list->sz;
5915     if (sz == MAX_HTON_LIST_ST-1)
5916     {
5917       /* list full */
5918       return FALSE;
5919     }
5920     hton_list->hton[sz]= hton;
5921     hton_list->sz= sz+1;
5922   }
5923   return FALSE;
5924 }
5925 
binlog_func_foreach(THD * thd,binlog_func_st * bfn)5926 static my_bool binlog_func_foreach(THD *thd, binlog_func_st *bfn)
5927 {
5928   hton_list_st hton_list;
5929   uint i, sz;
5930 
5931   hton_list.sz= 0;
5932   plugin_foreach(thd, binlog_func_list,
5933                  MYSQL_STORAGE_ENGINE_PLUGIN, &hton_list);
5934 
5935   for (i= 0, sz= hton_list.sz; i < sz ; i++)
5936     hton_list.hton[i]->binlog_func(hton_list.hton[i], thd, bfn->fn, bfn->arg);
5937   return FALSE;
5938 }
5939 
5940 #ifdef HAVE_NDB_BINLOG
5941 
ha_reset_logs(THD * thd)5942 int ha_reset_logs(THD *thd)
5943 {
5944   binlog_func_st bfn= {BFN_RESET_LOGS, 0};
5945   binlog_func_foreach(thd, &bfn);
5946   return 0;
5947 }
5948 
ha_reset_slave(THD * thd)5949 void ha_reset_slave(THD* thd)
5950 {
5951   binlog_func_st bfn= {BFN_RESET_SLAVE, 0};
5952   binlog_func_foreach(thd, &bfn);
5953 }
5954 
ha_binlog_wait(THD * thd)5955 void ha_binlog_wait(THD* thd)
5956 {
5957   binlog_func_st bfn= {BFN_BINLOG_WAIT, 0};
5958   binlog_func_foreach(thd, &bfn);
5959 }
5960 
ha_binlog_index_purge_file(THD * thd,const char * file)5961 int ha_binlog_index_purge_file(THD *thd, const char *file)
5962 {
5963   binlog_func_st bfn= {BFN_BINLOG_PURGE_FILE, (void *)file};
5964   binlog_func_foreach(thd, &bfn);
5965   return 0;
5966 }
5967 
5968 struct binlog_log_query_st
5969 {
5970   enum_binlog_command binlog_command;
5971   const char *query;
5972   uint query_length;
5973   const char *db;
5974   const char *table_name;
5975 };
5976 
binlog_log_query_handlerton2(THD * thd,handlerton * hton,void * args)5977 static my_bool binlog_log_query_handlerton2(THD *thd,
5978                                             handlerton *hton,
5979                                             void *args)
5980 {
5981   struct binlog_log_query_st *b= (struct binlog_log_query_st*)args;
5982   if (hton->state == SHOW_OPTION_YES && hton->binlog_log_query)
5983     hton->binlog_log_query(hton, thd,
5984                            b->binlog_command,
5985                            b->query,
5986                            b->query_length,
5987                            b->db,
5988                            b->table_name);
5989   return FALSE;
5990 }
5991 
binlog_log_query_handlerton(THD * thd,plugin_ref plugin,void * args)5992 static my_bool binlog_log_query_handlerton(THD *thd,
5993                                            plugin_ref plugin,
5994                                            void *args)
5995 {
5996   return binlog_log_query_handlerton2(thd, plugin_data(plugin, handlerton *), args);
5997 }
5998 
ha_binlog_log_query(THD * thd,handlerton * hton,enum_binlog_command binlog_command,const char * query,uint query_length,const char * db,const char * table_name)5999 void ha_binlog_log_query(THD *thd, handlerton *hton,
6000                          enum_binlog_command binlog_command,
6001                          const char *query, uint query_length,
6002                          const char *db, const char *table_name)
6003 {
6004   struct binlog_log_query_st b;
6005   b.binlog_command= binlog_command;
6006   b.query= query;
6007   b.query_length= query_length;
6008   b.db= db;
6009   b.table_name= table_name;
6010   if (hton == 0)
6011     plugin_foreach(thd, binlog_log_query_handlerton,
6012                    MYSQL_STORAGE_ENGINE_PLUGIN, &b);
6013   else
6014     binlog_log_query_handlerton2(thd, hton, &b);
6015 }
6016 #endif
6017 
ha_binlog_end(THD * thd)6018 int ha_binlog_end(THD* thd)
6019 {
6020   binlog_func_st bfn= {BFN_BINLOG_END, 0};
6021   binlog_func_foreach(thd, &bfn);
6022   return 0;
6023 }
6024 
6025 /**
6026   Calculate cost of 'index only' scan for given index and number of records
6027 
6028   @param keynr    Index number
6029   @param records  Estimated number of records to be retrieved
6030 
6031   @note
6032     It is assumed that we will read through the whole key range and that all
6033     key blocks are half full (normally things are much better). It is also
6034     assumed that each time we read the next key from the index, the handler
6035     performs a random seek, thus the cost is proportional to the number of
6036     blocks read.
6037 
6038   @todo
6039     Consider joining this function and handler::read_time() into one
6040     handler::read_time(keynr, records, ranges, bool index_only) function.
6041 
6042   @return
6043     Estimated cost of 'index only' scan
6044 */
6045 
index_only_read_time(uint keynr,double records)6046 double handler::index_only_read_time(uint keynr, double records)
6047 {
6048   double read_time;
6049   uint keys_per_block= (stats.block_size/2/
6050                         (table_share->key_info[keynr].key_length + ref_length) +
6051                         1);
6052   read_time=((double) (records + keys_per_block-1) /
6053              (double) keys_per_block);
6054   return read_time;
6055 }
6056 
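/*
  Editor's note, a worked example of the estimate above (all numbers made up):
  with stats.block_size= 16384 and key_length + ref_length= 40, half-full
  blocks give keys_per_block= 16384/2/40 + 1 = 205, so records= 10000 yields
  read_time= (10000 + 205 - 1) / 205 ~= 49.8 estimated block reads.
*/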
6057 
6058 /**
6059   Check if key has partially-covered columns
6060 
6061   We can't use DS-MRR to perform range scans when the ranges are over
6062   partially-covered keys, because we won't have the full key part values
6063   (we'll have only their prefixes from the index) and will not be able to
6064   check if we've reached the end of the range.
6065 
6066   @param keyno  Key to check
6067 
6068   @todo
6069     Allow use of DS-MRR in cases where the index has partially-covered
6070     components but they are not used for scanning.
6071 
6072   @retval TRUE   Yes
6073   @retval FALSE  No
6074 */
6075 
key_uses_partial_cols(TABLE * table,uint keyno)6076 bool key_uses_partial_cols(TABLE *table, uint keyno)
6077 {
6078   KEY_PART_INFO *kp= table->key_info[keyno].key_part;
6079   KEY_PART_INFO *kp_end= kp + table->key_info[keyno].user_defined_key_parts;
6080   for (; kp != kp_end; kp++)
6081   {
6082     if (!kp->field->part_of_key.is_set(keyno))
6083       return TRUE;
6084   }
6085   return FALSE;
6086 }
6087 
6088 /****************************************************************************
6089  * Default MRR implementation (MRR to non-MRR converter)
6090  ***************************************************************************/
6091 
6092 /**
6093   Get cost and other information about MRR scan over a known list of ranges
6094 
6095   Calculate estimated cost and other information about an MRR scan for given
6096   sequence of ranges.
6097 
6098   @param keyno           Index number
6099   @param seq             Range sequence to be traversed
6100   @param seq_init_param  First parameter for seq->init()
6101   @param n_ranges_arg    Number of ranges in the sequence, or 0 if the caller
6102                          can't efficiently determine it
6103   @param bufsz    INOUT  IN:  Size of the buffer available for use
6104                          OUT: Size of the buffer that is expected to be actually
6105                               used, or 0 if buffer is not needed.
6106   @param flags    INOUT  A combination of HA_MRR_* flags
6107   @param cost     OUT    Estimated cost of MRR access
6108 
6109   @note
6110     This method (or an overriding one in a derived class) must check for
6111     thd->killed and return HA_POS_ERROR if it is not zero. This is required
6112     for a user to be able to interrupt the calculation by killing the
6113     connection/query.
6114 
6115   @retval
6116     HA_POS_ERROR  Error or the engine is unable to perform the requested
6117                   scan. Values of OUT parameters are undefined.
6118   @retval
6119     other         OK, *cost contains cost of the scan, *bufsz and *flags
6120                   contain scan parameters.
6121 */
6122 
6123 ha_rows
multi_range_read_info_const(uint keyno,RANGE_SEQ_IF * seq,void * seq_init_param,uint n_ranges_arg,uint * bufsz,uint * flags,Cost_estimate * cost)6124 handler::multi_range_read_info_const(uint keyno, RANGE_SEQ_IF *seq,
6125                                      void *seq_init_param, uint n_ranges_arg,
6126                                      uint *bufsz, uint *flags,
6127                                      Cost_estimate *cost)
6128 {
6129   KEY_MULTI_RANGE range;
6130   range_seq_t seq_it;
6131   ha_rows rows, total_rows= 0;
6132   uint n_ranges=0;
6133   THD *thd= current_thd;
6134 
6135   /* Default MRR implementation doesn't need buffer */
6136   *bufsz= 0;
6137 
6138   DBUG_EXECUTE_IF("bug13822652_2", thd->killed= THD::KILL_QUERY;);
6139 
6140   seq_it= seq->init(seq_init_param, n_ranges, *flags);
6141   while (!seq->next(seq_it, &range))
6142   {
6143     if (unlikely(thd->killed != 0))
6144       return HA_POS_ERROR;
6145 
6146     n_ranges++;
6147     key_range *min_endp, *max_endp;
6148     if (range.range_flag & GEOM_FLAG)
6149     {
6150       /* In this case tmp_min_flag contains the handler-read-function */
6151       range.start_key.flag= (ha_rkey_function) (range.range_flag ^ GEOM_FLAG);
6152       min_endp= &range.start_key;
6153       max_endp= NULL;
6154     }
6155     else
6156     {
6157       min_endp= range.start_key.length? &range.start_key : NULL;
6158       max_endp= range.end_key.length? &range.end_key : NULL;
6159     }
6160     /*
6161       Get the number of rows in the range. This is done by calling
6162       records_in_range() unless:
6163 
6164         1) The range is an equality range and the index is unique.
6165            There cannot be more than one matching row, so 1 is
6166            assumed. Note that it is possible that the correct number
6167            is actually 0, so the row estimate may be too high in this
6168            case. Also note: ranges of the form "x IS NULL" may have more
6169            than one matching row, so records_in_range() is called for these.
6170         2) a) The range is an equality range but the index is either
6171               not unique or all of the keyparts are not used.
6172            b) The user has requested that index statistics should be used
6173               for equality ranges to avoid the incurred overhead of
6174               index dives in records_in_range().
6175            c) Index statistics are available.
6176            Ranges of the form "x IS NULL" will not use index statistics
6177            because the number of rows with this value is likely to be
6178            very different from what the index statistics indicate.
6179     */
6180     int keyparts_used= 0;
6181     if ((range.range_flag & UNIQUE_RANGE) &&                        // 1)
6182         !(range.range_flag & NULL_RANGE))
6183       rows= 1; /* there can be at most one row */
6184     else if ((range.range_flag & EQ_RANGE) &&                       // 2a)
6185              (range.range_flag & USE_INDEX_STATISTICS) &&           // 2b)
6186              (keyparts_used= my_count_bits(range.start_key.keypart_map)) &&
6187              table->key_info[keyno].rec_per_key[keyparts_used-1] && // 2c)
6188              !(range.range_flag & NULL_RANGE))
6189       rows= table->key_info[keyno].rec_per_key[keyparts_used-1];
6190     else
6191     {
6192       DBUG_EXECUTE_IF("crash_records_in_range", DBUG_SUICIDE(););
6193       DBUG_ASSERT(min_endp || max_endp);
6194       if (HA_POS_ERROR == (rows= this->records_in_range(keyno, min_endp,
6195                                                         max_endp)))
6196       {
6197         /* Can't scan one range => can't do MRR scan at all */
6198         total_rows= HA_POS_ERROR;
6199         break;
6200       }
6201     }
6202     total_rows += rows;
6203   }
6204 
6205   if (total_rows != HA_POS_ERROR)
6206   {
6207     /* The following calculation is the same as in multi_range_read_info(): */
6208     *flags|= HA_MRR_USE_DEFAULT_IMPL;
6209     *flags|= HA_MRR_SUPPORT_SORTED;
6210 
6211     DBUG_ASSERT(cost->is_zero());
6212     if ((*flags & HA_MRR_INDEX_ONLY) && total_rows > 2)
6213       cost->add_io(index_only_read_time(keyno, total_rows) *
6214                    Cost_estimate::IO_BLOCK_READ_COST());
6215     else
6216       cost->add_io(read_time(keyno, n_ranges, total_rows) *
6217                    Cost_estimate::IO_BLOCK_READ_COST());
6218     cost->add_cpu(total_rows * ROW_EVALUATE_COST + 0.01);
6219   }
6220   return total_rows;
6221 }
6222 
6223 
6224 /**
6225   Get cost and other information about MRR scan over some sequence of ranges
6226 
6227   Calculate estimated cost and other information about an MRR scan for some
6228   sequence of ranges.
6229 
6230   The ranges themselves will be known only at execution phase. When this
6231   function is called we only know number of ranges and a (rough) E(#records)
6232   within those ranges.
6233 
6234   Currently this function is only called for "n-keypart singlepoint" ranges,
6235   i.e. each range is "keypart1=someconst1 AND ... AND keypartN=someconstN"
6236 
6237   The flags parameter is a combination of those flags: HA_MRR_SORTED,
6238   HA_MRR_INDEX_ONLY, HA_MRR_NO_ASSOCIATION, HA_MRR_LIMITS.
6239 
6240   @param keyno           Index number
6241   @param n_ranges        Estimated number of ranges (i.e. intervals) in the
6242                          range sequence.
6243   @param n_rows          Estimated total number of records contained within all
6244                          of the ranges
6245   @param bufsz    INOUT  IN:  Size of the buffer available for use
6246                          OUT: Size of the buffer that will be actually used, or
6247                               0 if buffer is not needed.
6248   @param flags    INOUT  A combination of HA_MRR_* flags
6249   @param cost     OUT    Estimated cost of MRR access
6250 
6251   @retval
6252     0     OK, *cost contains cost of the scan, *bufsz and *flags contain scan
6253           parameters.
6254   @retval
6255     other Error or can't perform the requested scan
6256 */
6257 
multi_range_read_info(uint keyno,uint n_ranges,uint n_rows,uint * bufsz,uint * flags,Cost_estimate * cost)6258 ha_rows handler::multi_range_read_info(uint keyno, uint n_ranges, uint n_rows,
6259                                        uint *bufsz, uint *flags,
6260                                        Cost_estimate *cost)
6261 {
6262   *bufsz= 0; /* Default implementation doesn't need a buffer */
6263 
6264   *flags|= HA_MRR_USE_DEFAULT_IMPL;
6265   *flags|= HA_MRR_SUPPORT_SORTED;
6266 
6267   DBUG_ASSERT(cost->is_zero());
6268 
6269   /* Produce the same cost as non-MRR code does */
6270   if (*flags & HA_MRR_INDEX_ONLY)
6271     cost->add_io(index_only_read_time(keyno, n_rows) *
6272                  Cost_estimate::IO_BLOCK_READ_COST());
6273   else
6274     cost->add_io(read_time(keyno, n_ranges, n_rows) *
6275                  Cost_estimate::IO_BLOCK_READ_COST());
6276   return 0;
6277 }
6278 
6279 
6280 /**
6281   Initialize the MRR scan
6282 
6283   Initialize the MRR scan. This function may do heavyweight scan
6284   initialization like row prefetching/sorting/etc. (NOTE: better not to do
6285   it here, as we may not need it, e.g. if we never satisfy the WHERE clause
6286   on previous tables. For many implementations it would be natural to do
6287   such initializations in the first multi_range_read_next() call.)
6288 
6289   mode is a combination of the following flags: HA_MRR_SORTED,
6290   HA_MRR_INDEX_ONLY, HA_MRR_NO_ASSOCIATION
6291 
6292   @param seq             Range sequence to be traversed
6293   @param seq_init_param  First parameter for seq->init()
6294   @param n_ranges        Number of ranges in the sequence
6295   @param mode            Flags, see the description section for the details
6296   @param buf             INOUT: memory buffer to be used
6297 
6298   @note
6299     One must have called index_init() before calling this function. Several
6300     multi_range_read_init() calls may be made in the course of one query.
6301 
6302     Until WL#2623 is done (see its text, section 3.2), the following will
6303     also hold:
6304     The caller will guarantee that if "seq->init == mrr_ranges_array_init"
6305     then seq_init_param is an array of n_ranges KEY_MULTI_RANGE structures.
6306     This property will only be used by NDB handler until WL#2623 is done.
6307 
6308     Buffer memory management is done according to the following scenario:
6309     The caller allocates the buffer and provides it to the callee by filling
6310     the members of HANDLER_BUFFER structure.
6311     The callee consumes all or some fraction of the provided buffer space, and
6312     sets the HANDLER_BUFFER members accordingly.
6313     The callee may use the buffer memory until the next multi_range_read_init()
6314     call is made, all records have been read, or until index_end() call is
6315     made, whichever comes first.
6316 
6317   @retval 0  OK
6318   @retval 1  Error
6319 */
6320 
6321 int
6322 handler::multi_range_read_init(RANGE_SEQ_IF *seq_funcs, void *seq_init_param,
6323                                uint n_ranges, uint mode, HANDLER_BUFFER *buf)
6324 {
6325   DBUG_ENTER("handler::multi_range_read_init");
6326   mrr_iter= seq_funcs->init(seq_init_param, n_ranges, mode);
6327   mrr_funcs= *seq_funcs;
6328   mrr_is_output_sorted= MY_TEST(mode & HA_MRR_SORTED);
6329   mrr_have_range= FALSE;
6330   DBUG_RETURN(0);
6331 }
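
/*
  Illustrative sketch (editorial addition; the local variable names, buffer
  size, keyno, mode and range-sequence objects are hypothetical) of the
  buffer-management protocol described above, seen from the caller's side.
  HANDLER_BUFFER member names follow their use elsewhere in this file
  (buffer, buffer_end, end_of_used_area):

    uchar mrr_buf[16 * 1024];
    HANDLER_BUFFER buf;
    buf.buffer= buf.end_of_used_area= mrr_buf;
    buf.buffer_end= mrr_buf + sizeof(mrr_buf);

    int error;
    if ((error= file->ha_index_init(keyno, true)) ||
        (error= file->multi_range_read_init(&seq_if, seq_param,
                                            n_ranges, mode, &buf)))
      return error;

    char *range_info;
    while (!(error= file->multi_range_read_next(&range_info)))
    {
      // A matching row is now in table->record[0]; range_info identifies
      // the range it came from (unless HA_MRR_NO_ASSOCIATION was set).
    }
    file->ha_index_end();
*/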
6332 
6333 
6334 /**
6335   Get next record in MRR scan
6336 
6337   Default MRR implementation: read the next record
6338 
6339   @param range_info  OUT  Undefined if HA_MRR_NO_ASSOCIATION flag is in effect
6340                           Otherwise, the opaque value associated with the range
6341                           that contains the returned record.
6342 
6343   @retval 0      OK
6344   @retval other  Error code
6345 */
6346 
6347 int handler::multi_range_read_next(char **range_info)
6348 {
6349   int result= HA_ERR_END_OF_FILE;
6350   int range_res;
6351   DBUG_ENTER("handler::multi_range_read_next");
6352 
6353   if (!mrr_have_range)
6354   {
6355     mrr_have_range= TRUE;
6356     goto start;
6357   }
6358 
6359   do
6360   {
6361     /* Save a call if there can be only one row in range. */
6362     if (mrr_cur_range.range_flag != (UNIQUE_RANGE | EQ_RANGE))
6363     {
6364       result= read_range_next();
6365       /* On success or non-EOF errors jump to the end. */
6366       if (result != HA_ERR_END_OF_FILE)
6367         break;
6368     }
6369     else
6370     {
6371       if (was_semi_consistent_read())
6372         goto scan_it_again;
6373     }
6374 
6375 start:
6376     /* Try the next range(s) until one matches a record. */
6377     while (!(range_res= mrr_funcs.next(mrr_iter, &mrr_cur_range)))
6378     {
6379 scan_it_again:
6380       result= read_range_first(mrr_cur_range.start_key.keypart_map ?
6381                                  &mrr_cur_range.start_key : 0,
6382                                mrr_cur_range.end_key.keypart_map ?
6383                                  &mrr_cur_range.end_key : 0,
6384                                MY_TEST(mrr_cur_range.range_flag & EQ_RANGE),
6385                                mrr_is_output_sorted);
6386       if (result != HA_ERR_END_OF_FILE)
6387         break;
6388     }
6389   }
6390   while ((result == HA_ERR_END_OF_FILE) && !range_res);
6391 
6392   *range_info= mrr_cur_range.ptr;
6393   DBUG_PRINT("exit",("handler::multi_range_read_next result %d", result));
6394   DBUG_RETURN(result);
6395 }
6396 
6397 
6398 /****************************************************************************
6399  * DS-MRR implementation
6400  ***************************************************************************/
6401 
6402 /**
6403   DS-MRR: Initialize and start MRR scan
6404 
6405   Initialize and start the MRR scan. Depending on the mode parameter, this
6406   may use default or DS-MRR implementation.
6407 
6408   The DS-MRR implementation will use a second handler object (h2) for
6409   doing the scan on the index:
6410   - on the first call to this function the h2 handler will be created
6411     and h2 will be opened using the same index as the main handler
6412     is set to use. The index scan on the main index will be closed
6413     and it will be re-opened to read records from the table using either
6414     no key or the primary key. The h2 handler will be deleted when
6415     reset() is called (which should happen at the end of the statement).
6416   - when dsmrr_close() is called the index scan on h2 is closed.
6417   - on following calls to this function one of the following must be valid:
6418     a. if dsmrr_close has been called:
6419        the main handler (h) must be open on an index, h2 will be opened
6420        using this index, and the index on h will be closed and
6421        h will be re-opened to read records from the table using either
6422        no key or the primary key.
6423     b. dsmrr_close has not been called:
6424        h2 will already be open, the main handler h must be set up
6425        to read records from the table (handler->inited is RND) either
6426        using the primary index or using no index at all.
6427 
6428   @param h_arg           Table handler to be used
6429   @param seq_funcs       Interval sequence enumeration functions
6430   @param seq_init_param  Interval sequence enumeration parameter
6431   @param n_ranges        Number of ranges in the sequence.
6432   @param mode            HA_MRR_* modes to use
6433   @param buf             INOUT Buffer to use
6434 
6435   @retval 0     Ok, Scan started.
6436   @retval other Error
6437 */
6438 
6439 int DsMrr_impl::dsmrr_init(handler *h_arg, RANGE_SEQ_IF *seq_funcs,
6440                            void *seq_init_param, uint n_ranges, uint mode,
6441                            HANDLER_BUFFER *buf)
6442 {
6443   uint elem_size;
6444   int retval= 0;
6445   DBUG_ENTER("DsMrr_impl::dsmrr_init");
6446   THD *thd= h_arg->table->in_use;     // current THD
6447 
6448   /*
6449     index_merge may invoke a scan on an object for which dsmrr_info[_const]
6450     has not been called, so set the owner handler here as well.
6451   */
6452   h= h_arg;
6453   if (!thd->optimizer_switch_flag(OPTIMIZER_SWITCH_MRR) ||
6454       mode & (HA_MRR_USE_DEFAULT_IMPL | HA_MRR_SORTED)) // DS-MRR doesn't sort
6455   {
6456     use_default_impl= TRUE;
6457     retval= h->handler::multi_range_read_init(seq_funcs, seq_init_param,
6458                                               n_ranges, mode, buf);
6459     DBUG_RETURN(retval);
6460   }
6461 
6462   /*
6463     This assert will hit if we have pushed an index condition to the
6464     primary key index and then "change our mind" and use a different
6465     index for retrieving data with MRR. One of the following criteria
6466     must be true:
6467       1. We have not pushed an index condition on this handler.
6468       2. We have pushed an index condition and this is on the currently used
6469          index.
6470       3. We have pushed an index condition but this is not for the primary key.
6471       4. We have pushed an index condition and this has been transferred to
6472          the clone (h2) of the handler object.
6473   */
6474   DBUG_ASSERT(!h->pushed_idx_cond ||
6475               h->pushed_idx_cond_keyno == h->active_index ||
6476               h->pushed_idx_cond_keyno != table->s->primary_key ||
6477               (h2 && h->pushed_idx_cond_keyno == h2->active_index));
6478 
6479   rowids_buf= buf->buffer;
6480 
6481   is_mrr_assoc= !MY_TEST(mode & HA_MRR_NO_ASSOCIATION);
6482 
6483   if (is_mrr_assoc)
6484   {
6485     DBUG_ASSERT(!thd->status_var_aggregated);
6486     status_var_increment(table->in_use->status_var.ha_multi_range_read_init_count);
6487   }
6488 
6489   rowids_buf_end= buf->buffer_end;
6490   elem_size= h->ref_length + (int)is_mrr_assoc * sizeof(void*);
6491   rowids_buf_last= rowids_buf +
6492                       ((rowids_buf_end - rowids_buf)/ elem_size)*
6493                       elem_size;
6494   rowids_buf_end= rowids_buf_last;
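
  /*
    Editorial numeric illustration (hypothetical values): with a 6-byte rowid
    (h->ref_length == 6) and association pointers enabled on a 64-bit build,
    elem_size == 6 + 8 == 14. A 1000-byte buffer is then truncated to
    (1000 / 14) * 14 == 994 bytes, so it holds exactly 71 whole
    {rowid, range_id} elements and never a partial element.
  */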
6495 
6496   /*
6497     The DS-MRR scan uses a second handler object (h2) for doing the
6498     index scan. Create this by cloning the primary handler
6499     object. The h2 handler object is deleted when DsMrr_impl::reset()
6500     is called.
6501   */
6502   if (!h2)
6503   {
6504     handler *new_h2;
6505     /*
6506       ::clone() takes up a lot of stack, especially on 64 bit platforms.
6507       The constant 5 is an empirical result.
6508       @todo Is this still the case? Leave it as it is for now but could
6509             likely be removed?
6510     */
6511     if (check_stack_overrun(thd, 5*STACK_MIN_SIZE, (uchar*) &new_h2))
6512       DBUG_RETURN(1);
6513 
6514     if (!(new_h2= h->clone(h->table->s->normalized_path.str, thd->mem_root)))
6515       DBUG_RETURN(1);
6516     h2= new_h2; /* Ok, now can put it into h2 */
6517     table->prepare_for_position();
6518   }
6519 
6520   /*
6521     Open the index scan on h2 using the key from the primary handler.
6522   */
6523   if (h2->active_index == MAX_KEY)
6524   {
6525     DBUG_ASSERT(h->active_index != MAX_KEY);
6526     const uint mrr_keyno= h->active_index;
6527 
6528     if ((retval= h2->ha_external_lock(thd, h->m_lock_type)))
6529       goto error;
6530 
6531     if ((retval= h2->extra(HA_EXTRA_KEYREAD)))
6532       goto error;
6533 
6534     if ((retval= h2->ha_index_init(mrr_keyno, false)))
6535       goto error;
6536 
6537     // Transfer ICP from h to h2
6538     if (mrr_keyno == h->pushed_idx_cond_keyno)
6539     {
6540       if (h2->idx_cond_push(mrr_keyno, h->pushed_idx_cond))
6541       {
6542         retval= 1;
6543         goto error;
6544       }
6545     }
6546     else
6547     {
6548       // Cancel any potentially previously pushed index conditions
6549       h2->cancel_pushed_idx_cond();
6550     }
6551   }
6552   else
6553   {
6554     /*
6555       h2 already has an open index. This happens when the DS-MRR scan
6556       is re-started without closing it first. In this case the primary
6557       handler must be used for reading records from the table, i.e. it
6558       must not be opened for doing a new range scan. In this case
6559       the active_index must either not be set or be the primary key.
6560     */
6561     DBUG_ASSERT(h->inited == handler::RND);
6562     DBUG_ASSERT(h->active_index == MAX_KEY ||
6563                 h->active_index == table->s->primary_key);
6564   }
6565 
6566   /*
6567     The index scan is now transferred to h2 and we can close the open
6568     index scan on the primary handler.
6569   */
6570   if (h->inited == handler::INDEX)
6571   {
6572     /*
6573       Calling h->ha_index_end() will invoke dsmrr_close() for this object,
6574       which will close the index scan on h2. We need to keep it open, so
6575       temporarily move h2 out of the DsMrr object.
6576     */
6577     handler *save_h2= h2;
6578     h2= NULL;
6579     retval= h->ha_index_end();
6580     h2= save_h2;
6581     if (retval)
6582       goto error;
6583   }
6584 
6585   /*
6586     Verify consistency between h and h2.
6587   */
6588   DBUG_ASSERT(h->inited != handler::INDEX);
6589   DBUG_ASSERT(h->active_index == MAX_KEY ||
6590               h->active_index == table->s->primary_key);
6591   DBUG_ASSERT(h2->inited == handler::INDEX);
6592   DBUG_ASSERT(h2->active_index != MAX_KEY);
6593   DBUG_ASSERT(h->m_lock_type == h2->m_lock_type);
6594 
6595   if ((retval= h2->handler::multi_range_read_init(seq_funcs, seq_init_param,
6596                                                   n_ranges, mode, buf)))
6597     goto error;
6598 
6599   if ((retval= dsmrr_fill_buffer()))
6600     goto error;
6601 
6602   /*
6603     If the above call has scanned through all intervals in *seq, then
6604     adjust *buf to indicate that the remaining buffer space will not be used.
6605   */
6606   if (dsmrr_eof)
6607     buf->end_of_used_area= rowids_buf_last;
6608 
6609   /*
6610      h->inited == INDEX may occur when 'range checked for each record' is
6611      used.
6612   */
6613   if ((h->inited != handler::RND) &&
6614       ((h->inited==handler::INDEX? h->ha_index_end(): FALSE) ||
6615        (h->ha_rnd_init(FALSE))))
6616   {
6617     retval= 1;
6618     goto error;
6619   }
6620 
6621   use_default_impl= FALSE;
6622   h->mrr_funcs= *seq_funcs;
6623 
6624   DBUG_RETURN(0);
6625 error:
6626   h2->ha_index_or_rnd_end();
6627   h2->ha_external_lock(thd, F_UNLCK);
6628   h2->close();
6629   delete h2;
6630   h2= NULL;
6631   DBUG_ASSERT(retval != 0);
6632   DBUG_RETURN(retval);
6633 }
6634 
6635 
6636 void DsMrr_impl::dsmrr_close()
6637 {
6638   DBUG_ENTER("DsMrr_impl::dsmrr_close");
6639 
6640   // If there is an open index on h2, then close it
6641   if (h2 && h2->active_index != MAX_KEY)
6642   {
6643     h2->ha_index_or_rnd_end();
6644     h2->ha_external_lock(current_thd, F_UNLCK);
6645   }
6646   use_default_impl= true;
6647   DBUG_VOID_RETURN;
6648 }
6649 
6650 
6651 void DsMrr_impl::reset()
6652 {
6653   DBUG_ENTER("DsMrr_impl::reset");
6654 
6655   if (h2)
6656   {
6657     // Close any ongoing DS-MRR scan
6658     dsmrr_close();
6659 
6660     // Close and delete the h2 handler
6661     h2->close();
6662     delete h2;
6663     h2= NULL;
6664   }
6665   DBUG_VOID_RETURN;
6666 }
6667 
6668 
6669 static int rowid_cmp(void *h, uchar *a, uchar *b)
6670 {
6671   return ((handler*)h)->cmp_ref(a, b);
6672 }
6673 
6674 
6675 /**
6676   DS-MRR: Fill the buffer with rowids and sort it by rowid
6677 
6678   {This is an internal function of DiskSweep MRR implementation}
6679   Scan the MRR ranges and collect ROWIDs (or {ROWID, range_id} pairs) into
6680   buffer. When the buffer is full or scan is completed, sort the buffer by
6681   rowid and return.
6682 
6683   The function assumes that rowids buffer is empty when it is invoked.
6684 
6685   @param h  Table handler
6686 
6687   @retval 0      OK, the next portion of rowids is in the buffer,
6688                  properly ordered
6689   @retval other  Error
6690 */
6691 
6692 int DsMrr_impl::dsmrr_fill_buffer()
6693 {
6694   char *range_info;
6695   int res= 0;
6696   DBUG_ENTER("DsMrr_impl::dsmrr_fill_buffer");
6697   DBUG_ASSERT(rowids_buf < rowids_buf_end);
6698 
6699   rowids_buf_cur= rowids_buf;
6700   while ((rowids_buf_cur < rowids_buf_end) &&
6701          !(res= h2->handler::multi_range_read_next(&range_info)))
6702   {
6703     KEY_MULTI_RANGE *curr_range= &h2->handler::mrr_cur_range;
6704     if (h2->mrr_funcs.skip_index_tuple &&
6705         h2->mrr_funcs.skip_index_tuple(h2->mrr_iter, curr_range->ptr))
6706       continue;
6707 
6708     /* Put rowid, or {rowid, range_id} pair into the buffer */
6709     h2->position(table->record[0]);
6710     memcpy(rowids_buf_cur, h2->ref, h2->ref_length);
6711     rowids_buf_cur += h2->ref_length;
6712 
6713     if (is_mrr_assoc)
6714     {
6715       memcpy(rowids_buf_cur, &range_info, sizeof(void*));
6716       rowids_buf_cur += sizeof(void*);
6717     }
6718   }
6719 
6720   if (res && res != HA_ERR_END_OF_FILE)
6721     DBUG_RETURN(res);
6722   dsmrr_eof= MY_TEST(res == HA_ERR_END_OF_FILE);
6723 
6724   /* Sort the buffer contents by rowid */
6725   uint elem_size= h->ref_length + (int)is_mrr_assoc * sizeof(void*);
6726   uint n_rowids= (rowids_buf_cur - rowids_buf) / elem_size;
6727 
6728   my_qsort2(rowids_buf, n_rowids, elem_size, (qsort2_cmp)rowid_cmp,
6729             (void*)h);
6730   rowids_buf_last= rowids_buf_cur;
6731   rowids_buf_cur=  rowids_buf;
6732   DBUG_RETURN(0);
6733 }
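
/*
  Editorial illustration of the buffer produced above (hypothetical rowid
  values, association pointers omitted): if the index scan returns rowids in
  the order 0x0131, 0x0007, 0x0102, the buffer is sorted by rowid_cmp() into
  0x0007, 0x0102, 0x0131 before returning, so that the subsequent rnd_pos()
  calls in dsmrr_next() sweep the data file in ascending rowid order instead
  of jumping back and forth.
*/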
6734 
6735 
6736 /*
6737   DS-MRR implementation: multi_range_read_next() function
6738 */
6739 
6740 int DsMrr_impl::dsmrr_next(char **range_info)
6741 {
6742   int res;
6743   uchar *cur_range_info= 0;
6744   uchar *rowid;
6745 
6746   if (use_default_impl)
6747     return h->handler::multi_range_read_next(range_info);
6748 
6749   do
6750   {
6751     if (rowids_buf_cur == rowids_buf_last)
6752     {
6753       if (dsmrr_eof)
6754       {
6755         res= HA_ERR_END_OF_FILE;
6756         goto end;
6757       }
6758 
6759       res= dsmrr_fill_buffer();
6760       if (res)
6761         goto end;
6762     }
6763 
6764     /* return eof if there are no rowids in the buffer after re-fill attempt */
6765     if (rowids_buf_cur == rowids_buf_last)
6766     {
6767       res= HA_ERR_END_OF_FILE;
6768       goto end;
6769     }
6770     rowid= rowids_buf_cur;
6771 
6772     if (is_mrr_assoc)
6773       memcpy(&cur_range_info, rowids_buf_cur + h->ref_length, sizeof(uchar*));
6774 
6775     rowids_buf_cur += h->ref_length + sizeof(void*) * MY_TEST(is_mrr_assoc);
6776     if (h2->mrr_funcs.skip_record &&
6777 	h2->mrr_funcs.skip_record(h2->mrr_iter, (char *) cur_range_info, rowid))
6778       continue;
6779     res= h->rnd_pos(table->record[0], rowid);
6780     break;
6781   } while (true);
6782 
6783   if (is_mrr_assoc)
6784   {
6785     memcpy(range_info, rowid + h->ref_length, sizeof(void*));
6786   }
6787 end:
6788   return res;
6789 }
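
/*
  Editorial sketch of the resulting access pattern (hypothetical case: a
  buffer that holds 3 rowids, 5 qualifying rows): dsmrr_next() alternates
  between the two phases until the range sequence is exhausted:

    fill:  index scan collects rowids r5, r1, r3  -> sort -> r1, r3, r5
    sweep: rnd_pos(r1), rnd_pos(r3), rnd_pos(r5)
    fill:  index scan collects r4, r2             -> sort -> r2, r4
    sweep: rnd_pos(r2), rnd_pos(r4)               -> HA_ERR_END_OF_FILE
*/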
6790 
6791 
6792 /*
6793   DS-MRR implementation: multi_range_read_info() function
6794 */
6795 ha_rows DsMrr_impl::dsmrr_info(uint keyno, uint n_ranges, uint rows,
6796                                uint *bufsz, uint *flags, Cost_estimate *cost)
6797 {
6798   ha_rows res MY_ATTRIBUTE((unused));
6799   uint def_flags= *flags;
6800   uint def_bufsz= *bufsz;
6801 
6802   /* Get cost/flags/mem_usage of default MRR implementation */
6803   res= h->handler::multi_range_read_info(keyno, n_ranges, rows, &def_bufsz,
6804                                          &def_flags, cost);
6805   DBUG_ASSERT(!res);
6806 
6807   if ((*flags & HA_MRR_USE_DEFAULT_IMPL) ||
6808       choose_mrr_impl(keyno, rows, flags, bufsz, cost))
6809   {
6810     /* Default implementation is chosen */
6811     DBUG_PRINT("info", ("Default MRR implementation chosen"));
6812     *flags= def_flags;
6813     *bufsz= def_bufsz;
6814     DBUG_ASSERT(*flags & HA_MRR_USE_DEFAULT_IMPL);
6815   }
6816   else
6817   {
6818     /* *flags and *bufsz were set by choose_mrr_impl */
6819     DBUG_PRINT("info", ("DS-MRR implementation chosen"));
6820   }
6821   return 0;
6822 }
6823 
6824 
6825 /*
6826   DS-MRR Implementation: multi_range_read_info_const() function
6827 */
6828 
6829 ha_rows DsMrr_impl::dsmrr_info_const(uint keyno, RANGE_SEQ_IF *seq,
6830                                  void *seq_init_param, uint n_ranges,
6831                                  uint *bufsz, uint *flags, Cost_estimate *cost)
6832 {
6833   ha_rows rows;
6834   uint def_flags= *flags;
6835   uint def_bufsz= *bufsz;
6836   /* Get cost/flags/mem_usage of default MRR implementation */
6837   rows= h->handler::multi_range_read_info_const(keyno, seq, seq_init_param,
6838                                                 n_ranges, &def_bufsz,
6839                                                 &def_flags, cost);
6840   if (rows == HA_POS_ERROR)
6841   {
6842     /* Default implementation can't perform MRR scan => we can't either */
6843     return rows;
6844   }
6845 
6846   /*
6847     If HA_MRR_USE_DEFAULT_IMPL has been passed to us, that is an order to
6848     use the default MRR implementation (we need it for UPDATE/DELETE).
6849     Otherwise, make a choice based on cost and mrr* flags of
6850     @@optimizer_switch.
6851   */
6852   if ((*flags & HA_MRR_USE_DEFAULT_IMPL) ||
6853       choose_mrr_impl(keyno, rows, flags, bufsz, cost))
6854   {
6855     DBUG_PRINT("info", ("Default MRR implementation chosen"));
6856     *flags= def_flags;
6857     *bufsz= def_bufsz;
6858     DBUG_ASSERT(*flags & HA_MRR_USE_DEFAULT_IMPL);
6859   }
6860   else
6861   {
6862     /* *flags and *bufsz were set by choose_mrr_impl */
6863     DBUG_PRINT("info", ("DS-MRR implementation chosen"));
6864   }
6865   return rows;
6866 }
6867 
6868 
6869 /**
6870   DS-MRR Internals: Choose between Default MRR implementation and DS-MRR
6871 
6872   Make the choice between using Default MRR implementation and DS-MRR.
6873   This function contains common functionality factored out of dsmrr_info()
6874   and dsmrr_info_const(). The function assumes that the default MRR
6875   implementation's applicability requirements are satisfied.
6876 
6877   @param keyno       Index number
6878   @param rows        E(full rows to be retrieved)
6879   @param flags  IN   MRR flags provided by the MRR user
6880                 OUT  If DS-MRR is chosen, flags of DS-MRR implementation
6881                      else the value is not modified
6882   @param bufsz  IN   If DS-MRR is chosen, buffer use of DS-MRR implementation
6883                      else the value is not modified
6884   @param cost   IN   Cost of default MRR implementation
6885                 OUT  If DS-MRR is chosen, cost of DS-MRR scan
6886                      else the value is not modified
6887 
6888   @retval TRUE   Default MRR implementation should be used
6889   @retval FALSE  DS-MRR implementation should be used
6890 */
6891 
6892 bool DsMrr_impl::choose_mrr_impl(uint keyno, ha_rows rows, uint *flags,
6893                                  uint *bufsz, Cost_estimate *cost)
6894 {
6895   bool res;
6896   THD *thd= current_thd;
6897   if (!thd->optimizer_switch_flag(OPTIMIZER_SWITCH_MRR) ||
6898       *flags & (HA_MRR_INDEX_ONLY | HA_MRR_SORTED) || // Unsupported by DS-MRR
6899       (keyno == table->s->primary_key && h->primary_key_is_clustered()) ||
6900        key_uses_partial_cols(table, keyno))
6901   {
6902     /* Use the default implementation, don't modify args: See comments  */
6903     return TRUE;
6904   }
6905 
6906   /*
6907     If @@optimizer_switch has "mrr_cost_based" on, we should avoid
6908     using DS-MRR for queries where it is likely that the records are
6909     stored in memory. Since there is currently no way to determine
6910     this, we use a heuristic:
6911     a) if the storage engine has a memory buffer, DS-MRR is only
6912        considered if the table size is bigger than the buffer.
6913     b) if the storage engine does not have a memory buffer, DS-MRR is
6914        only considered if the table size is bigger than 100MB.
6915     c) Since DS-MRR has an initial setup cost, it is only considered if
6916        at least 50 records will be read.
6917   */
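  /*
    Editorial example with hypothetical numbers: with mrr_cost_based=on, an
    engine buffer of 128MB and a table whose data_file_length is 200MB, DS-MRR
    remains a candidate for rows == 500 (table larger than the buffer, more
    than 50 rows), whereas the default implementation is chosen for rows == 30
    (setup cost not worth it) or for a 50MB table (likely cached in memory).
  */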
6918   if (thd->optimizer_switch_flag(OPTIMIZER_SWITCH_MRR_COST_BASED))
6919   {
6920     /*
6921       If the storage engine has a database buffer we use this as the
6922       minimum size the table should have before considering DS-MRR.
6923     */
6924     longlong min_file_size= table->file->get_memory_buffer_size();
6925     if (min_file_size == -1)
6926     {
6927       // No estimate for database buffer
6928       min_file_size= 100 * 1024 * 1024;    // 100 MB
6929     }
6930 
6931     if (table->file->stats.data_file_length <
6932         static_cast<ulonglong>(min_file_size) ||
6933         rows <= 50)
6934       return true;                 // Use the default implementation
6935   }
6936 
6937   Cost_estimate dsmrr_cost;
6938   if (get_disk_sweep_mrr_cost(keyno, rows, *flags, bufsz, &dsmrr_cost))
6939     return TRUE;
6940 
6941   bool force_dsmrr;
6942   /*
6943     If @@optimizer_switch has "mrr" on and "mrr_cost_based" off, then set cost
6944     of DS-MRR to be minimum of DS-MRR and Default implementations cost. This
6945     allows one to force use of DS-MRR whenever it is applicable without
6946     affecting other cost-based choices.
6947   */
6948   if ((force_dsmrr=
6949        (thd->optimizer_switch_flag(OPTIMIZER_SWITCH_MRR) &&
6950         !thd->optimizer_switch_flag(OPTIMIZER_SWITCH_MRR_COST_BASED))) &&
6951       dsmrr_cost.total_cost() > cost->total_cost())
6952     dsmrr_cost= *cost;
6953 
6954   if (force_dsmrr || (dsmrr_cost.total_cost() <= cost->total_cost()))
6955   {
6956     *flags &= ~HA_MRR_USE_DEFAULT_IMPL;  /* Use the DS-MRR implementation */
6957     *flags &= ~HA_MRR_SUPPORT_SORTED;    /* We can't provide ordered output */
6958     *cost= dsmrr_cost;
6959     res= FALSE;
6960   }
6961   else
6962   {
6963     /* Use the default MRR implementation */
6964     res= TRUE;
6965   }
6966   return res;
6967 }
6968 
6969 
6970 static void get_sort_and_sweep_cost(TABLE *table, ha_rows nrows,
6971                                     Cost_estimate *cost);
6972 
6973 
6974 /**
6975   Get cost of DS-MRR scan
6976 
6977   @param keynr              Index to be used
6978   @param rows               E(Number of rows to be scanned)
6979   @param flags              Scan parameters (HA_MRR_* flags)
6980   @param buffer_size INOUT  Buffer size
6981   @param cost        OUT    The cost
6982 
6983   @retval FALSE  OK
6984   @retval TRUE   Error, DS-MRR cannot be used (the buffer is too small
6985                  for even 1 rowid)
6986 */
6987 
6988 bool DsMrr_impl::get_disk_sweep_mrr_cost(uint keynr, ha_rows rows, uint flags,
6989                                          uint *buffer_size,
6990                                          Cost_estimate *cost)
6991 {
6992   ha_rows rows_in_last_step;
6993   uint n_full_steps;
6994   double index_read_cost;
6995 
6996   const uint elem_size= h->ref_length +
6997                         sizeof(void*) * (!MY_TEST(flags & HA_MRR_NO_ASSOCIATION));
6998   const ha_rows max_buff_entries= *buffer_size / elem_size;
6999 
7000   if (!max_buff_entries)
7001     return TRUE; /* Buffer does not have enough space for even 1 rowid */
7002 
7003   /* Number of iterations we'll make with full buffer */
7004   n_full_steps= (uint)floor(rows2double(rows) / max_buff_entries);
7005 
7006   /*
7007     Get the number of rows we'll be processing in the last iteration, with
7008     a non-full buffer
7009   */
7010   rows_in_last_step= rows % max_buff_entries;
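
  /*
    Editorial numeric example (hypothetical values): with rows == 250 and
    max_buff_entries == 100, n_full_steps == 2 (two passes with a full buffer)
    and rows_in_last_step == 50 (the final, partially filled pass).
  */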
7011 
7012   DBUG_ASSERT(cost->is_zero());
7013 
7014   if (n_full_steps)
7015   {
7016     get_sort_and_sweep_cost(table, max_buff_entries, cost);
7017     cost->multiply(n_full_steps);
7018   }
7019   else
7020   {
7021     /*
7022       Adjust buffer size since only parts of the buffer will be used:
7023       1. Adjust record estimate for the last scan to reduce the likelihood
7024          of needing more than one scan by adding 20 percent to the
7025          record estimate and by ensuring this is at least 100 records.
7026       2. If the estimated needed buffer size is lower than suggested by
7027          the caller then set it to the estimated buffer size.
7028     */
7029     const ha_rows keys_in_buffer=
7030       max<ha_rows>(static_cast<ha_rows>(1.2 * rows_in_last_step), 100);
7031     *buffer_size= min<ulong>(*buffer_size,
7032                              static_cast<ulong>(keys_in_buffer) * elem_size);
7033   }
7034 
7035   Cost_estimate last_step_cost;
7036   get_sort_and_sweep_cost(table, rows_in_last_step, &last_step_cost);
7037   (*cost)+= last_step_cost;
7038 
7039   /*
7040     Cost of memory is not included in the total_cost() function and
7041     thus will not be considered when comparing costs. Still, we
7042     record it in the cost estimate object for future use.
7043   */
7044   cost->add_mem(*buffer_size);
7045 
7046   /* Total cost of all index accesses */
7047   index_read_cost= h->index_only_read_time(keynr, rows);
7048   cost->add_io(index_read_cost * Cost_estimate::IO_BLOCK_READ_COST());
7049 
7050   /*
7051     Add CPU cost for processing records (see
7052     @handler::multi_range_read_info_const()).
7053   */
7054   cost->add_cpu(rows * ROW_EVALUATE_COST);
7055   return FALSE;
7056 }
7057 
7058 
7059 /*
7060   Get cost of one sort-and-sweep step
7061 
7062   SYNOPSIS
7063     get_sort_and_sweep_cost()
7064       table       Table being accessed
7065       nrows       Number of rows to be sorted and retrieved
7066       cost   OUT  The cost
7067 
7068   DESCRIPTION
7069     Get cost of these operations:
7070      - sort an array of #nrows ROWIDs using qsort
7071      - read #nrows records from table in a sweep.
7072 */
7073 
7074 static
7075 void get_sort_and_sweep_cost(TABLE *table, ha_rows nrows, Cost_estimate *cost)
7076 {
7077   DBUG_ASSERT(cost->is_zero());
7078   if (nrows)
7079   {
7080     get_sweep_read_cost(table, nrows, FALSE, cost);
7081 
7082     /*
7083       Constant for the cost of doing one key compare operation in the
7084       sort operation. We should have used the existing
7085       ROWID_COMPARE_COST constant here but this would make the cost
7086       estimate of sorting very high for queries accessing many
7087       records. Until this constant is adjusted we introduce a constant
7088       that is more realistic. @todo: Replace this with
7089       ROWID_COMPARE_COST when it has been given a realistic value.
7090     */
7091     const double ROWID_COMPARE_SORT_COST = 0.01;
7092 
7093     /* Add cost of qsort call: n * log2(n) * cost(rowid_comparison) */
7094     const double cpu_sort= nrows * log2(nrows) * ROWID_COMPARE_SORT_COST;
7095     cost->add_cpu(cpu_sort);
7096   }
7097 }
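
/*
  Editorial numeric example for the qsort term above (hypothetical value):
  for nrows == 1000, cpu_sort == 1000 * log2(1000) * 0.01, i.e. roughly
  1000 * 9.97 * 0.01 ~= 99.7 cost units added on top of the sweep read cost.
*/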
7098 
7099 
7100 /**
7101   Get cost of reading nrows table records in a "disk sweep"
7102 
7103   A disk sweep read is a sequence of handler->rnd_pos(rowid) calls made
7104   for an ordered sequence of rowids.
7105 
7106   We assume hard disk IO. The read is performed as follows:
7107 
7108    1. The disk head is moved to the needed cylinder
7109    2. The controller waits for the plate to rotate
7110    3. The data is transferred
7111 
7112   Time to do #3 is insignificant compared to #2+#1.
7113 
7114   Time to move the disk head is proportional to head travel distance.
7115 
7116   Time to wait for the plate to rotate depends on whether the disk head
7117   was moved or not.
7118 
7119   If disk head wasn't moved, the wait time is proportional to distance
7120   between the previous block and the block we're reading.
7121 
7122   If the head was moved, we don't know how much we'll need to wait for the
7123   plate to rotate. We assume the wait time to be a random variable with a
7124   mean of half the full rotation time.
7125 
7126   Our cost units are "random disk seeks". The cost of random disk seek is
7127   actually not a constant; it depends on the range of cylinders we're going
7128   to access. We make it constant by introducing a fuzzy concept of "typical
7129   datafile length" (it's fuzzy as it's hard to tell whether it should
7130   include index file, temp.tables etc). Then random seek cost is:
7131 
7132     1 = half_rotation_cost + move_cost * 1/3 * typical_data_file_length
7133 
7134   We define half_rotation_cost as DISK_SEEK_BASE_COST=0.9.
7135 
7136   @param table             Table to be accessed
7137   @param nrows             Number of rows to retrieve
7138   @param interrupted       TRUE <=> Assume that the disk sweep will be
7139                            interrupted by other disk IO. FALSE - otherwise.
7140   @param cost         OUT  The cost.
7141 */
7142 
7143 void get_sweep_read_cost(TABLE *table, ha_rows nrows, bool interrupted,
7144                          Cost_estimate *cost)
7145 {
7146   DBUG_ENTER("get_sweep_read_cost");
7147 
7148   DBUG_ASSERT(cost->is_zero());
7149   if (nrows > 0)
7150   {
7151     double n_blocks=
7152       ceil(ulonglong2double(table->file->stats.data_file_length) / IO_SIZE);
7153     if (n_blocks < 1.0)                         // When data_file_length is 0
7154       n_blocks= 1.0;
7155     double busy_blocks=
7156       n_blocks * (1.0 - pow(1.0 - 1.0/n_blocks, rows2double(nrows)));
7157     if (busy_blocks < 1.0)
7158       busy_blocks= 1.0;
7159 
7160     DBUG_PRINT("info",("sweep: nblocks=%g, busy_blocks=%g", n_blocks,
7161                        busy_blocks));
7162     /*
7163       The random access cost for reading the data pages will be the
7164       upper limit for the sweep_cost.
7165     */
7166     cost->add_io(busy_blocks * Cost_estimate::IO_BLOCK_READ_COST());
7167 
7168     if (!interrupted)
7169     {
7170       /* Assume reading is done in one 'sweep' */
7171       Cost_estimate sweep_cost;
7172       sweep_cost.add_io(busy_blocks *
7173                    (DISK_SEEK_BASE_COST +
7174                     DISK_SEEK_PROP_COST * n_blocks / busy_blocks));
7175       /*
7176         For some cases, ex: when only few blocks need to be read
7177         and the seek distance becomes very large, the sweep cost
7178         model can produce a cost estimate that is larger than the
7179         cost of random access.  To handle this case, we use the
7180         sweep cost only when it is less than the random access
7181         cost.
7182       */
7183       if (sweep_cost.get_io_cost() < cost->get_io_cost())
7184         *cost= sweep_cost;
7185     }
7186   }
7187   DBUG_PRINT("info",("returning cost=%g", cost->total_cost()));
7188   DBUG_VOID_RETURN;
7189 }
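
/*
  Editorial numeric example for the model above (hypothetical values): for a
  data file spanning n_blocks == 1024 IO-sized blocks and nrows == 100, the
  expected number of distinct blocks touched is

    busy_blocks = 1024 * (1 - (1 - 1/1024)^100) ~= 95.3

  i.e. a few rows are expected to share a block, so slightly fewer than 100
  block reads are charged; the sweep estimate then replaces this figure only
  when it comes out cheaper than random access.
*/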
7190 
7191 
7192 /****************************************************************************
7193  * DS-MRR implementation ends
7194  ***************************************************************************/
7195 
7196 /** @brief
7197   Read first row between two ranges.
7198   Store ranges for future calls to read_range_next.
7199 
7200   @param start_key		Start key. Is 0 if no min range
7201   @param end_key		End key.  Is 0 if no max range
7202   @param eq_range_arg	        Set to 1 if start_key == end_key
7203   @param sorted		Set to 1 if result should be sorted per key
7204 
7205   @note
7206     Record is read into table->record[0]
7207 
7208   @retval
7209     0			Found row
7210   @retval
7211     HA_ERR_END_OF_FILE	No rows in range
7212   @retval
7213     \#			Error code
7214 */
7215 int handler::read_range_first(const key_range *start_key,
7216 			      const key_range *end_key,
7217 			      bool eq_range_arg,
7218                               bool sorted /* ignored */)
7219 {
7220   int result;
7221   DBUG_ENTER("handler::read_range_first");
7222 
7223   eq_range= eq_range_arg;
7224   set_end_range(end_key, RANGE_SCAN_ASC);
7225 
7226   range_key_part= table->key_info[active_index].key_part;
7227 
7228   if (!start_key)			// Read first record
7229     result= ha_index_first(table->record[0]);
7230   else
7231     result= ha_index_read_map(table->record[0],
7232                               start_key->key,
7233                               start_key->keypart_map,
7234                               start_key->flag);
7235   if (result)
7236     DBUG_RETURN((result == HA_ERR_KEY_NOT_FOUND)
7237 		? HA_ERR_END_OF_FILE
7238 		: result);
7239 
7240   if (compare_key(end_range) <= 0)
7241   {
7242     DBUG_RETURN(0);
7243   }
7244   else
7245   {
7246     /*
7247       The last read row does not fall in the range. So request
7248       storage engine to release row lock if possible.
7249     */
7250     unlock_row();
7251     DBUG_RETURN(HA_ERR_END_OF_FILE);
7252   }
7253 }
7254 
7255 
7256 /** @brief
7257   Read next row between two endpoints.
7258 
7259   @note
7260     Record is read into table->record[0]
7261 
7262   @retval
7263     0			Found row
7264   @retval
7265     HA_ERR_END_OF_FILE	No rows in range
7266   @retval
7267     \#			Error code
7268 */
7269 int handler::read_range_next()
7270 {
7271   int result;
7272   DBUG_ENTER("handler::read_range_next");
7273 
7274   if (eq_range)
7275   {
7276     /* We trust that index_next_same always gives a row in range */
7277     DBUG_RETURN(ha_index_next_same(table->record[0],
7278                                    end_range->key,
7279                                    end_range->length));
7280   }
7281   result= ha_index_next(table->record[0]);
7282   if (result)
7283     DBUG_RETURN(result);
7284 
7285   if (compare_key(end_range) <= 0)
7286   {
7287     DBUG_RETURN(0);
7288   }
7289   else
7290   {
7291     /*
7292       The last read row does not fall in the range. So request
7293       storage engine to release row lock if possible.
7294     */
7295     unlock_row();
7296     DBUG_RETURN(HA_ERR_END_OF_FILE);
7297   }
7298 }
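
/*
  Editorial sketch (hypothetical caller code; keyno, min_key and max_key are
  assumptions) of the protocol implemented by read_range_first() and
  read_range_next() above:

    key_range min_key, max_key;          // filled in by the caller
    int error;
    if ((error= file->ha_index_init(keyno, true)))
      return error;
    error= file->read_range_first(&min_key, &max_key,
                                  false /* eq_range */, true /* sorted */);
    while (!error)
    {
      // A row within [min_key, max_key] is now in table->record[0].
      error= file->read_range_next();
    }
    file->ha_index_end();
    if (error != HA_ERR_END_OF_FILE)
      return error;                      // real error, not just end of range
*/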
7299 
7300 
7301 void handler::set_end_range(const key_range* range,
7302                             enum_range_scan_direction direction)
7303 {
7304   if (range)
7305   {
7306     save_end_range= *range;
7307     end_range= &save_end_range;
7308     range_key_part= table->key_info[active_index].key_part;
7309     key_compare_result_on_equal= ((range->flag == HA_READ_BEFORE_KEY) ? 1 :
7310                                   (range->flag == HA_READ_AFTER_KEY) ? -1 : 0);
7311   }
7312   else
7313     end_range= NULL;
7314 
7315   range_scan_direction= direction;
7316 }
7317 
7318 
7319 /**
7320   Compare if found key (in row) is over max-value.
7321 
7322   @param range		range to compare to row. May be 0 for no range
7323 
7324   @sa
7325     key.cc::key_cmp()
7326 
7327   @return
7328     The return value is SIGN(key_in_row - range_key):
7329 
7330     - 0   : Key is equal to range or 'range' == 0 (no range)
7331     - -1  : Key is less than range
7332     - 1   : Key is larger than range
7333 */
7334 int handler::compare_key(key_range *range)
7335 {
7336   int cmp;
7337   if (!range || in_range_check_pushed_down)
7338     return 0;					// No max range
7339   cmp= key_cmp(range_key_part, range->key, range->length);
7340   if (!cmp)
7341     cmp= key_compare_result_on_equal;
7342   return cmp;
7343 }
7344 
7345 
7346 /*
7347   Compare if a found key (in row) is within the range.
7348 
7349   This function is similar to compare_key() but checks the range scan
7350   direction to determine if this is a descending scan. This function
7351   is used by the index condition pushdown implementation to determine
7352   if the read record is within the range scan.
7353 
7354   @param range Range to compare to row. May be NULL for no range.
7355 
7356   @sa
7357     handler::compare_key()
7358 
7359   @return Returns whether the key is within the range
7360 
7361     - 0   : Key is equal to range or 'range' == 0 (no range)
7362     - -1  : Key is within the current range
7363     - 1   : Key is outside the current range
7364 */
7365 
7366 int handler::compare_key_icp(const key_range *range) const
7367 {
7368   int cmp;
7369   if (!range)
7370     return 0;					// no max range
7371   cmp= key_cmp(range_key_part, range->key, range->length);
7372   if (!cmp)
7373     cmp= key_compare_result_on_equal;
7374   if (range_scan_direction == RANGE_SCAN_DESC)
7375     cmp= -cmp;
7376   return cmp;
7377 }
7378 
7379 /**
7380    Change the offsets of all the fields in a key range.
7381 
7382    @param range   the key range
7383    @param key_part the first key part
7384    @param diff    how much to change the offsets with
7385 */
7386 static inline void
7387 move_key_field_offsets(const key_range *range, const KEY_PART_INFO *key_part,
7388                        my_ptrdiff_t diff)
7389 {
7390   for (size_t len= 0; len < range->length;
7391        len+= key_part->store_length, ++key_part)
7392     key_part->field->move_field_offset(diff);
7393 }
7394 
7395 /**
7396   Check if the key in the given buffer (which is not necessarily
7397   TABLE::record[0]) is within range. Called by the storage engine to
7398   avoid reading too many rows.
7399 
7400   @param buf  the buffer that holds the key
7401   @retval -1 if the key is within the range
7402   @retval  0 if the key is equal to the end_range key, and
7403              key_compare_result_on_equal is 0
7404   @retval  1 if the key is outside the range
7405 */
7406 int handler::compare_key_in_buffer(const uchar *buf) const
7407 {
7408   DBUG_ASSERT(end_range != NULL);
7409 
7410   /*
7411     End range on descending scans is only checked with ICP for now, and then we
7412     check it with compare_key_icp() instead of this function.
7413   */
7414   DBUG_ASSERT(range_scan_direction == RANGE_SCAN_ASC);
7415 
7416   // Make the fields in the key point into the buffer instead of record[0].
7417   const my_ptrdiff_t diff= buf - table->record[0];
7418   if (diff != 0)
7419     move_key_field_offsets(end_range, range_key_part, diff);
7420 
7421    // Compare the key in buf against end_range.
7422    int cmp= key_cmp(range_key_part, end_range->key, end_range->length);
7423    if (cmp == 0)
7424      cmp= key_compare_result_on_equal;
7425 
7426    // Reset the field offsets.
7427    if (diff != 0)
7428      move_key_field_offsets(end_range, range_key_part, -diff);
7429 
7430    return cmp;
7431 }
7432 
7433 int handler::index_read_idx_map(uchar * buf, uint index, const uchar * key,
7434                                 key_part_map keypart_map,
7435                                 enum ha_rkey_function find_flag)
7436 {
7437   int error, error1;
7438   error= index_init(index, 0);
7439   if (!error)
7440   {
7441     error= index_read_map(buf, key, keypart_map, find_flag);
7442     error1= index_end();
7443   }
7444   return error ?  error : error1;
7445 }
7446 
7447 
7448 /**
7449   Returns a list of all known extensions.
7450 
7451     No mutexes are taken; the worst-case race is a minor surplus memory
7452     allocation. We have to recreate the extension map if mysqld is restarted
7453     (for example within libmysqld).
7454 
7455   @retval
7456     pointer		pointer to TYPELIB structure
7457 */
7458 static my_bool exts_handlerton(THD *unused, plugin_ref plugin,
7459                                void *arg)
7460 {
7461   List<char> *found_exts= (List<char> *) arg;
7462   handlerton *hton= plugin_data(plugin, handlerton *);
7463   handler *file;
7464   if (hton->state == SHOW_OPTION_YES && hton->create &&
7465       (file= hton->create(hton, (TABLE_SHARE*) 0, current_thd->mem_root)))
7466   {
7467     List_iterator_fast<char> it(*found_exts);
7468     const char **ext, *old_ext;
7469 
7470     for (ext= file->bas_ext(); *ext; ext++)
7471     {
7472       while ((old_ext= it++))
7473       {
7474         if (!strcmp(old_ext, *ext))
7475 	  break;
7476       }
7477       if (!old_ext)
7478         found_exts->push_back((char *) *ext);
7479 
7480       it.rewind();
7481     }
7482     delete file;
7483   }
7484   return FALSE;
7485 }
7486 
7487 TYPELIB* ha_known_exts()
7488 {
7489   TYPELIB *known_extensions = (TYPELIB*) sql_alloc(sizeof(TYPELIB));
7490   known_extensions->name= "known_exts";
7491   known_extensions->type_lengths= NULL;
7492 
7493   List<char> found_exts;
7494   const char **ext, *old_ext;
7495 
7496   found_exts.push_back((char*) TRG_EXT);
7497   found_exts.push_back((char*) TRN_EXT);
7498 
7499   plugin_foreach(NULL, exts_handlerton,
7500                  MYSQL_STORAGE_ENGINE_PLUGIN, &found_exts);
7501 
7502   size_t arr_length= sizeof(char *)* (found_exts.elements+1);
7503   ext= (const char **) sql_alloc(arr_length);
7504 
7505   DBUG_ASSERT(NULL != ext);
7506   known_extensions->count= found_exts.elements;
7507   known_extensions->type_names= ext;
7508 
7509   List_iterator_fast<char> it(found_exts);
7510   while ((old_ext= it++))
7511     *ext++= old_ext;
7512   *ext= NULL;
7513   return known_extensions;
7514 }
7515 
7516 
7517 static bool stat_print(THD *thd, const char *type, uint type_len,
7518                        const char *file, uint file_len,
7519                        const char *status, uint status_len)
7520 {
7521   Protocol *protocol= thd->protocol;
7522   protocol->prepare_for_resend();
7523   protocol->store(type, type_len, system_charset_info);
7524   protocol->store(file, file_len, system_charset_info);
7525   protocol->store(status, status_len, system_charset_info);
7526   if (protocol->write())
7527     return TRUE;
7528   return FALSE;
7529 }
7530 
7531 
7532 static my_bool showstat_handlerton(THD *thd, plugin_ref plugin,
7533                                    void *arg)
7534 {
7535   enum ha_stat_type stat= *(enum ha_stat_type *) arg;
7536   handlerton *hton= plugin_data(plugin, handlerton *);
7537   if (hton->state == SHOW_OPTION_YES && hton->show_status &&
7538       hton->show_status(hton, thd, stat_print, stat))
7539     return TRUE;
7540   return FALSE;
7541 }
7542 
7543 bool ha_show_status(THD *thd, handlerton *db_type, enum ha_stat_type stat)
7544 {
7545   List<Item> field_list;
7546   Protocol *protocol= thd->protocol;
7547   bool result;
7548 
7549   field_list.push_back(new Item_empty_string("Type",10));
7550   field_list.push_back(new Item_empty_string("Name",FN_REFLEN));
7551   field_list.push_back(new Item_empty_string("Status",10));
7552 
7553   if (protocol->send_result_set_metadata(&field_list,
7554                             Protocol::SEND_NUM_ROWS | Protocol::SEND_EOF))
7555     return TRUE;
7556 
7557   if (db_type == NULL)
7558   {
7559     result= plugin_foreach(thd, showstat_handlerton,
7560                            MYSQL_STORAGE_ENGINE_PLUGIN, &stat);
7561   }
7562   else
7563   {
7564     if (db_type->state != SHOW_OPTION_YES)
7565     {
7566       const LEX_STRING *name=&hton2plugin[db_type->slot]->name;
7567       result= stat_print(thd, name->str, name->length,
7568                          "", 0, "DISABLED", 8) ? 1 : 0;
7569     }
7570     else
7571     {
7572       DBUG_EXECUTE_IF("simulate_show_status_failure",
7573                       DBUG_SET("+d,simulate_net_write_failure"););
7574       result= db_type->show_status &&
7575               db_type->show_status(db_type, thd, stat_print, stat) ? 1 : 0;
7576       DBUG_EXECUTE_IF("simulate_show_status_failure",
7577                       DBUG_SET("-d,simulate_net_write_failure"););
7578     }
7579   }
7580 
7581   if (!result)
7582     my_eof(thd);
7583   return result;
7584 }
7585 
7586 static my_bool flush_changed_page_bitmaps_handlerton(THD *unused1,
7587                                                      plugin_ref plugin,
7588                                                      void *unused2)
7589 {
7590   handlerton *hton= plugin_data(plugin, handlerton *);
7591 
7592   if (hton->flush_changed_page_bitmaps == NULL)
7593     return FALSE;
7594 
7595   return hton->flush_changed_page_bitmaps();
7596 }
7597 
7598 bool ha_flush_changed_page_bitmaps()
7599 {
7600   return plugin_foreach(NULL, flush_changed_page_bitmaps_handlerton,
7601                         MYSQL_STORAGE_ENGINE_PLUGIN, NULL);
7602 }
7603 
7604 static my_bool purge_changed_page_bitmaps_handlerton(THD *unused1,
7605                                                      plugin_ref plugin,
7606                                                      void *lsn)
7607 {
7608   handlerton *hton= plugin_data(plugin, handlerton *);
7609 
7610   if (hton->purge_changed_page_bitmaps == NULL)
7611     return FALSE;
7612 
7613   return hton->purge_changed_page_bitmaps(*(ulonglong *)lsn);
7614 }
7615 
7616 bool ha_purge_changed_page_bitmaps(ulonglong lsn)
7617 {
7618   return plugin_foreach(NULL, purge_changed_page_bitmaps_handlerton,
7619                         MYSQL_STORAGE_ENGINE_PLUGIN, &lsn);
7620 }
7621 
7622 static my_bool purge_archive_logs_handlerton(THD *thd, plugin_ref plugin,
7623                                              void *arg)
7624 {
7625   ulong before_timestamp= *(ulong*) arg;
7626   handlerton *hton= plugin_data(plugin, handlerton *);
7627 
7628   if (hton->purge_archive_logs == NULL)
7629     return FALSE;
7630 
7631   return hton->purge_archive_logs(hton, before_timestamp, NULL);
7632 }
7633 
7634 bool ha_purge_archive_logs(THD *thd, handlerton *db_type, void* args)
7635 {
7636   if (db_type == NULL)
7637     return plugin_foreach(thd, purge_archive_logs_handlerton,
7638                            MYSQL_STORAGE_ENGINE_PLUGIN, args);
7639 
7640   return false;
7641 }
7642 
7643 static my_bool purge_archive_logs_to_handlerton(THD *thd, plugin_ref plugin,
7644                                                 void *arg)
7645 {
7646   const char* to_filename= (const char*) arg;
7647   handlerton *hton= plugin_data(plugin, handlerton *);
7648 
7649   if (hton->purge_archive_logs == NULL)
7650     return FALSE;
7651 
7652   return hton->purge_archive_logs(hton, 0, to_filename);
7653 }
7654 
7655 bool ha_purge_archive_logs_to(THD *thd, handlerton *db_type, void* args)
7656 {
7657   if (db_type == NULL)
7658     return plugin_foreach(thd, purge_archive_logs_to_handlerton,
7659                            MYSQL_STORAGE_ENGINE_PLUGIN, args);
7660 
7661   return false;
7662 }
7663 
7664 /*
7665   Function to check if the conditions for row-based binlogging are
7666   satisfied for the table.
7667 
7668   A row in the given table should be replicated if:
7669   - Row-based replication is enabled in the current thread
7670   - The binlog is enabled
7671   - It is not a temporary table
7672   - The binary log is open
7673   - The database the table resides in shall be binlogged (binlog_*_db rules)
7674   - table is not mysql.event
7675 */
7676 
7677 static bool check_table_binlog_row_based(THD *thd, TABLE *table)
7678 {
7679   if (table->s->cached_row_logging_check == -1)
7680   {
7681     int const check(table->s->tmp_table == NO_TMP_TABLE &&
7682                     ! table->no_replicate &&
7683                     binlog_filter->db_ok(table->s->db.str));
7684     table->s->cached_row_logging_check= check;
7685   }
7686 
7687   DBUG_ASSERT(table->s->cached_row_logging_check == 0 ||
7688               table->s->cached_row_logging_check == 1);
7689 
7690   return (thd->is_current_stmt_binlog_format_row() &&
7691           table->s->cached_row_logging_check &&
7692           (thd->variables.option_bits & OPTION_BIN_LOG) &&
7693           mysql_bin_log.is_open());
7694 }
7695 
7696 
7697 /** @brief
7698    Write table maps for all (manually or automatically) locked tables
7699    to the binary log.
7700 
7701    SYNOPSIS
7702      write_locked_table_maps()
7703        thd     Pointer to THD structure
7704 
7705    DESCRIPTION
7706        This function will generate and write table maps for all tables
7707        that are locked by the thread 'thd'.
7708 
7709    RETURN VALUE
7710        0   All OK
7711        1   Failed to write all table maps
7712 
7713    SEE ALSO
7714        THD::lock
7715 */
7716 
7717 static int write_locked_table_maps(THD *thd)
7718 {
7719   DBUG_ENTER("write_locked_table_maps");
7720   DBUG_PRINT("enter", ("thd: 0x%lx  thd->lock: 0x%lx "
7721                        "thd->extra_lock: 0x%lx",
7722                        (long) thd, (long) thd->lock, (long) thd->extra_lock));
7723 
7724   DBUG_PRINT("debug", ("get_binlog_table_maps(): %d", thd->get_binlog_table_maps()));
7725 
7726   if (thd->get_binlog_table_maps() == 0)
7727   {
7728     MYSQL_LOCK *locks[2];
7729     locks[0]= thd->extra_lock;
7730     locks[1]= thd->lock;
7731     for (uint i= 0 ; i < sizeof(locks)/sizeof(*locks) ; ++i )
7732     {
7733       MYSQL_LOCK const *const lock= locks[i];
7734       if (lock == NULL)
7735         continue;
7736 
7737       bool need_binlog_rows_query= thd->variables.binlog_rows_query_log_events;
7738       TABLE **const end_ptr= lock->table + lock->table_count;
7739       for (TABLE **table_ptr= lock->table ;
7740            table_ptr != end_ptr ;
7741            ++table_ptr)
7742       {
7743         TABLE *const table= *table_ptr;
7744         DBUG_PRINT("info", ("Checking table %s", table->s->table_name.str));
7745         if (table->current_lock == F_WRLCK &&
7746             check_table_binlog_row_based(thd, table))
7747         {
7748           /*
7749             We need to have a transactional behavior for SQLCOM_CREATE_TABLE
7750             (e.g. CREATE TABLE... SELECT * FROM TABLE) in order to keep a
7751             compatible behavior with the STMT based replication even when
7752             the table is not transactional. In other words, if the operation
7753             fails while executing the insert phase nothing is written to the
7754             binlog.
7755 
7756             Note that at this point, we check the type of a set of tables to
7757             create the table map events. In the function binlog_log_row(),
7758             which calls the current function, we check the type of the table
7759             of the current row.
7760           */
7761           bool const has_trans= thd->lex->sql_command == SQLCOM_CREATE_TABLE ||
7762                                 table->file->has_transactions();
7763           int const error= thd->binlog_write_table_map(table, has_trans,
7764                                                        need_binlog_rows_query);
7765           /* Binlog the Rows_query log event once for a statement that
7766              updates two or more tables. */
7767           if (need_binlog_rows_query)
7768             need_binlog_rows_query= FALSE;
7769           /*
7770             If an error occurs, it is the responsibility of the caller to
7771             roll back the transaction.
7772           */
7773           if (unlikely(error))
7774             DBUG_RETURN(1);
7775         }
7776       }
7777     }
7778   }
7779   DBUG_RETURN(0);
7780 }
7781 
7782 
7783 typedef bool Log_func(THD*, TABLE*, bool,
7784                       const uchar*, const uchar*);
7785 
7786 int binlog_log_row(TABLE* table,
7787                           const uchar *before_record,
7788                           const uchar *after_record,
7789                           Log_func *log_func)
7790 {
7791   bool error= 0;
7792   THD *const thd= table->in_use;
7793 
7794   if (check_table_binlog_row_based(thd, table))
7795   {
7796     DBUG_DUMP("read_set 10", (uchar*) table->read_set->bitmap,
7797               (table->s->fields + 7) / 8);
7798 
7799     /*
7800       If there are no table maps written to the binary log, this is
7801       the first row handled in this statement. In that case, we need
7802       to write table maps for all locked tables to the binary log.
7803     */
7804     if (likely(!(error= write_locked_table_maps(thd))))
7805     {
7806       /*
7807         We need to have a transactional behavior for SQLCOM_CREATE_TABLE
7808         (i.e. CREATE TABLE... SELECT * FROM TABLE) in order to keep a
7809         compatible behavior with the STMT based replication even when
7810         the table is not transactional. In other words, if the operation
7811         fails while executing the insert phase nothing is written to the
7812         binlog.
7813       */
7814       bool const has_trans= thd->lex->sql_command == SQLCOM_CREATE_TABLE ||
7815                            table->file->has_transactions();
7816       error=
7817         (*log_func)(thd, table, has_trans, before_record, after_record);
7818     }
7819   }
7820   return error ? HA_ERR_RBR_LOGGING_FAILED : 0;
7821 }
7822 
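/**
  Lock or unlock the table in the storage engine.

  Wraps external_lock() with lock probes and Performance Schema
  instrumentation. On success the effective lock type and the table
  flags are cached.

  @param thd        Thread handle.
  @param lock_type  F_RDLCK, F_WRLCK or F_UNLCK.

  @return 0 on success, otherwise a handler error code.
*/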
7823 int handler::ha_external_lock(THD *thd, int lock_type)
7824 {
7825   int error;
7826   DBUG_ENTER("handler::ha_external_lock");
7827   /*
7828     Whether this is a lock or an unlock, this should hold: it verifies that
7829     if get_auto_increment() was called (and thus may have reserved intervals
7830     or taken a table lock), ha_release_auto_increment() was called too.
7831   */
7832   DBUG_ASSERT(next_insert_id == 0);
7833   /* Consecutive lock calls without an unlock in between are not allowed */
7834   DBUG_ASSERT(table_share->tmp_table != NO_TMP_TABLE ||
7835               ((lock_type != F_UNLCK && m_lock_type == F_UNLCK) ||
7836                lock_type == F_UNLCK));
7837   /* SQL HANDLER call locks/unlock while scanning (RND/INDEX). */
7838   DBUG_ASSERT(inited == NONE || table->open_by_handler);
7839 
7840   if (MYSQL_HANDLER_RDLOCK_START_ENABLED() ||
7841       MYSQL_HANDLER_WRLOCK_START_ENABLED() ||
7842       MYSQL_HANDLER_UNLOCK_START_ENABLED())
7843   {
7844     if (lock_type == F_RDLCK)
7845     {
7846       MYSQL_HANDLER_RDLOCK_START(table_share->db.str,
7847                                  table_share->table_name.str);
7848     }
7849     else if (lock_type == F_WRLCK)
7850     {
7851       MYSQL_HANDLER_WRLOCK_START(table_share->db.str,
7852                                  table_share->table_name.str);
7853     }
7854     else if (lock_type == F_UNLCK)
7855     {
7856       MYSQL_HANDLER_UNLOCK_START(table_share->db.str,
7857                                  table_share->table_name.str);
7858     }
7859   }
7860 
7861   ha_statistic_increment(&SSV::ha_external_lock_count);
7862 
7863   MYSQL_TABLE_LOCK_WAIT(m_psi, PSI_TABLE_EXTERNAL_LOCK, lock_type,
7864     { error= external_lock(thd, lock_type); })
7865 
7866   /*
7867     We cache the table flags if the locking succeeded. Otherwise, we
7868     keep them as they were when they were fetched in ha_open().
7869   */
7870 
7871   if (error == 0)
7872   {
7873     /*
7874       The lock type is needed by MRR when creating a clone of this handler
7875       object.
7876     */
7877     m_lock_type= lock_type;
7878     cached_table_flags= table_flags();
7879   }
7880 
7881   if (MYSQL_HANDLER_RDLOCK_DONE_ENABLED() ||
7882       MYSQL_HANDLER_WRLOCK_DONE_ENABLED() ||
7883       MYSQL_HANDLER_UNLOCK_DONE_ENABLED())
7884   {
7885     if (lock_type == F_RDLCK)
7886     {
7887       MYSQL_HANDLER_RDLOCK_DONE(error);
7888     }
7889     else if (lock_type == F_WRLCK)
7890     {
7891       MYSQL_HANDLER_WRLOCK_DONE(error);
7892     }
7893     else if (lock_type == F_UNLCK)
7894     {
7895       MYSQL_HANDLER_UNLOCK_DONE(error);
7896     }
7897   }
7898   DBUG_RETURN(error);
7899 }
7900 
7901 
7902 /** @brief
7903   Check handler usage and reset the state of the file to just after 'open'.
7904 
7905   @note Can be called regardless of whether the table is locked or not.
7906 */
7907 int handler::ha_reset()
7908 {
7909   DBUG_ENTER("handler::ha_reset");
7910   /* Check that we have called all proper deallocation functions */
7911   DBUG_ASSERT((uchar*) table->def_read_set.bitmap +
7912               table->s->column_bitmap_size ==
7913               (uchar*) table->def_write_set.bitmap);
7914   DBUG_ASSERT(bitmap_is_set_all(&table->s->all_set));
7915   DBUG_ASSERT(table->key_read == 0);
7916   /* ensure that ha_index_end / ha_rnd_end has been called */
7917   DBUG_ASSERT(inited == NONE);
7918   /* Free cache used by filesort */
7919   free_io_cache(table);
7920   /* reset the bitmaps to point to defaults */
7921   table->default_column_bitmaps();
7922   /* Reset information about pushed engine conditions */
7923   pushed_cond= NULL;
7924   /* Reset information about pushed index conditions */
7925   cancel_pushed_idx_cond();
7926 
7927   const int retval= reset();
7928   DBUG_RETURN(retval);
7929 }
7930 
7931 
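/**
  Write (insert) a row.

  Calls write_row() under table I/O instrumentation and, on success,
  logs the insert to the binary log when row-based logging applies.

  @param buf  Row data in table row format.

  @return 0 on success, otherwise a handler error code.
*/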
7932 int handler::ha_write_row(uchar *buf)
7933 {
7934   int error;
7935   Log_func *log_func= Write_rows_log_event::binlog_row_logging_function;
7936   DBUG_ASSERT(table_share->tmp_table != NO_TMP_TABLE ||
7937               m_lock_type == F_WRLCK);
7938 
7939   DBUG_ENTER("handler::ha_write_row");
7940   DEBUG_SYNC(ha_thd(), "start_ha_write_row");
7941   DBUG_EXECUTE_IF("inject_error_ha_write_row",
7942                   DBUG_RETURN(HA_ERR_INTERNAL_ERROR); );
7943 
7944   MYSQL_INSERT_ROW_START(table_share->db.str, table_share->table_name.str);
7945   mark_trx_read_write();
7946 
7947   MYSQL_TABLE_IO_WAIT(m_psi, PSI_TABLE_WRITE_ROW, MAX_KEY, 0,
7948     { error= write_row(buf); })
7949 
7950   MYSQL_INSERT_ROW_DONE(error);
7951   if (unlikely(error))
7952     DBUG_RETURN(error);
7953 
7954   if (unlikely(error= binlog_log_row(table, 0, buf, log_func)))
7955     DBUG_RETURN(error); /* purecov: inspected */
7956 
7957   if (likely(!is_fake_change_enabled(ha_thd())))
7958     rows_changed++;
7959 
7960   DEBUG_SYNC_C("ha_write_row_end");
7961   DBUG_RETURN(0);
7962 }
7963 
7964 
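/**
  Update a row.

  The new record must be in table->record[0] and the old record in
  table->record[1]. On success the update is logged to the binary log
  when row-based logging applies.

  @return 0 on success, otherwise a handler error code.
*/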
7965 int handler::ha_update_row(const uchar *old_data, uchar *new_data)
7966 {
7967   int error;
7968   DBUG_ASSERT(table_share->tmp_table != NO_TMP_TABLE ||
7969               m_lock_type == F_WRLCK);
7970   Log_func *log_func= Update_rows_log_event::binlog_row_logging_function;
7971 
7972   /*
7973     Some storage engines require that the new record is in record[0]
7974     (and the old record is in record[1]).
7975    */
7976   DBUG_ASSERT(new_data == table->record[0]);
7977   DBUG_ASSERT(old_data == table->record[1]);
7978 
7979   MYSQL_UPDATE_ROW_START(table_share->db.str, table_share->table_name.str);
7980   mark_trx_read_write();
7981 
7982   MYSQL_TABLE_IO_WAIT(m_psi, PSI_TABLE_UPDATE_ROW, active_index, 0,
7983     { error= update_row(old_data, new_data);})
7984 
7985   MYSQL_UPDATE_ROW_DONE(error);
7986   if (unlikely(error))
7987     return error;
7988   if (unlikely(error= binlog_log_row(table, old_data, new_data, log_func)))
7989     return error;
7990 
7991   if (likely(!is_fake_change_enabled(ha_thd())))
7992     rows_changed++;
7993 
7994   return 0;
7995 }
7996 
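/**
  Delete a row.

  @param buf  The row to delete; either table->record[0] or table->record[1].

  @return 0 on success, otherwise a handler error code.
*/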
7997 int handler::ha_delete_row(const uchar *buf)
7998 {
7999   int error;
8000   DBUG_ASSERT(table_share->tmp_table != NO_TMP_TABLE ||
8001               m_lock_type == F_WRLCK);
8002   Log_func *log_func= Delete_rows_log_event::binlog_row_logging_function;
8003   /*
8004     Normally table->record[0] is used, but sometimes table->record[1] is used.
8005   */
8006   DBUG_ASSERT(buf == table->record[0] ||
8007               buf == table->record[1]);
8008   DBUG_EXECUTE_IF("inject_error_ha_delete_row",
8009                   return HA_ERR_INTERNAL_ERROR; );
8010 
8011   MYSQL_DELETE_ROW_START(table_share->db.str, table_share->table_name.str);
8012   mark_trx_read_write();
8013 
8014   MYSQL_TABLE_IO_WAIT(m_psi, PSI_TABLE_DELETE_ROW, active_index, 0,
8015     { error= delete_row(buf);})
8016 
8017   MYSQL_DELETE_ROW_DONE(error);
8018   if (unlikely(error))
8019     return error;
8020   if (unlikely(error= binlog_log_row(table, buf, 0, log_func)))
8021     return error;
8022 
8023   if (likely(!is_fake_change_enabled(ha_thd())))
8024     rows_changed++;
8025 
8026   return 0;
8027 }
8028 
8029 /**
8030   @brief Offload an update to the storage engine. See handler::fast_update()
8031   for details.
8032 */
8033 int handler::ha_fast_update(THD *thd,
8034                             List<Item> &update_fields,
8035                             List<Item> &update_values,
8036                             Item *conds)
8037 {
8038   int error= fast_update(thd, update_fields, update_values, conds);
8039   if (error == 0)
8040     mark_trx_read_write();
8041   return error;
8042 }
8043 
8044 /**
8045   @brief Offload an upsert to the storage engine. See handler::upsert()
8046   for details.
8047 */
8048 int handler::ha_upsert(THD *thd,
8049                        List<Item> &update_fields,
8050                        List<Item> &update_values)
8051 {
8052   int error= upsert(thd, update_fields, update_values);
8053   if (error == 0)
8054     mark_trx_read_write();
8055   return error;
8056 }
8057 
8058 /** @brief
8059   use_hidden_primary_key() is called for an update or delete when
8060   table_flags() includes HA_PRIMARY_KEY_REQUIRED_FOR_DELETE
8061   but the table has no primary key.
8062 */
8063 void handler::use_hidden_primary_key()
8064 {
8065   /* fallback to use all columns in the table to identify row */
8066   table->use_all_columns();
8067 }
8068 
8069 
8070 /**
8071   Get an initialized ha_share.
8072 
8073   @return Initialized ha_share
8074     @retval NULL    ha_share is not yet initialized.
8075     @retval != NULL previously initialized ha_share.
8076 
8077   @note
8078   If not a temp table, then LOCK_ha_data must be held.
8079 */
8080 
8081 Handler_share *handler::get_ha_share_ptr()
8082 {
8083   DBUG_ENTER("handler::get_ha_share_ptr");
8084   DBUG_ASSERT(ha_share && table_share);
8085 
8086 #ifndef DBUG_OFF
8087   if (table_share->tmp_table == NO_TMP_TABLE)
8088     mysql_mutex_assert_owner(&table_share->LOCK_ha_data);
8089 #endif
8090 
8091   DBUG_RETURN(*ha_share);
8092 }
8093 
8094 
8095 /**
8096   Set ha_share to be used by all instances of the same table/partition.
8097 
8098   @param ha_share    Handler_share to be shared.
8099 
8100   @note
8101   If not a temp table, then LOCK_ha_data must be held.
8102 */
8103 
8104 void handler::set_ha_share_ptr(Handler_share *arg_ha_share)
8105 {
8106   DBUG_ENTER("handler::set_ha_share_ptr");
8107   DBUG_ASSERT(ha_share);
8108 #ifndef DBUG_OFF
8109   if (table_share->tmp_table == NO_TMP_TABLE)
8110     mysql_mutex_assert_owner(&table_share->LOCK_ha_data);
8111 #endif
8112 
8113   *ha_share= arg_ha_share;
8114   DBUG_VOID_RETURN;
8115 }
8116 
8117 
8118 /**
8119   Take a lock for protecting shared handler data.
8120 */
8121 
8122 void handler::lock_shared_ha_data()
8123 {
8124   DBUG_ASSERT(table_share);
8125   if (table_share->tmp_table == NO_TMP_TABLE)
8126     mysql_mutex_lock(&table_share->LOCK_ha_data);
8127 }
8128 
8129 
8130 /**
8131   Release lock for protecting ha_share.
8132 */
8133 
8134 void handler::unlock_shared_ha_data()
8135 {
8136   DBUG_ASSERT(table_share);
8137   if (table_share->tmp_table == NO_TMP_TABLE)
8138     mysql_mutex_unlock(&table_share->LOCK_ha_data);
8139 }
8140 
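/*
  Typical usage of the four ha_share helpers above from a storage engine,
  shown as an illustrative sketch only (Example_share and get_share() are
  hypothetical names, not defined in this file):

    Example_share *ha_example::get_share()
    {
      Example_share *tmp_share;
      lock_shared_ha_data();
      if (!(tmp_share= static_cast<Example_share*>(get_ha_share_ptr())))
      {
        // First handler instance for this table: create and publish the share.
        tmp_share= new (std::nothrow) Example_share;
        if (tmp_share)
          set_ha_share_ptr(static_cast<Handler_share*>(tmp_share));
      }
      unlock_shared_ha_data();
      return tmp_share;
    }
*/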
8141 
8142 /** @brief
8143   Dummy function that accepts information about log files which is not
8144   needed by handlers.
8145 */
8146 void signal_log_not_needed(struct handlerton, char *log_file)
8147 {
8148   DBUG_ENTER("signal_log_not_needed");
8149   DBUG_PRINT("enter", ("logfile '%s'", log_file));
8150   DBUG_VOID_RETURN;
8151 }
8152 
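/**
  Check whether this statement relies on gap locking that cannot be allowed.

  For transactional engines that do not support gap locks, an error is
  raised and true is returned when a multi-table or multi-statement
  transaction at REPEATABLE READ or above would lock rows without using
  the full unique key in equality conditions.

  @param table                   Table being accessed.
  @param using_full_primary_key  true if the whole primary/unique key is
                                 covered by equality conditions.

  @retval true   Prohibited usage detected; an error has been raised.
  @retval false  The access is allowed.
*/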
8153 bool handler::is_using_prohibited_gap_locks(TABLE* table,
8154                                             bool using_full_primary_key) const
8155 {
8156   THD* thd = table->in_use;
8157   thr_lock_type lock_type = table->reginfo.lock_type;
8158 
8159   if (!using_full_primary_key
8160       && has_transactions()
8161       && !has_gap_locks()
8162       && thd_tx_isolation(thd) >= ISO_REPEATABLE_READ
8163       && !thd->rli_slave
8164       && (thd->lex->table_count >= 2 || thd->in_multi_stmt_transaction_mode())
8165       && (lock_type >= TL_WRITE_ALLOW_WRITE ||
8166           lock_type == TL_READ_WITH_SHARED_LOCKS ||
8167           lock_type == TL_READ_NO_INSERT ||
8168           (lock_type != TL_IGNORE && thd->lex->sql_command != SQLCOM_SELECT)))
8169   {
8170     my_printf_error(ER_UNKNOWN_ERROR,
8171                     "Using Gap Lock without full unique key in multi-table "
8172                     "or multi-statement transactions is not "
8173                     "allowed. You need to either rewrite queries to use "
8174                     "all unique key columns in WHERE equal conditions, or "
8175                     "rewrite to single-table, single-statement "
8176                     "transaction.  Query: %s",
8177                     MYF(0), thd->query());
8178     return true;
8179   }
8180   return false;
8181 }
8182 
8183 
8184 #ifdef TRANS_LOG_MGM_EXAMPLE_CODE
8185 /*
8186   Example of transaction log management functions, based on the assumption
8187   that logs are placed in a directory.
8188 */
8189 #include <my_dir.h>
8190 #include <my_sys.h>
8191 int example_of_iterator_using_for_logs_cleanup(handlerton *hton)
8192 {
8193   void *buffer;
8194   int res= 1;
8195   struct handler_iterator iterator;
8196   struct handler_log_file_data data;
8197 
8198   if (!hton->create_iterator)
8199     return 1; /* iterator creator is not supported */
8200 
8201   if ((*hton->create_iterator)(hton, HA_TRANSACTLOG_ITERATOR, &iterator) !=
8202       HA_ITERATOR_OK)
8203   {
8204     /* error during creation of log iterator or iterator is not supported */
8205     return 1;
8206   }
8207   while((*iterator.next)(&iterator, (void*)&data) == 0)
8208   {
8209     printf("%s\n", data.filename.str);
8210     if (data.status == HA_LOG_STATUS_FREE &&
8211         mysql_file_delete(INSTRUMENT_ME,
8212                           data.filename.str, MYF(MY_WME)))
8213       goto err;
8214   }
8215   res= 0;
8216 err:
8217   (*iterator.destroy)(&iterator);
8218   return res;
8219 }
8220 
8221 
8222 /*
8223   Here we should ask the handler where it saves its logs, but since this is
8224   just an example, we use a constant.
8225   FN_ROOTDIR ("/") is safe enough for an example, because nobody except root
8226   has rights on it and it consists of directories only, at least on *nix
8227   (there is no Windows-safe equivalent here, but this is only an example).
8228 */
8229 #define fl_dir FN_ROOTDIR
8230 
8231 
8232 /** @brief
8233   Dummy function to return the log status. It should be replaced by a
8234   function that really detects the log status and checks that the file
8235   is a log of this handler.
8236 */
8237 enum log_status fl_get_log_status(char *log)
8238 {
8239   MY_STAT stat_buff;
8240   if (mysql_file_stat(INSTRUMENT_ME, log, &stat_buff, MYF(0)))
8241     return HA_LOG_STATUS_INUSE;
8242   return HA_LOG_STATUS_NOSUCHLOG;
8243 }
8244 
8245 
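/*
  State for the example log-file iterator: the collected file names and
  statuses, how many entries were found, and the current scan position.
*/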
8246 struct fl_buff
8247 {
8248   LEX_STRING *names;
8249   enum log_status *statuses;
8250   uint32 entries;
8251   uint32 current;
8252 };
8253 
8254 
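/** @brief
  Example iterator step: copies the next log file entry into the
  handler_log_file_data pointed to by iterator_object; returns 1 when
  all entries have been consumed.
*/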
8255 int fl_log_iterator_next(struct handler_iterator *iterator,
8256                           void *iterator_object)
8257 {
8258   struct fl_buff *buff= (struct fl_buff *)iterator->buffer;
8259   struct handler_log_file_data *data=
8260     (struct handler_log_file_data *) iterator_object;
8261   if (buff->current >= buff->entries)
8262     return 1;
8263   data->filename= buff->names[buff->current];
8264   data->status= buff->statuses[buff->current];
8265   buff->current++;
8266   return 0;
8267 }
8268 
8269 
8270 void fl_log_iterator_destroy(struct handler_iterator *iterator)
8271 {
8272   my_free(iterator->buffer);
8273 }
8274 
8275 
8276 /** @brief
8277   Returns a buffer to be assigned in the handler_iterator struct.
8278 */
8279 enum handler_create_iterator_result
8280 fl_log_iterator_buffer_init(struct handler_iterator *iterator)
8281 {
8282   MY_DIR *dirp;
8283   struct fl_buff *buff;
8284   char *name_ptr;
8285   uchar *ptr;
8286   FILEINFO *file;
8287   uint32 i;
8288 
8289   /* so that my_free() can be called without crashing in case of error */
8290   iterator->buffer= 0;
8291 
8292   if (!(dirp = my_dir(fl_dir, MYF(0))))
8293   {
8294     return HA_ITERATOR_ERROR;
8295   }
8296   if ((ptr= (uchar*)my_malloc(ALIGN_SIZE(sizeof(fl_buff)) +
8297                              ((ALIGN_SIZE(sizeof(LEX_STRING)) +
8298                                sizeof(enum log_status) +
8299                                + FN_REFLEN + 1) *
8300                               (uint) dirp->number_off_files),
8301                              MYF(0))) == 0)
8302   {
8303     return HA_ITERATOR_ERROR;
8304   }
8305   buff= (struct fl_buff *)ptr;
8306   buff->entries= buff->current= 0;
8307   ptr= ptr + (ALIGN_SIZE(sizeof(fl_buff)));
8308   buff->names= (LEX_STRING*) (ptr);
8309   ptr= ptr + ((ALIGN_SIZE(sizeof(LEX_STRING)) *
8310                (uint) dirp->number_off_files));
8311   buff->statuses= (enum log_status *)(ptr);
8312   name_ptr= (char *)(ptr + (sizeof(enum log_status) *
8313                             (uint) dirp->number_off_files));
8314   for (i=0 ; i < (uint) dirp->number_off_files  ; i++)
8315   {
8316     enum log_status st;
8317     file= dirp->dir_entry + i;
8318     if ((file->name[0] == '.' &&
8319          ((file->name[1] == '.' && file->name[2] == '\0') ||
8320             file->name[1] == '\0')))
8321       continue;
8322     if ((st= fl_get_log_status(file->name)) == HA_LOG_STATUS_NOSUCHLOG)
8323       continue;
8324     name_ptr= strxnmov(buff->names[buff->entries].str= name_ptr,
8325                        FN_REFLEN, fl_dir, file->name, NullS);
8326     buff->names[buff->entries].length= (name_ptr -
8327                                         buff->names[buff->entries].str);
8328     buff->statuses[buff->entries]= st;
8329     buff->entries++;
8330   }
8331 
8332   iterator->buffer= buff;
8333   iterator->next= &fl_log_iterator_next;
8334   iterator->destroy= &fl_log_iterator_destroy;
8335   return HA_ITERATOR_OK;
8336 }
8337 
8338 
8339 /* An example of an iterator creator */
8340 enum handler_create_iterator_result
8341 fl_create_iterator(enum handler_iterator_type type,
8342                    struct handler_iterator *iterator)
8343 {
8344   switch(type) {
8345   case HA_TRANSACTLOG_ITERATOR:
8346     return fl_log_iterator_buffer_init(iterator);
8347   default:
8348     return HA_ITERATOR_UNSUPPORTED;
8349   }
8350 }
8351 #endif /*TRANS_LOG_MGM_EXAMPLE_CODE*/
8352 
8353 
8354 /**
8355    Report a warning for FK constraint violation.
8356 
8357    @param  thd     Thread handle.
8358    @param  table   Table on which the operation is performed.
8359    @param  error   Handler error number.
8360 */
8361 void warn_fk_constraint_violation(THD *thd, TABLE *table, int error)
8362 {
8363   String str;
8364   switch(error) {
8365   case HA_ERR_ROW_IS_REFERENCED:
8366     table->file->get_error_message(error, &str);
8367     push_warning(thd, Sql_condition::WARN_LEVEL_WARN,
8368                  ER_ROW_IS_REFERENCED_2, str.c_ptr_safe());
8369     break;
8370   case HA_ERR_NO_REFERENCED_ROW:
8371     table->file->get_error_message(error, &str);
8372     push_warning(thd, Sql_condition::WARN_LEVEL_WARN,
8373                  ER_NO_REFERENCED_ROW_2, str.c_ptr_safe());
8374     break;
8375   }
8376 }
8377 
8378 
8379 /**
8380   Checks if the database name is a reserved word used by an SE by invoking
8381   the handlerton method.
8382 
8383   @param  unused1       Thread handle (unused).
8384   @param  plugin        SE plugin.
8385   @param  name          Database name.
8386 
8387   @retval true          If the name is a reserved word.
8388   @retval false         If the name is not a reserved word.
8389 */
8390 static my_bool is_reserved_db_name_handlerton(THD *unused1, plugin_ref plugin,
8391                                               void *name)
8392 {
8393   handlerton *hton= plugin_data(plugin, handlerton *);
8394   if (hton->state == SHOW_OPTION_YES && hton->is_reserved_db_name)
8395     return (hton->is_reserved_db_name(hton, (const char *)name));
8396   return false;
8397 }
8398 
8399 
8400 /**
8401    Check if the database name is a reserved word used by an SE.
8402 
8403    @param  name    Database name.
8404 
8405    @retval true    If the name is a reserved word.
8406    @retval false   If the name is not a reserved word.
8407 */
8408 bool ha_check_reserved_db_name(const char* name)
8409 {
8410   return (plugin_foreach(NULL, is_reserved_db_name_handlerton,
8411                          MYSQL_STORAGE_ENGINE_PLUGIN, (char *)name));
8412 }
8413