1 /* Copyright (c) 2000, 2021, Oracle and/or its affiliates.
2 
3    This program is free software; you can redistribute it and/or modify
4    it under the terms of the GNU General Public License, version 2.0,
5    as published by the Free Software Foundation.
6 
7    This program is also distributed with certain software (including
8    but not limited to OpenSSL) that is licensed under separate terms,
9    as designated in a particular file or component or in included license
10    documentation.  The authors of MySQL hereby grant you an additional
11    permission to link the program and your derivative works with the
12    separately licensed software that they have included with MySQL.
13 
14    This program is distributed in the hope that it will be useful,
15    but WITHOUT ANY WARRANTY; without even the implied warranty of
16    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
17    GNU General Public License, version 2.0, for more details.
18 
19    You should have received a copy of the GNU General Public License
20    along with this program; if not, write to the Free Software Foundation,
21    Inc., 51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA */
22 
23 /** @file handler.cc
24 
25     @brief
26   Handler-calling-functions
27 */
28 
29 #include "handler.h"
30 
31 #include "my_bit.h"                   // my_count_bits
32 #include "myisam.h"                   // TT_FOR_UPGRADE
33 #include "mysql_version.h"            // MYSQL_VERSION_ID
34 
35 #include "binlog.h"                   // mysql_bin_log
36 #include "debug_sync.h"               // DEBUG_SYNC
37 #include "discover.h"                 // writefrm
38 #include "log.h"                      // sql_print_error
39 #include "log_event.h"                // Write_rows_log_event
40 #include "my_bitmap.h"                // MY_BITMAP
41 #include "probes_mysql.h"             // MYSQL_HANDLER_WRLOCK_START
42 #include "opt_costconstantcache.h"    // reload_optimizer_cost_constants
43 #include "rpl_handler.h"              // RUN_HOOK
44 #include "sql_base.h"                 // free_io_cache
45 #include "sql_parse.h"                // check_stack_overrun
46 #include "sql_plugin.h"               // plugin_foreach
47 #include "sql_table.h"                // build_table_filename
48 #include "transaction.h"              // trans_commit_implicit
49 #include "trigger_def.h"              // TRG_EXT
50 #include "sql_select.h"               // actual_key_parts
51 #include "rpl_write_set_handler.h"    // add_pke
52 #include "auth_common.h"              // check_readonly() and SUPER_ACL
53 
54 
55 #include "pfs_file_provider.h"
56 #include "mysql/psi/mysql_file.h"
57 
58 #include <pfs_table_provider.h>
59 #include <mysql/psi/mysql_table.h>
60 
61 #include <pfs_transaction_provider.h>
62 #include <mysql/psi/mysql_transaction.h>
63 #include "opt_hints.h"
64 
65 #include <list>
66 #include <cstring>
67 #include <string>
68 #include <boost/foreach.hpp>
69 #include <boost/tokenizer.hpp>
70 #include <boost/algorithm/string.hpp>
71 
/**
  @def MYSQL_TABLE_IO_WAIT
  Instrumentation helper for table io_waits.
  Note that this helper is intended to be used from
  within the handler class only, as it uses members
  from @c handler
  Performance schema events are instrumented as follows:
  - in non batch mode, one event is generated per call
  - in batch mode, the number of rows affected is saved
  in @c m_psi_numrows, so that @c end_psi_batch_mode()
  generates a single event for the batch.
  When @c m_psi is NULL, or when HAVE_PSI_TABLE_INTERFACE is not
  defined, only PAYLOAD is executed.
  @param OP the table operation to be performed
  @param INDEX the table index used if any, or MAX_KEY.
  @param RESULT expression tested after PAYLOAD in batch mode only:
  @c m_psi_numrows is incremented when it evaluates to zero.
  It is not evaluated in non batch mode.
  @param PAYLOAD instrumented code to execute
  @sa handler::end_psi_batch_mode.
*/
#ifdef HAVE_PSI_TABLE_INTERFACE
  #define MYSQL_TABLE_IO_WAIT(OP, INDEX, RESULT, PAYLOAD)     \
    {                                                         \
      if (m_psi != NULL)                                      \
      {                                                       \
        switch (m_psi_batch_mode)                             \
        {                                                     \
          case PSI_BATCH_MODE_NONE:                           \
          {                                                   \
            PSI_table_locker *sub_locker= NULL;               \
            PSI_table_locker_state reentrant_safe_state;      \
            sub_locker= PSI_TABLE_CALL(start_table_io_wait)   \
              (& reentrant_safe_state, m_psi, OP, INDEX,      \
               __FILE__, __LINE__);                           \
            PAYLOAD                                           \
            if (sub_locker != NULL)                           \
              PSI_TABLE_CALL(end_table_io_wait)               \
                (sub_locker, 1);                              \
            break;                                            \
          }                                                   \
          case PSI_BATCH_MODE_STARTING:                       \
          {                                                   \
            m_psi_locker= PSI_TABLE_CALL(start_table_io_wait) \
              (& m_psi_locker_state, m_psi, OP, INDEX,        \
               __FILE__, __LINE__);                           \
            PAYLOAD                                           \
            if (!RESULT)                                      \
              m_psi_numrows++;                                \
            m_psi_batch_mode= PSI_BATCH_MODE_STARTED;         \
            break;                                            \
          }                                                   \
          case PSI_BATCH_MODE_STARTED:                        \
          default:                                            \
          {                                                   \
            assert(m_psi_batch_mode                           \
                   == PSI_BATCH_MODE_STARTED);                \
            PAYLOAD                                           \
            if (!RESULT)                                      \
              m_psi_numrows++;                                \
            break;                                            \
          }                                                   \
        }                                                     \
      }                                                       \
      else                                                    \
      {                                                       \
        PAYLOAD                                               \
      }                                                       \
    }
#else
  #define MYSQL_TABLE_IO_WAIT(OP, INDEX, RESULT, PAYLOAD) \
    PAYLOAD
#endif
140 
/**
  @def MYSQL_TABLE_LOCK_WAIT
  Instrumentation helper for table lock waits.
  Like MYSQL_TABLE_IO_WAIT, it reads the @c m_psi member of the
  enclosing object. When @c m_psi is NULL, or when
  HAVE_PSI_TABLE_INTERFACE is not defined, only PAYLOAD is executed.
  @param OP the table operation to be performed
  @param FLAGS per table operation flags.
  @param PAYLOAD the code to instrument.
  @sa MYSQL_END_TABLE_WAIT.
*/
#ifdef HAVE_PSI_TABLE_INTERFACE
  #define MYSQL_TABLE_LOCK_WAIT(OP, FLAGS, PAYLOAD)    \
    {                                                  \
      if (m_psi != NULL)                               \
      {                                                \
        PSI_table_locker *locker;                      \
        PSI_table_locker_state state;                  \
        locker= PSI_TABLE_CALL(start_table_lock_wait)  \
          (& state, m_psi, OP, FLAGS,                  \
          __FILE__, __LINE__);                         \
        PAYLOAD                                        \
        if (locker != NULL)                            \
          PSI_TABLE_CALL(end_table_lock_wait)(locker); \
      }                                                \
      else                                             \
      {                                                \
        PAYLOAD                                        \
      }                                                \
    }
#else
  #define MYSQL_TABLE_LOCK_WAIT(OP, FLAGS, PAYLOAD) \
    PAYLOAD
#endif
172 
173 using std::min;
174 using std::max;
175 using std::list;
176 
// This is a temporary backporting fix.
#ifndef HAVE_LOG2
/*
  Fallback definition of log2(), compiled only when HAVE_LOG2 is not
  defined. It computes the base-2 logarithm as log(x) / M_LN2.

  This will be slightly slower and perhaps a tiny bit less accurate than
  doing it the IEEE754 way but log2() should be available on C99 systems.
*/
inline double log2(double x)
{
  return (log(x) / M_LN2);
}
#endif
188 
/*
  While we have legacy_db_type, we have this array to
  check for dups and to find handlerton from legacy_db_type.
  Remove when legacy_db_type is finally gone.

  In this file the array is indexed by handlerton::slot, and
  ha_finalize_handlerton() resets an entry to NULL when its engine
  is finalized.
*/
st_plugin_int *hton2plugin[MAX_HA];

/**
  Array allowing to check if handlerton is builtin without
  acquiring LOCK_plugin.

  Indexed by handlerton::slot, like hton2plugin[].
*/
static bool builtin_htons[MAX_HA];
201 
ha_resolve_storage_engine_name(const handlerton * db_type)202 const char *ha_resolve_storage_engine_name(const handlerton *db_type)
203 {
204   return db_type == NULL ? "UNKNOWN" : hton2plugin[db_type->slot]->name.str;
205 }
206 
/*
  Installed handlertons, indexed by legacy_db_type
  (see ha_resolve_by_legacy_type() and ha_finalize_handlerton()).
*/
static handlerton *installed_htons[128];

#define BITMAP_STACKBUF_SIZE (128/8)

KEY_CREATE_INFO default_key_create_info=
  { HA_KEY_ALG_UNDEF, 0, {NullS, 0}, {NullS, 0}, true };

/* number of entries in handlertons[] */
ulong total_ha= 0;
/* number of storage engines (from handlertons[]) that support 2pc */
ulong total_ha_2pc= 0;
/* size of savepoint storage area (see ha_init) */
ulong savepoint_alloc_size= 0;

/*
  Historical storage engine aliases, stored as consecutive
  (alias, current name) pairs and terminated by a {NullS, 0} entry.
  Users of this table walk it two entries at a time.
*/
static const LEX_STRING sys_table_aliases[]=
{
  { C_STRING_WITH_LEN("INNOBASE") },  { C_STRING_WITH_LEN("INNODB") },
  { C_STRING_WITH_LEN("NDB") },       { C_STRING_WITH_LEN("NDBCLUSTER") },
  { C_STRING_WITH_LEN("HEAP") },      { C_STRING_WITH_LEN("MEMORY") },
  { C_STRING_WITH_LEN("MERGE") },     { C_STRING_WITH_LEN("MRG_MYISAM") },
  {NullS, 0}
};

/* Row format names; "?" marks entries that have no name. */
const char *ha_row_type[] = {
  "", "FIXED", "DYNAMIC", "COMPRESSED", "REDUNDANT", "COMPACT",
  /* Reserved to be "PAGE" in future versions */ "?",
  "?","?","?"
};

/* Transaction isolation level names, terminated by NullS. */
const char *tx_isolation_names[] =
{ "READ-UNCOMMITTED", "READ-COMMITTED", "REPEATABLE-READ", "SERIALIZABLE",
  NullS};
/* The element count passed here excludes the NullS terminator. */
TYPELIB tx_isolation_typelib= {array_elements(tx_isolation_names)-1,"",
			       tx_isolation_names, NULL};
241 
242 #ifndef NDEBUG
243 
ha_legacy_type_name(legacy_db_type legacy_type)244 const char *ha_legacy_type_name(legacy_db_type legacy_type)
245 {
246   switch (legacy_type)
247   {
248   case DB_TYPE_UNKNOWN:
249     return "DB_TYPE_UNKNOWN";
250   case DB_TYPE_DIAB_ISAM:
251     return "DB_TYPE_DIAB_ISAM";
252   case DB_TYPE_HASH:
253     return "DB_TYPE_HASH";
254   case DB_TYPE_MISAM:
255     return "DB_TYPE_MISAM";
256   case DB_TYPE_PISAM:
257     return "DB_TYPE_PISAM";
258   case DB_TYPE_RMS_ISAM:
259     return "DB_TYPE_RMS_ISAM";
260   case DB_TYPE_HEAP:
261     return "DB_TYPE_HEAP";
262   case DB_TYPE_ISAM:
263     return "DB_TYPE_ISAM";
264   case DB_TYPE_MRG_ISAM:
265     return "DB_TYPE_MRG_ISAM";
266   case DB_TYPE_MYISAM:
267     return "DB_TYPE_MYISAM";
268   case DB_TYPE_MRG_MYISAM:
269     return "DB_TYPE_MRG_MYISAM";
270   case DB_TYPE_BERKELEY_DB:
271     return "DB_TYPE_BERKELEY_DB";
272   case DB_TYPE_INNODB:
273     return "DB_TYPE_INNODB";
274   case DB_TYPE_GEMINI:
275     return "DB_TYPE_GEMINI";
276   case DB_TYPE_NDBCLUSTER:
277     return "DB_TYPE_NDBCLUSTER";
278   case DB_TYPE_EXAMPLE_DB:
279     return "DB_TYPE_EXAMPLE_DB";
280   case DB_TYPE_ARCHIVE_DB:
281     return "DB_TYPE_ARCHIVE_DB";
282   case DB_TYPE_CSV_DB:
283     return "DB_TYPE_CSV_DB";
284   case DB_TYPE_FEDERATED_DB:
285     return "DB_TYPE_FEDERATED_DB";
286   case DB_TYPE_BLACKHOLE_DB:
287     return "DB_TYPE_BLACKHOLE_DB";
288   case DB_TYPE_PARTITION_DB:
289     return "DB_TYPE_PARTITION_DB";
290   case DB_TYPE_BINLOG:
291     return "DB_TYPE_BINLOG";
292   case DB_TYPE_SOLID:
293     return "DB_TYPE_SOLID";
294   case DB_TYPE_PBXT:
295     return "DB_TYPE_PBXT";
296   case DB_TYPE_TABLE_FUNCTION:
297     return "DB_TYPE_TABLE_FUNCTION";
298   case DB_TYPE_MEMCACHE:
299     return "DB_TYPE_MEMCACHE";
300   case DB_TYPE_FALCON:
301     return "DB_TYPE_FALCON";
302   case DB_TYPE_MARIA:
303     return "DB_TYPE_MARIA";
304   case DB_TYPE_PERFORMANCE_SCHEMA:
305     return "DB_TYPE_PERFORMANCE_SCHEMA";
306   default:
307     return "DB_TYPE_DYNAMIC";
308   }
309 }
310 #endif
311 
/**
  Name of the database that holds most of the mysqld system tables.
  Current code assumes that only one specific database name is
  designated as the system database.
*/
const char* mysqld_system_database= "mysql";

/*
  System tables that belong to mysqld_system_database, as
  (database, table name) pairs. The list ends with a {NULL, NULL} entry.
*/
st_handler_tablename mysqld_system_tables[]= {
  {mysqld_system_database, "db"},
  {mysqld_system_database, "user"},
  {mysqld_system_database, "host"},
  {mysqld_system_database, "func"},
  {mysqld_system_database, "proc"},
  {mysqld_system_database, "event"},
  {mysqld_system_database, "plugin"},
  {mysqld_system_database, "servers"},
  {mysqld_system_database, "procs_priv"},
  {mysqld_system_database, "tables_priv"},
  {mysqld_system_database, "proxies_priv"},
  {mysqld_system_database, "columns_priv"},
  {mysqld_system_database, "time_zone"},
  {mysqld_system_database, "time_zone_name"},
  {mysqld_system_database, "time_zone_leap_second"},
  {mysqld_system_database, "time_zone_transition"},
  {mysqld_system_database, "time_zone_transition_type"},
  {mysqld_system_database, "help_category"},
  {mysqld_system_database, "help_keyword"},
  {mysqld_system_database, "help_relation"},
  {mysqld_system_database, "help_topic"},
  {mysqld_system_database, "innodb_table_stats"},
  {mysqld_system_database, "innodb_index_stats"},
  {(const char *)NULL, (const char *)NULL} /* This must be at the end */
};
346 
/**
  This static pointer holds the list of system databases from the SQL layer
  and the various SEs. The required memory is allocated once, and never
  freed.
*/
static const char **known_system_databases= NULL;
static const char **ha_known_system_databases();

// Called for each SE to get the SE specific system database.
static my_bool system_databases_handlerton(THD *unused, plugin_ref plugin,
                                           void *arg);

// Called for each SE to check if the given db.table_name is a system table.
static my_bool check_engine_system_table_handlerton(THD *unused,
                                                    plugin_ref plugin,
                                                    void *arg);
/**
  Structure used by SE during check for system table.
  This structure is passed to each SE handlerton and the status (OUT param)
  is collected.
*/
struct st_sys_tbl_chk_params
{
  const char *db;                             // IN param: database name
  const char *table_name;                     // IN param: table name
  bool is_sql_layer_system_table;             // IN param
  legacy_db_type db_type;                     // IN param

  /* Outcome of the check for db.table_name. */
  enum enum_status
  {
    // db.table_name is user table.
    USER_TABLE,
    /*
      db.table_name is a system table,
      but may not be supported by SE.
    */
    SYSTEM_TABLE,
    /*
      db.table_name is a system table,
      and is supported by SE.
    */
    SE_SUPPORTED_SYSTEM_TABLE
  } status;                                    // OUT param
};
390 
391 
ha_default_plugin(THD * thd)392 static plugin_ref ha_default_plugin(THD *thd)
393 {
394   if (thd->variables.table_plugin)
395     return thd->variables.table_plugin;
396   return my_plugin_lock(thd, &global_system_variables.table_plugin);
397 }
398 
399 
400 /** @brief
401   Return the default storage engine handlerton used for non-temp tables
402   for thread
403 
404   SYNOPSIS
405     ha_default_handlerton(thd)
406     thd         current thread
407 
408   RETURN
409     pointer to handlerton
410 */
ha_default_handlerton(THD * thd)411 handlerton *ha_default_handlerton(THD *thd)
412 {
413   plugin_ref plugin= ha_default_plugin(thd);
414   assert(plugin);
415   handlerton *hton= plugin_data<handlerton*>(plugin);
416   assert(hton);
417   return hton;
418 }
419 
420 
ha_default_temp_plugin(THD * thd)421 static plugin_ref ha_default_temp_plugin(THD *thd)
422 {
423   if (thd->variables.temp_table_plugin)
424     return thd->variables.temp_table_plugin;
425   return my_plugin_lock(thd, &global_system_variables.temp_table_plugin);
426 }
427 
428 
429 /** @brief
430   Return the default storage engine handlerton used for explicitly
431   created temp tables for a thread
432 
433   SYNOPSIS
434     ha_default_temp_handlerton(thd)
435     thd         current thread
436 
437   RETURN
438     pointer to handlerton
439 */
ha_default_temp_handlerton(THD * thd)440 handlerton *ha_default_temp_handlerton(THD *thd)
441 {
442   plugin_ref plugin= ha_default_temp_plugin(thd);
443   assert(plugin);
444   handlerton *hton= plugin_data<handlerton*>(plugin);
445   assert(hton);
446   return hton;
447 }
448 
449 
450 /**
451   Resolve handlerton plugin by name, without checking for "DEFAULT" or
452   HTON_NOT_USER_SELECTABLE.
453 
454   @param thd  Thread context.
455   @param name Plugin name.
456 
457   @return plugin or NULL if not found.
458 */
ha_resolve_by_name_raw(THD * thd,const LEX_CSTRING & name)459 plugin_ref ha_resolve_by_name_raw(THD *thd, const LEX_CSTRING &name)
460 {
461   return plugin_lock_by_name(thd, name, MYSQL_STORAGE_ENGINE_PLUGIN);
462 }
463 
464 /** @brief
465   Return the storage engine handlerton for the supplied name
466 
467   SYNOPSIS
468     ha_resolve_by_name(thd, name)
469     thd         current thread
470     name        name of storage engine
471 
472   RETURN
473     pointer to storage engine plugin handle
474 */
ha_resolve_by_name(THD * thd,const LEX_STRING * name,bool is_temp_table)475 plugin_ref ha_resolve_by_name(THD *thd, const LEX_STRING *name,
476                               bool is_temp_table)
477 {
478   const LEX_STRING *table_alias;
479   plugin_ref plugin;
480 
481 redo:
482   /* my_strnncoll is a macro and gcc doesn't do early expansion of macro */
483   if (thd && !my_charset_latin1.coll->strnncoll(&my_charset_latin1,
484                            (const uchar *)name->str, name->length,
485                            (const uchar *)STRING_WITH_LEN("DEFAULT"), 0))
486     return is_temp_table ?
487       ha_default_plugin(thd) : ha_default_temp_plugin(thd);
488 
489   LEX_CSTRING cstring_name= {name->str, name->length};
490   if ((plugin= ha_resolve_by_name_raw(thd, cstring_name)))
491   {
492     handlerton *hton= plugin_data<handlerton*>(plugin);
493     if (hton && !(hton->flags & HTON_NOT_USER_SELECTABLE))
494       return plugin;
495 
496     /*
497       unlocking plugin immediately after locking is relatively low cost.
498     */
499     plugin_unlock(thd, plugin);
500   }
501 
502   /*
503     We check for the historical aliases.
504   */
505   for (table_alias= sys_table_aliases; table_alias->str; table_alias+= 2)
506   {
507     if (!my_strnncoll(&my_charset_latin1,
508                       (const uchar *)name->str, name->length,
509                       (const uchar *)table_alias->str, table_alias->length))
510     {
511       name= table_alias + 1;
512       goto redo;
513     }
514   }
515 
516   return NULL;
517 }
518 
519 std::string normalized_se_str= "";
520 
521 /*
522   Parse comma separated list of disabled storage engine names
523   and create a normalized string by appending storage names that
524   have aliases. This normalized string is used to disallow
525   table/tablespace creation under the storage engines specified.
526 */
ha_set_normalized_disabled_se_str(const std::string & disabled_se)527 void ha_set_normalized_disabled_se_str(const std::string &disabled_se)
528 {
529   boost::char_separator<char> sep(",");
530   boost::tokenizer< boost::char_separator<char> > tokens(disabled_se, sep);
531   normalized_se_str.append(",");
532   BOOST_FOREACH (std::string se_name, tokens)
533   {
534     const LEX_STRING *table_alias;
535     boost::algorithm::to_upper(se_name);
536     for (table_alias= sys_table_aliases; table_alias->str; table_alias+= 2)
537     {
538       if (!strcasecmp(se_name.c_str(), table_alias->str) ||
539           !strcasecmp(se_name.c_str(), (table_alias+1)->str))
540       {
541         normalized_se_str.append(std::string(table_alias->str) + "," +
542                                  std::string((table_alias+1)->str) + ",");
543         break;
544       }
545     }
546 
547     if (table_alias->str == NULL)
548       normalized_se_str.append(se_name+",");
549   }
550 }
551 
552 // Check if storage engine is disabled for table/tablespace creation.
ha_is_storage_engine_disabled(handlerton * se_handle)553 bool ha_is_storage_engine_disabled(handlerton *se_handle)
554 {
555   if (normalized_se_str.size())
556   {
557     std::string se_name(",");
558     se_name.append(ha_resolve_storage_engine_name(se_handle));
559     se_name.append(",");
560     boost::algorithm::to_upper(se_name);
561     if(strstr(normalized_se_str.c_str(), se_name.c_str()))
562       return true;
563   }
564   return false;
565 }
566 
567 
ha_lock_engine(THD * thd,const handlerton * hton)568 plugin_ref ha_lock_engine(THD *thd, const handlerton *hton)
569 {
570   if (hton)
571   {
572     st_plugin_int **plugin= hton2plugin + hton->slot;
573 
574 #ifdef NDEBUG
575     /*
576       Take a shortcut for builtin engines -- return pointer to plugin
577       without acquiring LOCK_plugin mutex. This is safe safe since such
578       plugins are not deleted until shutdown and we don't do reference
579       counting in non-debug builds for them.
580 
581       Since we have reference to handlerton on our hands, this method
582       can't be called concurrently to non-builtin handlerton initialization/
583       deinitialization. So it is safe to access builtin_htons[] without
584       additional locking.
585      */
586     if (builtin_htons[hton->slot])
587       return *plugin;
588 
589     return my_plugin_lock(thd, plugin);
590 #else
591     /*
592       We can't take shortcut in debug builds.
593       At least assert that builtin_htons[slot] is set correctly.
594     */
595     assert(builtin_htons[hton->slot] == (plugin[0]->plugin_dl == NULL));
596     return my_plugin_lock(thd, &plugin);
597 #endif
598   }
599   return NULL;
600 }
601 
602 
ha_resolve_by_legacy_type(THD * thd,enum legacy_db_type db_type)603 handlerton *ha_resolve_by_legacy_type(THD *thd, enum legacy_db_type db_type)
604 {
605   plugin_ref plugin;
606   switch (db_type) {
607   case DB_TYPE_DEFAULT:
608     return ha_default_handlerton(thd);
609   default:
610     if (db_type > DB_TYPE_UNKNOWN && db_type < DB_TYPE_DEFAULT &&
611         (plugin= ha_lock_engine(thd, installed_htons[db_type])))
612       return plugin_data<handlerton*>(plugin);
613     /* fall through */
614   case DB_TYPE_UNKNOWN:
615     return NULL;
616   }
617 }
618 
619 
620 /**
621   Use other database handler if databasehandler is not compiled in.
622 */
ha_checktype(THD * thd,enum legacy_db_type database_type,bool no_substitute,bool report_error)623 handlerton *ha_checktype(THD *thd, enum legacy_db_type database_type,
624                           bool no_substitute, bool report_error)
625 {
626   handlerton *hton= ha_resolve_by_legacy_type(thd, database_type);
627   if (ha_storage_engine_is_enabled(hton))
628     return hton;
629 
630   if (no_substitute)
631   {
632     if (report_error)
633     {
634       const char *engine_name= ha_resolve_storage_engine_name(hton);
635       my_error(ER_FEATURE_DISABLED,MYF(0),engine_name,engine_name);
636     }
637     return NULL;
638   }
639 
640   (void) RUN_HOOK(transaction, after_rollback, (thd, FALSE));
641 
642   switch (database_type) {
643   case DB_TYPE_MRG_ISAM:
644     return ha_resolve_by_legacy_type(thd, DB_TYPE_MRG_MYISAM);
645   default:
646     break;
647   }
648 
649   return ha_default_handlerton(thd);
650 } /* ha_checktype */
651 
652 
get_new_handler(TABLE_SHARE * share,MEM_ROOT * alloc,handlerton * db_type)653 handler *get_new_handler(TABLE_SHARE *share, MEM_ROOT *alloc,
654                          handlerton *db_type)
655 {
656   handler *file;
657   DBUG_ENTER("get_new_handler");
658   DBUG_PRINT("enter", ("alloc: 0x%lx", (long) alloc));
659 
660   if (db_type && db_type->state == SHOW_OPTION_YES && db_type->create)
661   {
662     if ((file= db_type->create(db_type, share, alloc)))
663       file->init();
664     DBUG_RETURN(file);
665   }
666   /*
667     Try the default table type
668     Here the call to current_thd() is ok as we call this function a lot of
669     times but we enter this branch very seldom.
670   */
671   DBUG_RETURN(get_new_handler(share, alloc, ha_default_handlerton(current_thd)));
672 }
673 
674 
675 static const char **handler_errmsgs;
676 
677 C_MODE_START
get_handler_errmsg(int nr)678 static const char *get_handler_errmsg(int nr)
679 {
680   return handler_errmsgs[nr - HA_ERR_FIRST];
681 }
682 C_MODE_END
683 
684 
685 /**
686   Register handler error messages for use with my_error().
687 
688   @retval
689     0           OK
690   @retval
691     !=0         Error
692 */
693 
ha_init_errors(void)694 int ha_init_errors(void)
695 {
696 #define SETMSG(nr, msg) handler_errmsgs[(nr) - HA_ERR_FIRST]= (msg)
697 
698   /* Allocate a pointer array for the error message strings. */
699   /* Zerofill it to avoid uninitialized gaps. */
700   if (! (handler_errmsgs= (const char**) my_malloc(key_memory_handler_errmsgs,
701                                                    HA_ERR_ERRORS * sizeof(char*),
702                                                    MYF(MY_WME | MY_ZEROFILL))))
703     return 1;
704 
705   /* Set the dedicated error messages. */
706   SETMSG(HA_ERR_KEY_NOT_FOUND,          ER_DEFAULT(ER_KEY_NOT_FOUND));
707   SETMSG(HA_ERR_FOUND_DUPP_KEY,         ER_DEFAULT(ER_DUP_KEY));
708   SETMSG(HA_ERR_RECORD_CHANGED,         "Update wich is recoverable");
709   SETMSG(HA_ERR_WRONG_INDEX,            "Wrong index given to function");
710   SETMSG(HA_ERR_CRASHED,                ER_DEFAULT(ER_NOT_KEYFILE));
711   SETMSG(HA_ERR_WRONG_IN_RECORD,        ER_DEFAULT(ER_CRASHED_ON_USAGE));
712   SETMSG(HA_ERR_OUT_OF_MEM,             "Table handler out of memory");
713   SETMSG(HA_ERR_NOT_A_TABLE,            "Incorrect file format '%.64s'");
714   SETMSG(HA_ERR_WRONG_COMMAND,          "Command not supported");
715   SETMSG(HA_ERR_OLD_FILE,               ER_DEFAULT(ER_OLD_KEYFILE));
716   SETMSG(HA_ERR_NO_ACTIVE_RECORD,       "No record read in update");
717   SETMSG(HA_ERR_RECORD_DELETED,         "Intern record deleted");
718   SETMSG(HA_ERR_RECORD_FILE_FULL,       ER_DEFAULT(ER_RECORD_FILE_FULL));
719   SETMSG(HA_ERR_INDEX_FILE_FULL,        "No more room in index file '%.64s'");
720   SETMSG(HA_ERR_END_OF_FILE,            "End in next/prev/first/last");
721   SETMSG(HA_ERR_UNSUPPORTED,            ER_DEFAULT(ER_ILLEGAL_HA));
722   SETMSG(HA_ERR_TOO_BIG_ROW,            "Too big row");
723   SETMSG(HA_WRONG_CREATE_OPTION,        "Wrong create option");
724   SETMSG(HA_ERR_FOUND_DUPP_UNIQUE,      ER_DEFAULT(ER_DUP_UNIQUE));
725   SETMSG(HA_ERR_UNKNOWN_CHARSET,        "Can't open charset");
726   SETMSG(HA_ERR_WRONG_MRG_TABLE_DEF,    ER_DEFAULT(ER_WRONG_MRG_TABLE));
727   SETMSG(HA_ERR_CRASHED_ON_REPAIR,      ER_DEFAULT(ER_CRASHED_ON_REPAIR));
728   SETMSG(HA_ERR_CRASHED_ON_USAGE,       ER_DEFAULT(ER_CRASHED_ON_USAGE));
729   SETMSG(HA_ERR_LOCK_WAIT_TIMEOUT,      ER_DEFAULT(ER_LOCK_WAIT_TIMEOUT));
730   SETMSG(HA_ERR_LOCK_TABLE_FULL,        ER_DEFAULT(ER_LOCK_TABLE_FULL));
731   SETMSG(HA_ERR_READ_ONLY_TRANSACTION,  ER_DEFAULT(ER_READ_ONLY_TRANSACTION));
732   SETMSG(HA_ERR_LOCK_DEADLOCK,          ER_DEFAULT(ER_LOCK_DEADLOCK));
733   SETMSG(HA_ERR_CANNOT_ADD_FOREIGN,     ER_DEFAULT(ER_CANNOT_ADD_FOREIGN));
734   SETMSG(HA_ERR_NO_REFERENCED_ROW,      ER_DEFAULT(ER_NO_REFERENCED_ROW_2));
735   SETMSG(HA_ERR_ROW_IS_REFERENCED,      ER_DEFAULT(ER_ROW_IS_REFERENCED_2));
736   SETMSG(HA_ERR_NO_SAVEPOINT,           "No savepoint with that name");
737   SETMSG(HA_ERR_NON_UNIQUE_BLOCK_SIZE,  "Non unique key block size");
738   SETMSG(HA_ERR_NO_SUCH_TABLE,          "No such table: '%.64s'");
739   SETMSG(HA_ERR_TABLE_EXIST,            ER_DEFAULT(ER_TABLE_EXISTS_ERROR));
740   SETMSG(HA_ERR_NO_CONNECTION,          "Could not connect to storage engine");
741   SETMSG(HA_ERR_TABLE_DEF_CHANGED,      ER_DEFAULT(ER_TABLE_DEF_CHANGED));
742   SETMSG(HA_ERR_FOREIGN_DUPLICATE_KEY,  "FK constraint would lead to duplicate key");
743   SETMSG(HA_ERR_TABLE_NEEDS_UPGRADE,    ER_DEFAULT(ER_TABLE_NEEDS_UPGRADE));
744   SETMSG(HA_ERR_TABLE_READONLY,         ER_DEFAULT(ER_OPEN_AS_READONLY));
745   SETMSG(HA_ERR_AUTOINC_READ_FAILED,    ER_DEFAULT(ER_AUTOINC_READ_FAILED));
746   SETMSG(HA_ERR_AUTOINC_ERANGE,         ER_DEFAULT(ER_WARN_DATA_OUT_OF_RANGE));
747   SETMSG(HA_ERR_TOO_MANY_CONCURRENT_TRXS, ER_DEFAULT(ER_TOO_MANY_CONCURRENT_TRXS));
748   SETMSG(HA_ERR_INDEX_COL_TOO_LONG,     ER_DEFAULT(ER_INDEX_COLUMN_TOO_LONG));
749   SETMSG(HA_ERR_INDEX_CORRUPT,          ER_DEFAULT(ER_INDEX_CORRUPT));
750   SETMSG(HA_FTS_INVALID_DOCID,          "Invalid InnoDB FTS Doc ID");
751   SETMSG(HA_ERR_TABLE_IN_FK_CHECK,	ER_DEFAULT(ER_TABLE_IN_FK_CHECK));
752   SETMSG(HA_ERR_TABLESPACE_EXISTS,      "Tablespace already exists");
753   SETMSG(HA_ERR_TABLESPACE_MISSING,     ER_DEFAULT(ER_TABLESPACE_MISSING));
754   SETMSG(HA_ERR_FTS_EXCEED_RESULT_CACHE_LIMIT,  "FTS query exceeds result cache limit");
755   SETMSG(HA_ERR_TEMP_FILE_WRITE_FAILURE,	ER_DEFAULT(ER_TEMP_FILE_WRITE_FAILURE));
756   SETMSG(HA_ERR_INNODB_FORCED_RECOVERY,	ER_DEFAULT(ER_INNODB_FORCED_RECOVERY));
757   SETMSG(HA_ERR_FTS_TOO_MANY_WORDS_IN_PHRASE,  "Too many words in a FTS phrase or proximity search");
758   SETMSG(HA_ERR_TABLE_CORRUPT,		ER_DEFAULT(ER_TABLE_CORRUPT));
759   SETMSG(HA_ERR_TABLESPACE_MISSING,	ER_DEFAULT(ER_TABLESPACE_MISSING));
760   SETMSG(HA_ERR_TABLESPACE_IS_NOT_EMPTY,	ER_DEFAULT(ER_TABLESPACE_IS_NOT_EMPTY));
761   SETMSG(HA_ERR_WRONG_FILE_NAME,		ER_DEFAULT(ER_WRONG_FILE_NAME));
762   SETMSG(HA_ERR_NOT_ALLOWED_COMMAND,		ER_DEFAULT(ER_NOT_ALLOWED_COMMAND));
763   SETMSG(HA_ERR_COMPUTE_FAILED,		"Compute virtual column value failed");
764   SETMSG(HA_ERR_FTS_TOO_MANY_NESTED_EXP,  "Too many nested sub-expressions in a full-text search");
765   /* Register the error messages for use with my_error(). */
766   return my_error_register(get_handler_errmsg, HA_ERR_FIRST, HA_ERR_LAST);
767 }
768 
769 
ha_finalize_handlerton(st_plugin_int * plugin)770 int ha_finalize_handlerton(st_plugin_int *plugin)
771 {
772   handlerton *hton= (handlerton *)plugin->data;
773   DBUG_ENTER("ha_finalize_handlerton");
774 
775   /* hton can be NULL here, if ha_initialize_handlerton() failed. */
776   if (!hton)
777     goto end;
778 
779   switch (hton->state)
780   {
781   case SHOW_OPTION_NO:
782   case SHOW_OPTION_DISABLED:
783     break;
784   case SHOW_OPTION_YES:
785     if (installed_htons[hton->db_type] == hton)
786       installed_htons[hton->db_type]= NULL;
787     break;
788   };
789 
790   if (hton->panic)
791     hton->panic(hton, HA_PANIC_CLOSE);
792 
793   if (plugin->plugin->deinit)
794   {
795     /*
796       Today we have no defined/special behavior for uninstalling
797       engine plugins.
798     */
799     DBUG_PRINT("info", ("Deinitializing plugin: '%s'", plugin->name.str));
800     if (plugin->plugin->deinit(NULL))
801     {
802       DBUG_PRINT("warning", ("Plugin '%s' deinit function returned error.",
803                              plugin->name.str));
804     }
805   }
806 
807   /*
808     In case a plugin is uninstalled and re-installed later, it should
809     reuse an array slot. Otherwise the number of uninstall/install
810     cycles would be limited.
811   */
812   if (hton->slot != HA_SLOT_UNDEF)
813   {
814     /* Make sure we are not unpluging another plugin */
815     assert(hton2plugin[hton->slot] == plugin);
816     assert(hton->slot < MAX_HA);
817     hton2plugin[hton->slot]= NULL;
818     builtin_htons[hton->slot]= false; /* Extra correctness. */
819   }
820 
821   my_free(hton);
822 
823  end:
824   DBUG_RETURN(0);
825 }
826 
827 
ha_initialize_handlerton(st_plugin_int * plugin)828 int ha_initialize_handlerton(st_plugin_int *plugin)
829 {
830   handlerton *hton;
831   DBUG_ENTER("ha_initialize_handlerton");
832   DBUG_PRINT("plugin", ("initialize plugin: '%s'", plugin->name.str));
833 
834   hton= (handlerton *)my_malloc(key_memory_handlerton,
835                                 sizeof(handlerton),
836                                 MYF(MY_WME | MY_ZEROFILL));
837 
838   if (hton == NULL)
839   {
840     sql_print_error("Unable to allocate memory for plugin '%s' handlerton.",
841                     plugin->name.str);
842     goto err_no_hton_memory;
843   }
844 
845   hton->slot= HA_SLOT_UNDEF;
846   /* Historical Requirement */
847   plugin->data= hton; // shortcut for the future
848   if (plugin->plugin->init && plugin->plugin->init(hton))
849   {
850     sql_print_error("Plugin '%s' init function returned error.",
851                     plugin->name.str);
852     goto err;
853   }
854 
855   /*
856     the switch below and hton->state should be removed when
857     command-line options for plugins will be implemented
858   */
859   DBUG_PRINT("info", ("hton->state=%d", hton->state));
860   switch (hton->state) {
861   case SHOW_OPTION_NO:
862     break;
863   case SHOW_OPTION_YES:
864     {
865       uint tmp;
866       ulong fslot;
867       /* now check the db_type for conflict */
868       if (hton->db_type <= DB_TYPE_UNKNOWN ||
869           hton->db_type >= DB_TYPE_DEFAULT ||
870           installed_htons[hton->db_type])
871       {
872         int idx= (int) DB_TYPE_FIRST_DYNAMIC;
873 
874         while (idx < (int) DB_TYPE_DEFAULT && installed_htons[idx])
875           idx++;
876 
877         if (idx == (int) DB_TYPE_DEFAULT)
878         {
879           sql_print_warning("Too many storage engines!");
880           goto err_deinit;
881         }
882         if (hton->db_type != DB_TYPE_UNKNOWN)
883           sql_print_warning("Storage engine '%s' has conflicting typecode. "
884                             "Assigning value %d.", plugin->plugin->name, idx);
885         hton->db_type= (enum legacy_db_type) idx;
886       }
887 
888       /*
889         In case a plugin is uninstalled and re-installed later, it should
890         reuse an array slot. Otherwise the number of uninstall/install
891         cycles would be limited. So look for a free slot.
892       */
893       DBUG_PRINT("plugin", ("total_ha: %lu", total_ha));
894       for (fslot= 0; fslot < total_ha; fslot++)
895       {
896         if (!hton2plugin[fslot])
897           break;
898       }
899       if (fslot < total_ha)
900         hton->slot= fslot;
901       else
902       {
903         if (total_ha >= MAX_HA)
904         {
905           sql_print_error("Too many plugins loaded. Limit is %lu. "
906                           "Failed on '%s'", (ulong) MAX_HA, plugin->name.str);
907           goto err_deinit;
908         }
909         hton->slot= total_ha++;
910       }
911       installed_htons[hton->db_type]= hton;
912       tmp= hton->savepoint_offset;
913       hton->savepoint_offset= savepoint_alloc_size;
914       savepoint_alloc_size+= tmp;
915       hton2plugin[hton->slot]=plugin;
916       builtin_htons[hton->slot]= (plugin->plugin_dl == NULL);
917       if (hton->prepare)
918         total_ha_2pc++;
919       break;
920     }
921     /* fall through */
922   default:
923     hton->state= SHOW_OPTION_DISABLED;
924     break;
925   }
926 
927   /*
928     This is entirely for legacy. We will create a new "disk based" hton and a
929     "memory" hton which will be configurable longterm. We should be able to
930     remove partition and myisammrg.
931   */
932   switch (hton->db_type) {
933   case DB_TYPE_HEAP:
934     heap_hton= hton;
935     break;
936   case DB_TYPE_MYISAM:
937     myisam_hton= hton;
938     break;
939   case DB_TYPE_INNODB:
940     innodb_hton= hton;
941     break;
942   default:
943     break;
944   };
945 
946   /*
947     Re-load the optimizer cost constants since this storage engine can
948     have non-default cost constants.
949   */
950   reload_optimizer_cost_constants();
951 
952   DBUG_RETURN(0);
953 
954 err_deinit:
955   /*
956     Let plugin do its inner deinitialization as plugin->init()
957     was successfully called before.
958   */
959   if (plugin->plugin->deinit)
960     (void) plugin->plugin->deinit(NULL);
961 
962 err:
963   my_free(hton);
964 err_no_hton_memory:
965   plugin->data= NULL;
966   DBUG_RETURN(1);
967 }
968 
ha_init()969 int ha_init()
970 {
971   int error= 0;
972   DBUG_ENTER("ha_init");
973 
974   assert(total_ha < MAX_HA);
975   /*
976     Check if there is a transaction-capable storage engine besides the
977     binary log (which is considered a transaction-capable storage engine in
978     counting total_ha)
979   */
980   opt_using_transactions= total_ha>(ulong)opt_bin_log;
981   savepoint_alloc_size+= sizeof(SAVEPOINT);
982 
983   /*
984     Initialize system database name cache.
985     This cache is used to do a quick check if a given
986     db.tablename is a system table.
987   */
988   known_system_databases= ha_known_system_databases();
989 
990   DBUG_RETURN(error);
991 }
992 
ha_end()993 void ha_end()
994 {
995   // Unregister handler error messages.
996   my_error_unregister(HA_ERR_FIRST, HA_ERR_LAST);
997   my_free(handler_errmsgs);
998 }
999 
dropdb_handlerton(THD * unused1,plugin_ref plugin,void * path)1000 static my_bool dropdb_handlerton(THD *unused1, plugin_ref plugin,
1001                                  void *path)
1002 {
1003   handlerton *hton= plugin_data<handlerton*>(plugin);
1004   if (hton->state == SHOW_OPTION_YES && hton->drop_database)
1005     hton->drop_database(hton, (char *)path);
1006   return FALSE;
1007 }
1008 
1009 
ha_drop_database(char * path)1010 void ha_drop_database(char* path)
1011 {
1012   plugin_foreach(NULL, dropdb_handlerton, MYSQL_STORAGE_ENGINE_PLUGIN, path);
1013 }
1014 
1015 
closecon_handlerton(THD * thd,plugin_ref plugin,void * unused)1016 static my_bool closecon_handlerton(THD *thd, plugin_ref plugin,
1017                                    void *unused)
1018 {
1019   handlerton *hton= plugin_data<handlerton*>(plugin);
1020   /*
1021     there's no need to rollback here as all transactions must
1022     be rolled back already
1023   */
1024   if (hton->state == SHOW_OPTION_YES && thd_get_ha_data(thd, hton))
1025   {
1026     if (hton->close_connection)
1027       hton->close_connection(hton, thd);
1028     /* make sure ha_data is reset and ha_data_lock is released */
1029     thd_set_ha_data(thd, hton, NULL);
1030   }
1031   return FALSE;
1032 }
1033 
1034 
1035 /**
1036   @note
1037     don't bother to rollback here, it's done already
1038 */
ha_close_connection(THD * thd)1039 void ha_close_connection(THD* thd)
1040 {
1041   plugin_foreach(thd, closecon_handlerton, MYSQL_STORAGE_ENGINE_PLUGIN, 0);
1042 }
1043 
1044 
kill_handlerton(THD * thd,plugin_ref plugin,void *)1045 static my_bool kill_handlerton(THD *thd, plugin_ref plugin, void *)
1046 {
1047   handlerton *hton= plugin_data<handlerton*>(plugin);
1048 
1049   if (hton->state == SHOW_OPTION_YES && hton->kill_connection)
1050   {
1051     if (thd_get_ha_data(thd, hton))
1052       hton->kill_connection(hton, thd);
1053   }
1054 
1055   return FALSE;
1056 }
1057 
ha_kill_connection(THD * thd)1058 void ha_kill_connection(THD *thd)
1059 {
1060   plugin_foreach(thd, kill_handlerton, MYSQL_STORAGE_ENGINE_PLUGIN, 0);
1061 }
1062 
1063 
1064 /* ========================================================================
1065  ======================= TRANSACTIONS ===================================*/
1066 
1067 /**
1068   Transaction handling in the server
1069   ==================================
1070 
1071   In each client connection, MySQL maintains two transactional
1072   states:
1073   - a statement transaction,
1074   - a standard, also called normal transaction.
1075 
1076   Historical note
1077   ---------------
1078   "Statement transaction" is a non-standard term that comes
1079   from the times when MySQL supported BerkeleyDB storage engine.
1080 
1081   First of all, it should be said that in BerkeleyDB auto-commit
1082   mode auto-commits operations that are atomic to the storage
1083   engine itself, such as a write of a record, and are too
1084   high-granular to be atomic from the application perspective
1085   (MySQL). One SQL statement could involve many BerkeleyDB
1086   auto-committed operations and thus BerkeleyDB auto-commit was of
1087   little use to MySQL.
1088 
1089   Secondly, instead of SQL standard savepoints, BerkeleyDB
1090   provided the concept of "nested transactions". In a nutshell,
1091   transactions could be arbitrarily nested, but when the parent
1092   transaction was committed or aborted, all its child (nested)
  transactions were committed or aborted as well.
1094   Commit of a nested transaction, in turn, made its changes
1095   visible, but not durable: it destroyed the nested transaction,
1096   all its changes would become available to the parent and
1097   currently active nested transactions of this parent.
1098 
1099   So the mechanism of nested transactions was employed to
1100   provide "all or nothing" guarantee of SQL statements
1101   required by the standard.
1102   A nested transaction would be created at start of each SQL
1103   statement, and destroyed (committed or aborted) at statement
1104   end. Such nested transaction was internally referred to as
1105   a "statement transaction" and gave birth to the term.
1106 
1107   (Historical note ends)
1108 
1109   Since then a statement transaction is started for each statement
1110   that accesses transactional tables or uses the binary log.  If
1111   the statement succeeds, the statement transaction is committed.
1112   If the statement fails, the transaction is rolled back. Commits
1113   of statement transactions are not durable -- each such
1114   transaction is nested in the normal transaction, and if the
1115   normal transaction is rolled back, the effects of all enclosed
1116   statement transactions are undone as well.  Technically,
1117   a statement transaction can be viewed as a savepoint which is
1118   maintained automatically in order to make effects of one
1119   statement atomic.
1120 
1121   The normal transaction is started by the user and is ended
1122   usually upon a user request as well. The normal transaction
1123   encloses transactions of all statements issued between
1124   its beginning and its end.
1125   In autocommit mode, the normal transaction is equivalent
1126   to the statement transaction.
1127 
1128   Since MySQL supports PSEA (pluggable storage engine
1129   architecture), more than one transactional engine can be
1130   active at a time. Hence transactions, from the server
1131   point of view, are always distributed. In particular,
1132   transactional state is maintained independently for each
1133   engine. In order to commit a transaction the two phase
1134   commit protocol is employed.
1135 
1136   Not all statements are executed in context of a transaction.
1137   Administrative and status information statements do not modify
1138   engine data, and thus do not start a statement transaction and
1139   also have no effect on the normal transaction. Examples of such
1140   statements are SHOW STATUS and RESET SLAVE.
1141 
1142   Similarly DDL statements are not transactional,
1143   and therefore a transaction is [almost] never started for a DDL
1144   statement. The difference between a DDL statement and a purely
1145   administrative statement though is that a DDL statement always
1146   commits the current transaction before proceeding, if there is
1147   any.
1148 
1149   At last, SQL statements that work with non-transactional
1150   engines also have no effect on the transaction state of the
1151   connection. Even though they are written to the binary log,
1152   and the binary log is, overall, transactional, the writes
1153   are done in "write-through" mode, directly to the binlog
1154   file, followed with a OS cache sync, in other words,
1155   bypassing the binlog undo log (translog).
1156   They do not commit the current normal transaction.
  A failure of a statement that uses non-transactional tables
  would cause a rollback of the statement transaction, but
  if no transactional tables are used,
  no statement transaction is started.
1161 
1162   Data layout
1163   -----------
1164 
1165   The server stores its transaction-related data in
1166   thd->transaction. This structure has two members of type
1167   THD_TRANS. These members correspond to the statement and
1168   normal transactions respectively:
1169 
1170   - thd->transaction.stmt contains a list of engines
1171   that are participating in the given statement
1172   - thd->transaction.all contains a list of engines that
1173   have participated in any of the statement transactions started
1174   within the context of the normal transaction.
1175   Each element of the list contains a pointer to the storage
1176   engine, engine-specific transactional data, and engine-specific
1177   transaction flags.
1178 
1179   In autocommit mode thd->transaction.all is empty.
1180   Instead, data of thd->transaction.stmt is
1181   used to commit/rollback the normal transaction.
1182 
1183   The list of registered engines has a few important properties:
1184   - no engine is registered in the list twice
  - engines are present in the list in reverse temporal order --
1186   new participants are always added to the beginning of the list.
1187 
1188   Transaction life cycle
1189   ----------------------
1190 
1191   When a new connection is established, thd->transaction
1192   members are initialized to an empty state.
1193   If a statement uses any tables, all affected engines
1194   are registered in the statement engine list. In
1195   non-autocommit mode, the same engines are registered in
1196   the normal transaction list.
1197   At the end of the statement, the server issues a commit
1198   or a roll back for all engines in the statement list.
1199   At this point transaction flags of an engine, if any, are
1200   propagated from the statement list to the list of the normal
1201   transaction.
1202   When commit/rollback is finished, the statement list is
1203   cleared. It will be filled in again by the next statement,
1204   and emptied again at the next statement's end.
1205 
1206   The normal transaction is committed in a similar way
1207   (by going over all engines in thd->transaction.all list)
1208   but at different times:
  - when a COMMIT SQL statement is issued by the user
1210   - implicitly, by the server, at the beginning of a DDL statement
1211   or SET AUTOCOMMIT={0|1} statement.
1212 
1213   The normal transaction can be rolled back as well:
1214   - if the user has requested so, by issuing ROLLBACK SQL
1215   statement
1216   - if one of the storage engines requested a rollback
1217   by setting thd->transaction_rollback_request. This may
  happen, e.g., when the transaction in the engine was
  chosen as a victim of the internal deadlock resolution algorithm
1220   and rolled back internally. When such a situation happens, there
1221   is little the server can do and the only option is to rollback
1222   transactions in all other participating engines.  In this case
1223   the rollback is accompanied by an error sent to the user.
1224 
1225   As follows from the use cases above, the normal transaction
1226   is never committed when there is an outstanding statement
1227   transaction. In most cases there is no conflict, since
1228   commits of the normal transaction are issued by a stand-alone
1229   administrative or DDL statement, thus no outstanding statement
1230   transaction of the previous statement exists. Besides,
  all statements that manipulate the normal transaction
1232   are prohibited in stored functions and triggers, therefore
1233   no conflicting situation can occur in a sub-statement either.
1234   The remaining rare cases when the server explicitly has
1235   to commit the statement transaction prior to committing the normal
1236   one cover error-handling scenarios (see for example
1237   SQLCOM_LOCK_TABLES).
1238 
1239   When committing a statement or a normal transaction, the server
1240   either uses the two-phase commit protocol, or issues a commit
1241   in each engine independently. The two-phase commit protocol
1242   is used only if:
1243   - all participating engines support two-phase commit (provide
1244     handlerton::prepare PSEA API call) and
1245   - transactions in at least two engines modify data (i.e. are
1246   not read-only).
1247 
1248   Note that the two phase commit is used for
1249   statement transactions, even though they are not durable anyway.
1250   This is done to ensure logical consistency of data in a multiple-
1251   engine transaction.
1252   For example, imagine that some day MySQL supports unique
1253   constraint checks deferred till the end of statement. In such
1254   case a commit in one of the engines may yield ER_DUP_KEY,
1255   and MySQL should be able to gracefully abort statement
1256   transactions of other participants.
1257 
1258   After the normal transaction has been committed,
1259   thd->transaction.all list is cleared.
1260 
1261   When a connection is closed, the current normal transaction, if
1262   any, is rolled back.
1263 
1264   Roles and responsibilities
1265   --------------------------
1266 
1267   The server has no way to know that an engine participates in
1268   the statement and a transaction has been started
1269   in it unless the engine says so. Thus, in order to be
1270   a part of a transaction, the engine must "register" itself.
1271   This is done by invoking trans_register_ha() server call.
1272   Normally the engine registers itself whenever handler::external_lock()
1273   is called. trans_register_ha() can be invoked many times: if
1274   an engine is already registered, the call does nothing.
1275   In case autocommit is not set, the engine must register itself
1276   twice -- both in the statement list and in the normal transaction
1277   list.
1278   In which list to register is a parameter of trans_register_ha().
1279 
1280   Note, that although the registration interface in itself is
1281   fairly clear, the current usage practice often leads to undesired
1282   effects. E.g. since a call to trans_register_ha() in most engines
1283   is embedded into implementation of handler::external_lock(), some
1284   DDL statements start a transaction (at least from the server
1285   point of view) even though they are not expected to. E.g.
1286   CREATE TABLE does not start a transaction, since
1287   handler::external_lock() is never called during CREATE TABLE. But
1288   CREATE TABLE ... SELECT does, since handler::external_lock() is
1289   called for the table that is being selected from. This has no
1290   practical effects currently, but must be kept in mind
1291   nevertheless.
1292 
1293   Once an engine is registered, the server will do the rest
1294   of the work.
1295 
1296   During statement execution, whenever any of data-modifying
1297   PSEA API methods is used, e.g. handler::write_row() or
1298   handler::update_row(), the read-write flag is raised in the
1299   statement transaction for the involved engine.
  Currently all PSEA calls are "traced", and the data can not be
1301   changed in a way other than issuing a PSEA call. Important:
1302   unless this invariant is preserved the server will not know that
1303   a transaction in a given engine is read-write and will not
1304   involve the two-phase commit protocol!
1305 
1306   At the end of a statement, server call trans_commit_stmt is
1307   invoked. This call in turn invokes handlerton::prepare()
1308   for every involved engine. Prepare is followed by a call
  to handlerton::commit_one_phase(). If a one-phase commit
1310   will suffice, handlerton::prepare() is not invoked and
1311   the server only calls handlerton::commit_one_phase().
1312   At statement commit, the statement-related read-write
1313   engine flag is propagated to the corresponding flag in the
1314   normal transaction.  When the commit is complete, the list
1315   of registered engines is cleared.
1316 
1317   Rollback is handled in a similar fashion.
1318 
1319   Additional notes on DDL and the normal transaction.
1320   ---------------------------------------------------
1321 
1322   DDLs and operations with non-transactional engines
1323   do not "register" in thd->transaction lists, and thus do not
1324   modify the transaction state. Besides, each DDL in
1325   MySQL is prefixed with an implicit normal transaction commit
1326   (a call to trans_commit_implicit()), and thus leaves nothing
1327   to modify.
1328   However, as it has been pointed out with CREATE TABLE .. SELECT,
1329   some DDL statements can start a *new* transaction.
1330 
1331   Behaviour of the server in this case is currently badly
1332   defined.
1333   DDL statements use a form of "semantic" logging
1334   to maintain atomicity: if CREATE TABLE .. SELECT failed,
1335   the newly created table is deleted.
1336   In addition, some DDL statements issue interim transaction
1337   commits: e.g. ALTER TABLE issues a commit after data is copied
1338   from the original table to the internal temporary table. Other
  statements, e.g. CREATE TABLE ... SELECT do not always commit
  after themselves.
1341   And finally there is a group of DDL statements such as
  RENAME/DROP TABLE that don't start a new transaction
  and don't commit.
1344 
1345   This diversity makes it hard to say what will happen if
1346   by chance a stored function is invoked during a DDL --
1347   whether any modifications it makes will be committed or not
1348   is not clear. Fortunately, SQL grammar of few DDLs allows
1349   invocation of a stored function.
1350 
1351   A consistent behaviour is perhaps to always commit the normal
1352   transaction after all DDLs, just like the statement transaction
1353   is always committed at the end of all statements.
1354 */
1355 
1356 /**
1357   Register a storage engine for a transaction.
1358 
1359   Every storage engine MUST call this function when it starts
1360   a transaction or a statement (that is it must be called both for the
1361   "beginning of transaction" and "beginning of statement").
1362   Only storage engines registered for the transaction/statement
1363   will know when to commit/rollback it.
1364 
1365   @note
1366     trans_register_ha is idempotent - storage engine may register many
1367     times per transaction.
1368 
1369 */
trans_register_ha(THD * thd,bool all,handlerton * ht_arg,const ulonglong * trxid)1370 void trans_register_ha(THD *thd, bool all, handlerton *ht_arg,
1371                        const ulonglong *trxid)
1372 {
1373   Ha_trx_info *ha_info;
1374   Transaction_ctx *trn_ctx= thd->get_transaction();
1375   Transaction_ctx::enum_trx_scope trx_scope=
1376     all ? Transaction_ctx::SESSION : Transaction_ctx::STMT;
1377 
1378   DBUG_ENTER("trans_register_ha");
1379   DBUG_PRINT("enter",("%s", all ? "all" : "stmt"));
1380 
1381   Ha_trx_info *knownn_trans= trn_ctx->ha_trx_info(trx_scope);
1382   if (all)
1383   {
1384     /*
1385       Ensure no active backup engine data exists, unless the current transaction
1386       is from replication and in active xa state.
1387     */
1388     assert(thd->ha_data[ht_arg->slot].ha_ptr_backup == NULL ||
1389            (thd->get_transaction()->xid_state()->
1390             has_state(XID_STATE::XA_ACTIVE)));
1391     assert(thd->ha_data[ht_arg->slot].ha_ptr_backup == NULL ||
1392            (thd->is_binlog_applier() || thd->slave_thread));
1393 
1394     thd->server_status|= SERVER_STATUS_IN_TRANS;
1395     if (thd->tx_read_only)
1396       thd->server_status|= SERVER_STATUS_IN_TRANS_READONLY;
1397     DBUG_PRINT("info", ("setting SERVER_STATUS_IN_TRANS"));
1398   }
1399 
1400   ha_info= thd->ha_data[ht_arg->slot].ha_info + (all ? 1 : 0);
1401 
1402   if (ha_info->is_started())
1403     DBUG_VOID_RETURN; /* already registered, return */
1404 
1405   ha_info->register_ha(knownn_trans, ht_arg);
1406   trn_ctx->set_ha_trx_info(trx_scope, ha_info);
1407 
1408   if (ht_arg->prepare == 0)
1409     trn_ctx->set_no_2pc(trx_scope, true);
1410 
1411   trn_ctx->xid_state()->set_query_id(thd->query_id);
1412 /*
1413   Register transaction start in performance schema if not done already.
1414   By doing this, we handle cases when the transaction is started implicitly in
1415   autocommit=0 mode, and cases when we are in normal autocommit=1 mode and the
1416   executed statement is a single-statement transaction.
1417 
1418   Explicitly started transactions are handled in trans_begin().
1419 
1420   Do not register transactions in which binary log is the only participating
1421   transactional storage engine.
1422 */
1423 #ifdef HAVE_PSI_TRANSACTION_INTERFACE
1424   if (thd->m_transaction_psi == NULL &&
1425       ht_arg->db_type != DB_TYPE_BINLOG)
1426   {
1427     const XID *xid= trn_ctx->xid_state()->get_xid();
1428     my_bool autocommit= !thd->in_multi_stmt_transaction_mode();
1429     thd->m_transaction_psi= MYSQL_START_TRANSACTION(&thd->m_transaction_state,
1430                                          xid, trxid, thd->tx_isolation,
1431                                          thd->tx_read_only, autocommit);
1432     DEBUG_SYNC(thd, "after_set_transaction_psi_before_set_transaction_gtid");
1433     gtid_set_performance_schema_values(thd);
1434   }
1435 #endif
1436   DBUG_VOID_RETURN;
1437 }
1438 
1439 /**
1440   @retval
1441     0   ok
1442   @retval
1443     1   error, transaction was rolled back
1444 */
ha_prepare(THD * thd)1445 int ha_prepare(THD *thd)
1446 {
1447   int error=0;
1448   Transaction_ctx *trn_ctx= thd->get_transaction();
1449   DBUG_ENTER("ha_prepare");
1450 
1451   if (trn_ctx->is_active(Transaction_ctx::SESSION))
1452   {
1453     const Ha_trx_info *ha_info= trn_ctx->ha_trx_info(
1454       Transaction_ctx::SESSION);
1455     bool gtid_error= false, need_clear_owned_gtid= false;
1456 
1457     if ((gtid_error=
1458          MY_TEST(commit_owned_gtids(thd, true, &need_clear_owned_gtid))))
1459     {
1460       assert(need_clear_owned_gtid);
1461 
1462       ha_rollback_trans(thd, true);
1463       error= 1;
1464       goto err;
1465     }
1466 
1467     while (ha_info)
1468     {
1469       handlerton *ht= ha_info->ht();
1470       assert(!thd->status_var_aggregated);
1471       thd->status_var.ha_prepare_count++;
1472       if (ht->prepare)
1473       {
1474         DBUG_EXECUTE_IF("simulate_xa_failure_prepare", {
1475           ha_rollback_trans(thd, true);
1476           DBUG_RETURN(1);
1477         });
1478         if (ht->prepare(ht, thd, true))
1479         {
1480           ha_rollback_trans(thd, true);
1481           error=1;
1482           break;
1483         }
1484       }
1485       else
1486       {
1487         push_warning_printf(thd, Sql_condition::SL_WARNING,
1488                             ER_ILLEGAL_HA, ER(ER_ILLEGAL_HA),
1489                             ha_resolve_storage_engine_name(ht));
1490       }
1491       ha_info= ha_info->next();
1492     }
1493 
1494     assert(thd->get_transaction()->xid_state()->
1495            has_state(XID_STATE::XA_IDLE));
1496 
1497 err:
1498     gtid_state_commit_or_rollback(thd, need_clear_owned_gtid, !gtid_error);
1499   }
1500 
1501   DBUG_RETURN(error);
1502 }
1503 
1504 /**
1505   Check if we can skip the two-phase commit.
1506 
1507   A helper function to evaluate if two-phase commit is mandatory.
1508   As a side effect, propagates the read-only/read-write flags
1509   of the statement transaction to its enclosing normal transaction.
1510 
1511   If we have at least two engines with read-write changes we must
1512   run a two-phase commit. Otherwise we can run several independent
1513   commits as the only transactional engine has read-write changes
1514   and others are read-only.
1515 
1516   @retval   0   All engines are read-only.
1517   @retval   1   We have the only engine with read-write changes.
1518   @retval   >1  More than one engine have read-write changes.
1519                 Note: return value might NOT be the exact number of
1520                 engines with read-write changes.
1521 */
1522 
1523 static
1524 uint
ha_check_and_coalesce_trx_read_only(THD * thd,Ha_trx_info * ha_list,bool all)1525 ha_check_and_coalesce_trx_read_only(THD *thd, Ha_trx_info *ha_list,
1526                                     bool all)
1527 {
1528   /* The number of storage engines that have actual changes. */
1529   unsigned rw_ha_count= 0;
1530   Ha_trx_info *ha_info;
1531 
1532   for (ha_info= ha_list; ha_info; ha_info= ha_info->next())
1533   {
1534     if (ha_info->is_trx_read_write())
1535       ++rw_ha_count;
1536 
1537     if (! all)
1538     {
1539       Ha_trx_info *ha_info_all= &thd->ha_data[ha_info->ht()->slot].ha_info[1];
1540       assert(ha_info != ha_info_all);
1541       /*
1542         Merge read-only/read-write information about statement
1543         transaction to its enclosing normal transaction. Do this
1544         only if in a real transaction -- that is, if we know
1545         that ha_info_all is registered in thd->transaction.all.
1546         Since otherwise we only clutter the normal transaction flags.
1547       */
1548       if (ha_info_all->is_started()) /* FALSE if autocommit. */
1549         ha_info_all->coalesce_trx_with(ha_info);
1550     }
1551     else if (rw_ha_count > 1)
1552     {
1553       /*
1554         It is a normal transaction, so we don't need to merge read/write
1555         information up, and the need for two-phase commit has been
1556         already established. Break the loop prematurely.
1557       */
1558       break;
1559     }
1560   }
1561   return rw_ha_count;
1562 }
1563 
1564 
1565 /**
1566   The function computes condition to call gtid persistor wrapper,
1567   and executes it.
1568   It is invoked at committing a statement or transaction, including XA,
1569   and also at XA prepare handling.
1570 
1571   @param thd  Thread context.
1572   @param all  The execution scope, true for the transaction one, false
1573               for the statement one.
1574   @param[out] need_clear_owned_gtid_ptr
1575               A pointer to bool variable to return the computed decision
1576               value.
1577   @return zero as no error indication, non-zero otherwise
1578 */
1579 
commit_owned_gtids(THD * thd,bool all,bool * need_clear_owned_gtid_ptr)1580 int commit_owned_gtids(THD *thd, bool all, bool *need_clear_owned_gtid_ptr)
1581 {
1582   DBUG_ENTER("commit_owned_gtids(...)");
1583   int error= 0;
1584 
1585   if ((!opt_bin_log || (thd->slave_thread && !opt_log_slave_updates)) &&
1586       (all || !thd->in_multi_stmt_transaction_mode()) &&
1587       !thd->is_operating_gtid_table_implicitly &&
1588       !thd->is_operating_substatement_implicitly)
1589   {
1590     /*
1591       If the binary log is disabled for this thread (either by
1592       log_bin=0 or sql_log_bin=0 or by log_slave_updates=0 for a
1593       slave thread), then the statement will not be written to
1594       the binary log. In this case, we should save its GTID into
1595       mysql.gtid_executed table and @@GLOBAL.GTID_EXECUTED as it
1596       did when binlog is enabled.
1597     */
1598     if (thd->owned_gtid.sidno > 0)
1599     {
1600       error= gtid_state->save(thd);
1601       *need_clear_owned_gtid_ptr= true;
1602     }
1603     else if (thd->owned_gtid.sidno == THD::OWNED_SIDNO_ANONYMOUS)
1604       *need_clear_owned_gtid_ptr= true;
1605   }
1606   else
1607   {
1608     *need_clear_owned_gtid_ptr= false;
1609   }
1610 
1611   DBUG_RETURN(error);
1612 }
1613 
1614 
1615 /**
1616   The function is a wrapper of commit_owned_gtids(...). It is invoked
1617   at committing a partially failed statement or transaction.
1618 
1619   @param thd  Thread context.
1620 
1621   @retval -1 if error when persisting owned gtid.
1622   @retval 0 if succeed to commit owned gtid.
1623   @retval 1 if do not meet conditions to commit owned gtid.
1624 */
commit_owned_gtid_by_partial_command(THD * thd)1625 int commit_owned_gtid_by_partial_command(THD *thd)
1626 {
1627   DBUG_ENTER("commit_owned_gtid_by_partial_command(THD *thd)");
1628   bool need_clear_owned_gtid_ptr= false;
1629   int ret= 0;
1630 
1631   if (commit_owned_gtids(thd, true, &need_clear_owned_gtid_ptr))
1632   {
1633     /* Error when saving gtid into mysql.gtid_executed table. */
1634     gtid_state->update_on_rollback(thd);
1635     ret= -1;
1636   }
1637   else if (need_clear_owned_gtid_ptr)
1638   {
1639     gtid_state->update_on_commit(thd);
1640     ret= 0;
1641   }
1642   else
1643   {
1644     ret= 1;
1645   }
1646 
1647   DBUG_RETURN(ret);
1648 }
1649 
1650 
1651 /**
1652   @param[in] ignore_global_read_lock   Allow commit to complete even if a
1653                                        global read lock is active. This can be
1654                                        used to allow changes to internal tables
1655                                        (e.g. slave status tables).
1656 
1657   @retval
1658     0   ok
1659   @retval
1660     1   transaction was rolled back
1661   @retval
1662     2   error during commit, data may be inconsistent
1663 
1664   @todo
1665     Since we don't support nested statement transactions in 5.0,
1666     we can't commit or rollback stmt transactions while we are inside
1667     stored functions or triggers. So we simply do nothing now.
1668     TODO: This should be fixed in later ( >= 5.1) releases.
1669 */
1670 
ha_commit_trans(THD * thd,bool all,bool ignore_global_read_lock)1671 int ha_commit_trans(THD *thd, bool all, bool ignore_global_read_lock)
1672 {
1673   int error= 0;
1674   bool need_clear_owned_gtid= false;
1675   /*
1676     Save transaction owned gtid into table before transaction prepare
1677     if binlog is disabled, or binlog is enabled and log_slave_updates
1678     is disabled with slave SQL thread or slave worker thread.
1679   */
1680   error= commit_owned_gtids(thd, all, &need_clear_owned_gtid);
1681 
1682   /*
1683     'all' means that this is either an explicit commit issued by
1684     user, or an implicit commit issued by a DDL.
1685   */
1686   Transaction_ctx *trn_ctx= thd->get_transaction();
1687   Transaction_ctx::enum_trx_scope trx_scope=
1688     all ? Transaction_ctx::SESSION : Transaction_ctx::STMT;
1689 
1690   /*
1691     "real" is a nick name for a transaction for which a commit will
1692     make persistent changes. E.g. a 'stmt' transaction inside a 'all'
1693     transation is not 'real': even though it's possible to commit it,
1694     the changes are not durable as they might be rolled back if the
1695     enclosing 'all' transaction is rolled back.
1696   */
1697   bool is_real_trans=
1698     all || !trn_ctx->is_active(Transaction_ctx::SESSION);
1699 
1700   Ha_trx_info *ha_info= trn_ctx->ha_trx_info(trx_scope);
1701   XID_STATE *xid_state= trn_ctx->xid_state();
1702 
1703   DBUG_ENTER("ha_commit_trans");
1704 
1705   DBUG_PRINT("info", ("all=%d thd->in_sub_stmt=%d ha_info=%p is_real_trans=%d",
1706                       all, thd->in_sub_stmt, ha_info, is_real_trans));
1707   /*
1708     We must not commit the normal transaction if a statement
1709     transaction is pending. Otherwise statement transaction
1710     flags will not get propagated to its normal transaction's
1711     counterpart.
1712   */
1713   assert(!trn_ctx->is_active(Transaction_ctx::STMT) ||
1714          !all);
1715 
1716   if (thd->in_sub_stmt)
1717   {
1718     assert(0);
1719     /*
1720       Since we don't support nested statement transactions in 5.0,
1721       we can't commit or rollback stmt transactions while we are inside
1722       stored functions or triggers. So we simply do nothing now.
1723       TODO: This should be fixed in later ( >= 5.1) releases.
1724     */
1725     if (!all)
1726       DBUG_RETURN(0);
1727     /*
1728       We assume that all statements which commit or rollback main transaction
1729       are prohibited inside of stored functions or triggers. So they should
1730       bail out with error even before ha_commit_trans() call. To be 100% safe
1731       let us throw error in non-debug builds.
1732     */
1733     my_error(ER_COMMIT_NOT_ALLOWED_IN_SF_OR_TRG, MYF(0));
1734     DBUG_RETURN(2);
1735   }
1736 
1737   MDL_request mdl_request;
1738   bool release_mdl= false;
1739   if (ha_info)
1740   {
1741     uint rw_ha_count;
1742     bool rw_trans;
1743 
1744     DBUG_EXECUTE_IF("crash_commit_before", DBUG_SUICIDE(););
1745 
1746     rw_ha_count= ha_check_and_coalesce_trx_read_only(thd, ha_info, all);
1747     trn_ctx->set_rw_ha_count(trx_scope, rw_ha_count);
1748     /* rw_trans is TRUE when we in a transaction changing data */
1749     rw_trans= is_real_trans && (rw_ha_count > 0);
1750 
1751     DBUG_EXECUTE_IF("dbug.enabled_commit",
1752                     {
1753                       const char act[]= "now signal Reached wait_for signal.commit_continue";
1754                       assert(!debug_sync_set_action(current_thd,
1755                                                     STRING_WITH_LEN(act)));
1756                     };);
1757     if (rw_trans && !ignore_global_read_lock)
1758     {
1759       /*
1760         Acquire a metadata lock which will ensure that COMMIT is blocked
1761         by an active FLUSH TABLES WITH READ LOCK (and vice versa:
1762         COMMIT in progress blocks FTWRL).
1763 
1764         We allow the owner of FTWRL to COMMIT; we assume that it knows
1765         what it does.
1766       */
1767       MDL_REQUEST_INIT(&mdl_request,
1768                        MDL_key::COMMIT, "", "", MDL_INTENTION_EXCLUSIVE,
1769                        MDL_EXPLICIT);
1770 
1771       DBUG_PRINT("debug", ("Acquire MDL commit lock"));
1772       if (thd->mdl_context.acquire_lock(&mdl_request,
1773                                         thd->variables.lock_wait_timeout))
1774       {
1775         ha_rollback_trans(thd, all);
1776         DBUG_RETURN(1);
1777       }
1778       release_mdl= true;
1779 
1780       DEBUG_SYNC(thd, "ha_commit_trans_after_acquire_commit_lock");
1781     }
1782 
1783     if (rw_trans && (stmt_has_updated_trans_table(ha_info)
1784         || trans_has_noop_dml(ha_info)) && check_readonly(thd, true))
1785     {
1786       ha_rollback_trans(thd, all);
1787       error= 1;
1788       goto end;
1789     }
1790 
1791     if (!trn_ctx->no_2pc(trx_scope) && (trn_ctx->rw_ha_count(trx_scope) > 1))
1792       error= tc_log->prepare(thd, all);
1793   }
1794   /*
1795     The state of XA transaction is changed to Prepared, intermediately.
1796     It's going to change to the regular NOTR at the end.
1797     The fact of the Prepared state is of interest to binary logger.
1798   */
1799   if (!error && all && xid_state->has_state(XID_STATE::XA_IDLE))
1800   {
1801     assert(thd->lex->sql_command == SQLCOM_XA_COMMIT &&
1802            static_cast<Sql_cmd_xa_commit*>(thd->lex->m_sql_cmd)->
1803            get_xa_opt() == XA_ONE_PHASE);
1804 
1805     xid_state->set_state(XID_STATE::XA_PREPARED);
1806   }
1807   if (error || (error= tc_log->commit(thd, all)))
1808   {
1809     ha_rollback_trans(thd, all);
1810     error= 1;
1811     goto end;
1812   }
1813 /*
1814   Mark multi-statement (any autocommit mode) or single-statement
1815   (autocommit=1) transaction as rolled back
1816 */
1817 #ifdef HAVE_PSI_TRANSACTION_INTERFACE
1818   if (is_real_trans && thd->m_transaction_psi != NULL)
1819   {
1820     MYSQL_COMMIT_TRANSACTION(thd->m_transaction_psi);
1821     thd->m_transaction_psi= NULL;
1822   }
1823 #endif
1824   DBUG_EXECUTE_IF("crash_commit_after",
1825                   if (!thd->is_operating_gtid_table_implicitly)
1826                     DBUG_SUICIDE(););
1827 end:
1828   if (release_mdl && mdl_request.ticket)
1829   {
1830     /*
1831       We do not always immediately release transactional locks
1832       after ha_commit_trans() (see uses of ha_enable_transaction()),
1833       thus we release the commit blocker lock as soon as it's
1834       not needed.
1835     */
1836     DBUG_PRINT("debug", ("Releasing MDL commit lock"));
1837     thd->mdl_context.release_lock(mdl_request.ticket);
1838   }
1839   /* Free resources and perform other cleanup even for 'empty' transactions. */
1840   if (is_real_trans)
1841   {
1842     trn_ctx->cleanup();
1843     thd->tx_priority= 0;
1844   }
1845 
1846   if (need_clear_owned_gtid)
1847   {
1848     thd->server_status&= ~SERVER_STATUS_IN_TRANS;
1849     /*
1850       Release the owned GTID when binlog is disabled, or binlog is
1851       enabled and log_slave_updates is disabled with slave SQL thread
1852       or slave worker thread.
1853     */
1854     if (error)
1855       gtid_state->update_on_rollback(thd);
1856     else
1857       gtid_state->update_on_commit(thd);
1858   }
1859 
1860   DBUG_RETURN(error);
1861 }
1862 
1863 /**
1864   Commit the sessions outstanding transaction.
1865 
1866   @pre thd->transaction.flags.commit_low == true
1867   @post thd->transaction.flags.commit_low == false
1868 
1869   @note This function does not care about global read lock; the caller
1870   should.
1871 
1872   @param[in]  all  Is set in case of explicit commit
1873                    (COMMIT statement), or implicit commit
1874                    issued by DDL. Is not set when called
1875                    at the end of statement, even if
1876                    autocommit=1.
1877   @param[in]  run_after_commit
1878                    True by default, otherwise, does not execute
1879                    the after_commit hook in the function.
1880 */
1881 
ha_commit_low(THD * thd,bool all,bool run_after_commit)1882 int ha_commit_low(THD *thd, bool all, bool run_after_commit)
1883 {
1884   int error=0;
1885   Transaction_ctx *trn_ctx= thd->get_transaction();
1886   Transaction_ctx::enum_trx_scope trx_scope=
1887     all ? Transaction_ctx::SESSION : Transaction_ctx::STMT;
1888   Ha_trx_info *ha_info= trn_ctx->ha_trx_info(trx_scope), *ha_info_next;
1889 
1890   DBUG_ENTER("ha_commit_low");
1891 
1892   if (ha_info)
1893   {
1894     bool restore_backup_ha_data= false;
1895     /*
1896       At execution of XA COMMIT ONE PHASE binlog or slave applier
1897       reattaches the engine ha_data to THD, previously saved at XA START.
1898     */
1899     if (all && thd->rpl_unflag_detached_engine_ha_data())
1900     {
1901       assert(thd->lex->sql_command == SQLCOM_XA_COMMIT);
1902       assert(static_cast<Sql_cmd_xa_commit*>(thd->lex->m_sql_cmd)->
1903              get_xa_opt() == XA_ONE_PHASE);
1904       restore_backup_ha_data= true;
1905     }
1906 
1907     for (; ha_info; ha_info= ha_info_next)
1908     {
1909       int err;
1910       handlerton *ht= ha_info->ht();
1911       if ((err= ht->commit(ht, thd, all)))
1912       {
1913         my_error(ER_ERROR_DURING_COMMIT, MYF(0), err);
1914         error=1;
1915       }
1916       assert(!thd->status_var_aggregated);
1917       thd->status_var.ha_commit_count++;
1918       ha_info_next= ha_info->next();
1919       if (restore_backup_ha_data)
1920         reattach_engine_ha_data_to_thd(thd, ht);
1921       ha_info->reset(); /* keep it conveniently zero-filled */
1922     }
1923     trn_ctx->reset_scope(trx_scope);
1924     if (all)
1925     {
1926       trn_ctx->invalidate_changed_tables_in_cache();
1927     }
1928   }
1929   /* Free resources and perform other cleanup even for 'empty' transactions. */
1930   if (all)
1931     trn_ctx->cleanup();
1932   /*
1933     When the transaction has been committed, we clear the commit_low
1934     flag. This allow other parts of the system to check if commit_low
1935     was called.
1936   */
1937   trn_ctx->m_flags.commit_low= false;
1938   if (run_after_commit && thd->get_transaction()->m_flags.run_hooks)
1939   {
1940     /*
1941        If commit succeeded, we call the after_commit hook.
1942 
1943        TODO: Investigate if this can be refactored so that there is
1944              only one invocation of this hook in the code (in
1945              MYSQL_LOG_BIN::finish_commit).
1946     */
1947     if (!error)
1948       (void) RUN_HOOK(transaction, after_commit, (thd, all));
1949     trn_ctx->m_flags.run_hooks= false;
1950   }
1951   DBUG_RETURN(error);
1952 }
1953 
1954 
ha_rollback_low(THD * thd,bool all)1955 int ha_rollback_low(THD *thd, bool all)
1956 {
1957   Transaction_ctx *trn_ctx= thd->get_transaction();
1958   int error= 0;
1959   Transaction_ctx::enum_trx_scope trx_scope=
1960     all ? Transaction_ctx::SESSION : Transaction_ctx::STMT;
1961   Ha_trx_info *ha_info= trn_ctx->ha_trx_info(trx_scope), *ha_info_next;
1962 
1963   (void) RUN_HOOK(transaction, before_rollback, (thd, all));
1964 
1965   if (ha_info)
1966   {
1967     bool restore_backup_ha_data= false;
1968     /*
1969       Similarly to the commit case, the binlog or slave applier
1970       reattaches the engine ha_data to THD.
1971     */
1972     if (all && thd->rpl_unflag_detached_engine_ha_data())
1973     {
1974       assert(trn_ctx->xid_state()->get_state() != XID_STATE::XA_NOTR ||
1975              thd->killed == THD::KILL_CONNECTION);
1976 
1977       restore_backup_ha_data= true;
1978     }
1979 
1980     for (; ha_info; ha_info= ha_info_next)
1981     {
1982       int err;
1983       handlerton *ht= ha_info->ht();
1984       if ((err= ht->rollback(ht, thd, all)))
1985       { // cannot happen
1986         my_error(ER_ERROR_DURING_ROLLBACK, MYF(0), err);
1987         error= 1;
1988       }
1989       assert(!thd->status_var_aggregated);
1990       thd->status_var.ha_rollback_count++;
1991       ha_info_next= ha_info->next();
1992       if (restore_backup_ha_data)
1993         reattach_engine_ha_data_to_thd(thd, ht);
1994       ha_info->reset(); /* keep it conveniently zero-filled */
1995     }
1996     trn_ctx->reset_scope(trx_scope);
1997   }
1998 
1999   /*
2000     Thanks to possibility of MDL deadlock rollback request can come even if
2001     transaction hasn't been started in any transactional storage engine.
2002 
2003     It is possible to have a call of ha_rollback_low() while handling
2004     failure from ha_prepare() and an error in Daignostics_area still
2005     wasn't set. Therefore it is required to check that an error in
2006     Diagnostics_area is set before calling the method XID_STATE::set_error().
2007 
2008     If it wasn't done it would lead to failure of the assertion
2009     assert(m_status == DA_ERROR)
2010     in the method Diagnostics_area::mysql_errno().
2011 
2012     In case ha_prepare is failed and an error wasn't set in Diagnostics_area
2013     the error ER_XA_RBROLLBACK is set in the Diagnostics_area from
2014     the method Sql_cmd_xa_prepare::trans_xa_prepare() when non-zero result code
2015     returned by ha_prepare() is handled.
2016   */
2017   if (all && thd->transaction_rollback_request && thd->is_error())
2018     trn_ctx->xid_state()->set_error(thd);
2019 
2020   (void) RUN_HOOK(transaction, after_rollback, (thd, all));
2021   return error;
2022 }
2023 
2024 
ha_rollback_trans(THD * thd,bool all)2025 int ha_rollback_trans(THD *thd, bool all)
2026 {
2027   int error=0;
2028   Transaction_ctx *trn_ctx= thd->get_transaction();
2029   bool is_xa_rollback= trn_ctx->xid_state()->has_state(XID_STATE::XA_PREPARED);
2030 
2031   /*
2032     "real" is a nick name for a transaction for which a commit will
2033     make persistent changes. E.g. a 'stmt' transaction inside a 'all'
2034     transaction is not 'real': even though it's possible to commit it,
2035     the changes are not durable as they might be rolled back if the
2036     enclosing 'all' transaction is rolled back.
2037     We establish the value of 'is_real_trans' by checking
2038     if it's an explicit COMMIT or BEGIN statement, or implicit
2039     commit issued by DDL (in these cases all == TRUE),
2040     or if we're running in autocommit mode (it's only in the autocommit mode
2041     ha_commit_one_phase() is called with an empty
2042     transaction.all.ha_list, see why in trans_register_ha()).
2043   */
2044   bool is_real_trans=
2045     all || !trn_ctx->is_active(Transaction_ctx::SESSION);
2046 
2047   DBUG_ENTER("ha_rollback_trans");
2048 
2049   /*
2050     We must not rollback the normal transaction if a statement
2051     transaction is pending.
2052   */
2053   assert(!trn_ctx->is_active(Transaction_ctx::STMT) ||
2054          !all);
2055 
2056   if (thd->in_sub_stmt)
2057   {
2058     assert(0);
2059     /*
2060       If we are inside stored function or trigger we should not commit or
2061       rollback current statement transaction. See comment in ha_commit_trans()
2062       call for more information.
2063     */
2064     if (!all)
2065       DBUG_RETURN(0);
2066     my_error(ER_COMMIT_NOT_ALLOWED_IN_SF_OR_TRG, MYF(0));
2067     DBUG_RETURN(1);
2068   }
2069 
2070   if (tc_log)
2071     error= tc_log->rollback(thd, all);
2072   /*
2073     Mark multi-statement (any autocommit mode) or single-statement
2074     (autocommit=1) transaction as rolled back
2075   */
2076 #ifdef HAVE_PSI_TRANSACTION_INTERFACE
2077   if (all || !thd->in_active_multi_stmt_transaction())
2078   {
2079     MYSQL_ROLLBACK_TRANSACTION(thd->m_transaction_psi);
2080     thd->m_transaction_psi= NULL;
2081   }
2082 #endif
2083 
2084   /* Always cleanup. Even if nht==0. There may be savepoints. */
2085   if (is_real_trans)
2086   {
2087     trn_ctx->cleanup();
2088     thd->tx_priority= 0;
2089   }
2090 
2091   if (all)
2092     thd->transaction_rollback_request= FALSE;
2093 
2094   /*
2095     Only call gtid_rollback(THD*), which will purge thd->owned_gtid, if
2096     complete transaction is being rollback or autocommit=1.
2097     Notice, XA rollback has just invoked update_on_commit() through
2098     tc_log->*rollback* stack.
2099   */
2100   if (is_real_trans && !is_xa_rollback)
2101     gtid_state->update_on_rollback(thd);
2102 
2103   /*
2104     If the transaction cannot be rolled back safely, warn; don't warn if this
2105     is a slave thread (because when a slave thread executes a ROLLBACK, it has
2106     been read from the binary log, so it's 100% sure and normal to produce
2107     error ER_WARNING_NOT_COMPLETE_ROLLBACK. If we sent the warning to the
2108     slave SQL thread, it would not stop the thread but just be printed in
2109     the error log; but we don't want users to wonder why they have this
2110     message in the error log, so we don't send it.
2111   */
2112   if (is_real_trans &&
2113       trn_ctx->cannot_safely_rollback(
2114         Transaction_ctx::SESSION) &&
2115       !thd->slave_thread && thd->killed != THD::KILL_CONNECTION)
2116     trn_ctx->push_unsafe_rollback_warnings(thd);
2117 
2118   DBUG_RETURN(error);
2119 }
2120 
2121 
2122 /**
2123   Commit the attachable transaction in storage engines.
2124 
2125   @note This is slimmed down version of ha_commit_trans()/ha_commit_low()
2126         which commits attachable transaction but skips code which is
2127         unnecessary and unsafe for them (like dealing with GTIDs).
2128         Since attachable transactions are read-only their commit only
2129         needs to release resources and cleanup state in SE.
2130 
2131   @param thd     Current thread
2132 
2133   @retval 0      - Success
2134   @retval non-0  - Failure
2135 */
ha_commit_attachable(THD * thd)2136 int ha_commit_attachable(THD *thd)
2137 {
2138   int error= 0;
2139   Transaction_ctx *trn_ctx= thd->get_transaction();
2140   Ha_trx_info *ha_info= trn_ctx->ha_trx_info(Transaction_ctx::STMT);
2141   Ha_trx_info *ha_info_next;
2142 
2143   /* This function only handles attachable transactions. */
2144   assert(thd->is_attachable_ro_transaction_active());
2145   /*
2146     Since the attachable transaction is AUTOCOMMIT we only need
2147     to care about statement transaction.
2148   */
2149   assert(! trn_ctx->is_active(Transaction_ctx::SESSION));
2150 
2151   if (ha_info)
2152   {
2153     for (; ha_info; ha_info= ha_info_next)
2154     {
2155       /* Attachable transaction is not supposed to modify anything. */
2156       assert(! ha_info->is_trx_read_write());
2157 
2158       handlerton *ht= ha_info->ht();
2159       if (ht->commit(ht, thd, false))
2160       {
2161         /*
2162           In theory this should not happen since attachable transactions
2163           are read only and therefore commit is supposed to only release
2164           resources/cleanup state. Even if this happens we will simply
2165           continue committing attachable transaction in other SEs.
2166         */
2167         assert(false);
2168         error= 1;
2169       }
2170       assert(!thd->status_var_aggregated);
2171       thd->status_var.ha_commit_count++;
2172       ha_info_next= ha_info->next();
2173 
2174       ha_info->reset(); /* keep it conveniently zero-filled */
2175     }
2176     trn_ctx->reset_scope(Transaction_ctx::STMT);
2177   }
2178 
2179   /*
2180     Mark transaction as commited in PSI.
2181   */
2182 #ifdef HAVE_PSI_TRANSACTION_INTERFACE
2183   if (thd->m_transaction_psi != NULL)
2184   {
2185     MYSQL_COMMIT_TRANSACTION(thd->m_transaction_psi);
2186     thd->m_transaction_psi= NULL;
2187   }
2188 #endif
2189 
2190   /* Free resources and perform other cleanup even for 'empty' transactions. */
2191   trn_ctx->cleanup();
2192 
2193   return (error);
2194 }
2195 
2196 
2197 /**
2198   @details
2199   This function should be called when MySQL sends rows of a SELECT result set
2200   or the EOF mark to the client. It releases a possible adaptive hash index
2201   S-latch held by thd in InnoDB and also releases a possible InnoDB query
2202   FIFO ticket to enter InnoDB. To save CPU time, InnoDB allows a thd to
2203   keep them over several calls of the InnoDB handler interface when a join
2204   is executed. But when we let the control to pass to the client they have
2205   to be released because if the application program uses mysql_use_result(),
2206   it may deadlock on the S-latch if the application on another connection
2207   performs another SQL query. In MySQL-4.1 this is even more important because
2208   there a connection can have several SELECT queries open at the same time.
2209 
2210   @param thd           the thread handle of the current connection
2211 
2212   @return
2213     always 0
2214 */
2215 
ha_release_temporary_latches(THD * thd)2216 int ha_release_temporary_latches(THD *thd)
2217 {
2218   const Ha_trx_info *info;
2219   Transaction_ctx *trn_ctx= thd->get_transaction();
2220 
2221   /*
2222     Note that below we assume that only transactional storage engines
2223     may need release_temporary_latches(). If this will ever become false,
2224     we could iterate on thd->open_tables instead (and remove duplicates
2225     as if (!seen[hton->slot]) { seen[hton->slot]=1; ... }).
2226   */
2227   for (info= trn_ctx->ha_trx_info(Transaction_ctx::STMT);
2228        info; info= info->next())
2229   {
2230     handlerton *hton= info->ht();
2231     if (hton && hton->release_temporary_latches)
2232         hton->release_temporary_latches(hton, thd);
2233   }
2234   return 0;
2235 }
2236 
2237 /**
2238   Check if all storage engines used in transaction agree that after
2239   rollback to savepoint it is safe to release MDL locks acquired after
2240   savepoint creation.
2241 
2242   @param thd   The client thread that executes the transaction.
2243 
2244   @return true  - It is safe to release MDL locks.
2245           false - If it is not.
2246 */
ha_rollback_to_savepoint_can_release_mdl(THD * thd)2247 bool ha_rollback_to_savepoint_can_release_mdl(THD *thd)
2248 {
2249   Ha_trx_info *ha_info;
2250   Transaction_ctx *trn_ctx= thd->get_transaction();
2251   Transaction_ctx::enum_trx_scope trx_scope=
2252     thd->in_sub_stmt ? Transaction_ctx::STMT : Transaction_ctx::SESSION;
2253 
2254   DBUG_ENTER("ha_rollback_to_savepoint_can_release_mdl");
2255 
2256   /**
2257     Checking whether it is safe to release metadata locks after rollback to
2258     savepoint in all the storage engines that are part of the transaction.
2259   */
2260   for (ha_info= trn_ctx->ha_trx_info(trx_scope);
2261        ha_info; ha_info= ha_info->next())
2262   {
2263     handlerton *ht= ha_info->ht();
2264     assert(ht);
2265 
2266     if (ht->savepoint_rollback_can_release_mdl == 0 ||
2267         ht->savepoint_rollback_can_release_mdl(ht, thd) == false)
2268       DBUG_RETURN(false);
2269   }
2270 
2271   DBUG_RETURN(true);
2272 }
2273 
ha_rollback_to_savepoint(THD * thd,SAVEPOINT * sv)2274 int ha_rollback_to_savepoint(THD *thd, SAVEPOINT *sv)
2275 {
2276   int error=0;
2277   Transaction_ctx *trn_ctx= thd->get_transaction();
2278   Transaction_ctx::enum_trx_scope trx_scope=
2279     !thd->in_sub_stmt ? Transaction_ctx::SESSION : Transaction_ctx::STMT;
2280 
2281   Ha_trx_info *ha_info, *ha_info_next;
2282 
2283   DBUG_ENTER("ha_rollback_to_savepoint");
2284 
2285   trn_ctx->set_rw_ha_count(trx_scope, 0);
2286   trn_ctx->set_no_2pc(trx_scope, 0);
2287   /*
2288     rolling back to savepoint in all storage engines that were part of the
2289     transaction when the savepoint was set
2290   */
2291   for (ha_info= sv->ha_list; ha_info; ha_info= ha_info->next())
2292   {
2293     int err;
2294     handlerton *ht= ha_info->ht();
2295     assert(ht);
2296     assert(ht->savepoint_set != 0);
2297     if ((err= ht->savepoint_rollback(ht, thd,
2298                                      (uchar *)(sv+1)+ht->savepoint_offset)))
2299     { // cannot happen
2300       my_error(ER_ERROR_DURING_ROLLBACK, MYF(0), err);
2301       error=1;
2302     }
2303     assert(!thd->status_var_aggregated);
2304     thd->status_var.ha_savepoint_rollback_count++;
2305     if (ht->prepare == 0)
2306       trn_ctx->set_no_2pc(trx_scope, true);
2307   }
2308 
2309   /*
2310     rolling back the transaction in all storage engines that were not part of
2311     the transaction when the savepoint was set
2312   */
2313   for (ha_info= trn_ctx->ha_trx_info(trx_scope); ha_info != sv->ha_list;
2314        ha_info= ha_info_next)
2315   {
2316     int err;
2317     handlerton *ht= ha_info->ht();
2318     if ((err= ht->rollback(ht, thd, !thd->in_sub_stmt)))
2319     { // cannot happen
2320       my_error(ER_ERROR_DURING_ROLLBACK, MYF(0), err);
2321       error=1;
2322     }
2323     assert(!thd->status_var_aggregated);
2324     thd->status_var.ha_rollback_count++;
2325     ha_info_next= ha_info->next();
2326     ha_info->reset(); /* keep it conveniently zero-filled */
2327   }
2328   trn_ctx->set_ha_trx_info(trx_scope, sv->ha_list);
2329 
2330 #ifdef HAVE_PSI_TRANSACTION_INTERFACE
2331   if (thd->m_transaction_psi != NULL)
2332     MYSQL_INC_TRANSACTION_ROLLBACK_TO_SAVEPOINT(thd->m_transaction_psi, 1);
2333 #endif
2334 
2335   DBUG_RETURN(error);
2336 }
2337 
ha_prepare_low(THD * thd,bool all)2338 int ha_prepare_low(THD *thd, bool all)
2339 {
2340   int error= 0;
2341   Transaction_ctx::enum_trx_scope trx_scope=
2342     all ? Transaction_ctx::SESSION : Transaction_ctx::STMT;
2343   Ha_trx_info *ha_info= thd->get_transaction()->ha_trx_info(trx_scope);
2344 
2345   DBUG_ENTER("ha_prepare_low");
2346 
2347   if (ha_info)
2348   {
2349     for (; ha_info && !error; ha_info= ha_info->next())
2350     {
2351       int err= 0;
2352       handlerton *ht= ha_info->ht();
2353       /*
2354         Do not call two-phase commit if this particular
2355         transaction is read-only. This allows for simpler
2356         implementation in engines that are always read-only.
2357       */
2358       if (!ha_info->is_trx_read_write())
2359         continue;
2360       if ((err= ht->prepare(ht, thd, all)))
2361       {
2362         my_error(ER_ERROR_DURING_COMMIT, MYF(0), err);
2363         error= 1;
2364       }
2365       assert(!thd->status_var_aggregated);
2366       thd->status_var.ha_prepare_count++;
2367     }
2368     DBUG_EXECUTE_IF("crash_commit_after_prepare", DBUG_SUICIDE(););
2369   }
2370 
2371   DBUG_RETURN(error);
2372 }
2373 
2374 /**
2375   @note
2376   according to the sql standard (ISO/IEC 9075-2:2003)
2377   section "4.33.4 SQL-statements and transaction states",
2378   SAVEPOINT is *not* transaction-initiating SQL-statement
2379 */
ha_savepoint(THD * thd,SAVEPOINT * sv)2380 int ha_savepoint(THD *thd, SAVEPOINT *sv)
2381 {
2382   int error=0;
2383   Transaction_ctx::enum_trx_scope trx_scope=
2384     !thd->in_sub_stmt ? Transaction_ctx::SESSION : Transaction_ctx::STMT;
2385   Ha_trx_info *ha_info= thd->get_transaction()->ha_trx_info(trx_scope);
2386   Ha_trx_info *begin_ha_info= ha_info;
2387 
2388   DBUG_ENTER("ha_savepoint");
2389 
2390   for (; ha_info; ha_info= ha_info->next())
2391   {
2392     int err;
2393     handlerton *ht= ha_info->ht();
2394     assert(ht);
2395     if (! ht->savepoint_set)
2396     {
2397       my_error(ER_CHECK_NOT_IMPLEMENTED, MYF(0), "SAVEPOINT");
2398       error=1;
2399       break;
2400     }
2401     if ((err= ht->savepoint_set(ht, thd, (uchar *)(sv+1)+ht->savepoint_offset)))
2402     { // cannot happen
2403       my_error(ER_GET_ERRNO, MYF(0), err);
2404       error=1;
2405     }
2406     assert(!thd->status_var_aggregated);
2407     thd->status_var.ha_savepoint_count++;
2408   }
2409   /*
2410     Remember the list of registered storage engines. All new
2411     engines are prepended to the beginning of the list.
2412   */
2413   sv->ha_list= begin_ha_info;
2414 
2415 #ifdef HAVE_PSI_TRANSACTION_INTERFACE
2416   if (!error && thd->m_transaction_psi != NULL)
2417     MYSQL_INC_TRANSACTION_SAVEPOINTS(thd->m_transaction_psi, 1);
2418 #endif
2419 
2420   DBUG_RETURN(error);
2421 }
2422 
ha_release_savepoint(THD * thd,SAVEPOINT * sv)2423 int ha_release_savepoint(THD *thd, SAVEPOINT *sv)
2424 {
2425   int error=0;
2426   Ha_trx_info *ha_info= sv->ha_list;
2427   DBUG_ENTER("ha_release_savepoint");
2428 
2429   for (; ha_info; ha_info= ha_info->next())
2430   {
2431     int err;
2432     handlerton *ht= ha_info->ht();
2433     /* Savepoint life time is enclosed into transaction life time. */
2434     assert(ht);
2435     if (!ht->savepoint_release)
2436       continue;
2437     if ((err= ht->savepoint_release(ht, thd,
2438                                     (uchar *)(sv+1) + ht->savepoint_offset)))
2439     { // cannot happen
2440       my_error(ER_GET_ERRNO, MYF(0), err);
2441       error=1;
2442     }
2443   }
2444 
2445 #ifdef HAVE_PSI_TRANSACTION_INTERFACE
2446   if (thd->m_transaction_psi != NULL)
2447     MYSQL_INC_TRANSACTION_RELEASE_SAVEPOINT(thd->m_transaction_psi, 1);
2448 #endif
2449   DBUG_RETURN(error);
2450 }
2451 
2452 
snapshot_handlerton(THD * thd,plugin_ref plugin,void * arg)2453 static my_bool snapshot_handlerton(THD *thd, plugin_ref plugin,
2454                                    void *arg)
2455 {
2456   handlerton *hton= plugin_data<handlerton*>(plugin);
2457   if (hton->state == SHOW_OPTION_YES &&
2458       hton->start_consistent_snapshot)
2459   {
2460     hton->start_consistent_snapshot(hton, thd);
2461     *((bool *)arg)= false;
2462   }
2463   return FALSE;
2464 }
2465 
ha_start_consistent_snapshot(THD * thd)2466 int ha_start_consistent_snapshot(THD *thd)
2467 {
2468   bool warn= true;
2469 
2470   plugin_foreach(thd, snapshot_handlerton, MYSQL_STORAGE_ENGINE_PLUGIN, &warn);
2471 
2472   /*
2473     Same idea as when one wants to CREATE TABLE in one engine which does not
2474     exist:
2475   */
2476   if (warn)
2477     push_warning(thd, Sql_condition::SL_WARNING, ER_UNKNOWN_ERROR,
2478                  "This MySQL server does not support any "
2479                  "consistent-read capable storage engine");
2480   return 0;
2481 }
2482 
2483 
flush_handlerton(THD * thd,plugin_ref plugin,void * arg)2484 static my_bool flush_handlerton(THD *thd, plugin_ref plugin,
2485                                 void *arg)
2486 {
2487   handlerton *hton= plugin_data<handlerton*>(plugin);
2488   if (hton->state == SHOW_OPTION_YES && hton->flush_logs &&
2489       hton->flush_logs(hton, *(static_cast<bool *>(arg))))
2490     return TRUE;
2491   return FALSE;
2492 }
2493 
2494 
ha_flush_logs(handlerton * db_type,bool binlog_group_flush)2495 bool ha_flush_logs(handlerton *db_type, bool binlog_group_flush)
2496 {
2497   if (db_type == NULL)
2498   {
2499     if (plugin_foreach(NULL, flush_handlerton,
2500                        MYSQL_STORAGE_ENGINE_PLUGIN,
2501                        static_cast<void *>(&binlog_group_flush)))
2502       return TRUE;
2503   }
2504   else
2505   {
2506     if (db_type->state != SHOW_OPTION_YES ||
2507         (db_type->flush_logs &&
2508          db_type->flush_logs(db_type, binlog_group_flush)))
2509       return TRUE;
2510   }
2511   return FALSE;
2512 }
2513 
2514 
2515 /**
2516   @brief make canonical filename
2517 
2518   @param[in]  file     table handler
2519   @param[in]  path     original path
2520   @param[out] tmp_path buffer for canonized path
2521 
2522   @details Lower case db name and table name path parts for
2523            non file based tables when lower_case_table_names
2524            is 2 (store as is, compare in lower case).
2525            Filesystem path prefix (mysql_data_home or tmpdir)
2526            is left intact.
2527 
2528   @note tmp_path may be left intact if no conversion was
2529         performed.
2530 
2531   @retval canonized path
2532 
2533   @todo This may be done more efficiently when table path
2534         gets built. Convert this function to something like
2535         ASSERT_CANONICAL_FILENAME.
2536 */
get_canonical_filename(handler * file,const char * path,char * tmp_path)2537 const char *get_canonical_filename(handler *file, const char *path,
2538                                    char *tmp_path)
2539 {
2540   uint i;
2541   if (lower_case_table_names != 2 || (file->ha_table_flags() & HA_FILE_BASED))
2542     return path;
2543 
2544   for (i= 0; i <= mysql_tmpdir_list.max; i++)
2545   {
2546     if (is_prefix(path, mysql_tmpdir_list.list[i]))
2547       return path;
2548   }
2549 
2550   /* Ensure that table handler get path in lower case */
2551   if (tmp_path != path)
2552     my_stpcpy(tmp_path, path);
2553 
2554   /*
2555     we only should turn into lowercase database/table part
2556     so start the process after homedirectory
2557   */
2558   my_casedn_str(files_charset_info, tmp_path + mysql_data_home_len);
2559   return tmp_path;
2560 }
2561 
2562 
2563 class Ha_delete_table_error_handler: public Internal_error_handler
2564 {
2565 public:
handle_condition(THD * thd,uint sql_errno,const char * sqlstate,Sql_condition::enum_severity_level * level,const char * msg)2566   virtual bool handle_condition(THD *thd,
2567                                 uint sql_errno,
2568                                 const char* sqlstate,
2569                                 Sql_condition::enum_severity_level *level,
2570                                 const char* msg)
2571   {
2572     /* Downgrade errors to warnings. */
2573     if (*level == Sql_condition::SL_ERROR)
2574       *level= Sql_condition::SL_WARNING;
2575     return false;
2576   }
2577 };
2578 
2579 
2580 /** @brief
2581   This should return ENOENT if the file doesn't exists.
2582   The .frm file will be deleted only if we return 0 or ENOENT
2583 */
ha_delete_table(THD * thd,handlerton * table_type,const char * path,const char * db,const char * alias,bool generate_warning)2584 int ha_delete_table(THD *thd, handlerton *table_type, const char *path,
2585                     const char *db, const char *alias, bool generate_warning)
2586 {
2587   handler *file;
2588   char tmp_path[FN_REFLEN];
2589   int error;
2590   TABLE dummy_table;
2591   TABLE_SHARE dummy_share;
2592   DBUG_ENTER("ha_delete_table");
2593 
2594   dummy_table.s= &dummy_share;
2595 
2596   /* DB_TYPE_UNKNOWN is used in ALTER TABLE when renaming only .frm files */
2597   if (table_type == NULL ||
2598       ! (file=get_new_handler((TABLE_SHARE*)0, thd->mem_root, table_type)))
2599     DBUG_RETURN(ENOENT);
2600 
2601   path= get_canonical_filename(file, path, tmp_path);
2602   if ((error= file->ha_delete_table(path)) && generate_warning)
2603   {
2604     /*
2605       Because file->print_error() use my_error() to generate the error message
2606       we use an internal error handler to intercept it and store the text
2607       in a temporary buffer. Later the message will be presented to user
2608       as a warning.
2609     */
2610     Ha_delete_table_error_handler ha_delete_table_error_handler;
2611 
2612     /* Fill up strucutures that print_error may need */
2613     dummy_share.path.str= (char*) path;
2614     dummy_share.path.length= strlen(path);
2615     dummy_share.db.str= (char*) db;
2616     dummy_share.db.length= strlen(db);
2617     dummy_share.table_name.str= (char*) alias;
2618     dummy_share.table_name.length= strlen(alias);
2619     dummy_table.alias= alias;
2620 
2621     file->change_table_ptr(&dummy_table, &dummy_share);
2622 
2623     /*
2624       XXX: should we convert *all* errors to warnings here?
2625       What if the error is fatal?
2626     */
2627     thd->push_internal_handler(&ha_delete_table_error_handler);
2628     file->print_error(error, 0);
2629 
2630     thd->pop_internal_handler();
2631   }
2632   delete file;
2633 
2634 #ifdef HAVE_PSI_TABLE_INTERFACE
2635   if (likely(error == 0))
2636   {
2637     /* Table share not available, so check path for temp_table prefix. */
2638     bool temp_table= (strstr(path, tmp_file_prefix) != NULL);
2639     PSI_TABLE_CALL(drop_table_share)
2640       (temp_table, db, strlen(db), alias, strlen(alias));
2641   }
2642 #endif
2643 
2644   DBUG_RETURN(error);
2645 }
2646 
2647 /****************************************************************************
2648 ** General handler functions
2649 ****************************************************************************/
clone(const char * name,MEM_ROOT * mem_root)2650 handler *handler::clone(const char *name, MEM_ROOT *mem_root)
2651 {
2652   DBUG_ENTER("handler::clone");
2653   handler *new_handler= get_new_handler(table->s, mem_root, ht);
2654 
2655   if (!new_handler)
2656     DBUG_RETURN(NULL);
2657   if (new_handler->set_ha_share_ref(ha_share))
2658     goto err;
2659 
2660   /*
2661     Allocate handler->ref here because otherwise ha_open will allocate it
2662     on this->table->mem_root and we will not be able to reclaim that memory
2663     when the clone handler object is destroyed.
2664   */
2665   if (!(new_handler->ref= (uchar*) alloc_root(mem_root,
2666                                               ALIGN_SIZE(ref_length)*2)))
2667     goto err;
2668   /*
2669     TODO: Implement a more efficient way to have more than one index open for
2670     the same table instance. The ha_open call is not cachable for clone.
2671   */
2672   if (new_handler->ha_open(table, name, table->db_stat,
2673                            HA_OPEN_IGNORE_IF_LOCKED))
2674     goto err;
2675 
2676   DBUG_RETURN(new_handler);
2677 
2678 err:
2679   delete new_handler;
2680   DBUG_RETURN(NULL);
2681 }
2682 
2683 
ha_statistic_increment(ulonglong SSV::* offset) const2684 void handler::ha_statistic_increment(ulonglong SSV::*offset) const
2685 {
2686   if (table && table->in_use) (table->in_use->status_var.*offset)++;
2687 }
2688 
2689 
ha_thd(void) const2690 THD *handler::ha_thd(void) const
2691 {
2692   assert(!table || !table->in_use || table->in_use == current_thd);
2693   return (table && table->in_use) ? table->in_use : current_thd;
2694 }
2695 
unbind_psi()2696 void handler::unbind_psi()
2697 {
2698 #ifdef HAVE_PSI_TABLE_INTERFACE
2699   assert(m_lock_type == F_UNLCK);
2700   assert(inited == NONE);
2701   /*
2702     Notify the instrumentation that this table is not owned
2703     by this thread any more.
2704   */
2705   PSI_TABLE_CALL(unbind_table)(m_psi);
2706 #endif
2707 }
2708 
rebind_psi()2709 void handler::rebind_psi()
2710 {
2711 #ifdef HAVE_PSI_TABLE_INTERFACE
2712   assert(m_lock_type == F_UNLCK);
2713   assert(inited == NONE);
2714   /*
2715     Notify the instrumentation that this table is now owned
2716     by this thread.
2717   */
2718   PSI_table_share *share_psi= ha_table_share_psi(table_share);
2719   m_psi= PSI_TABLE_CALL(rebind_table)(share_psi, this, m_psi);
2720 #endif
2721 }
2722 
start_psi_batch_mode()2723 void handler::start_psi_batch_mode()
2724 {
2725 #ifdef HAVE_PSI_TABLE_INTERFACE
2726   assert(m_psi_batch_mode == PSI_BATCH_MODE_NONE);
2727   assert(m_psi_locker == NULL);
2728   m_psi_batch_mode= PSI_BATCH_MODE_STARTING;
2729   m_psi_numrows= 0;
2730 #endif
2731 }
2732 
end_psi_batch_mode()2733 void handler::end_psi_batch_mode()
2734 {
2735 #ifdef HAVE_PSI_TABLE_INTERFACE
2736   assert(m_psi_batch_mode != PSI_BATCH_MODE_NONE);
2737   if (m_psi_locker != NULL)
2738   {
2739     assert(m_psi_batch_mode == PSI_BATCH_MODE_STARTED);
2740     PSI_TABLE_CALL(end_table_io_wait)(m_psi_locker, m_psi_numrows);
2741     m_psi_locker= NULL;
2742   }
2743   m_psi_batch_mode= PSI_BATCH_MODE_NONE;
2744 #endif
2745 }
2746 
ha_table_share_psi(const TABLE_SHARE * share) const2747 PSI_table_share *handler::ha_table_share_psi(const TABLE_SHARE *share) const
2748 {
2749   return share->m_psi;
2750 }
2751 
2752 /** @brief
2753   Open database-handler.
2754 
2755   IMPLEMENTATION
2756     Try O_RDONLY if cannot open as O_RDWR
2757     Don't wait for locks if not HA_OPEN_WAIT_IF_LOCKED is set
2758 */
ha_open(TABLE * table_arg,const char * name,int mode,int test_if_locked)2759 int handler::ha_open(TABLE *table_arg, const char *name, int mode,
2760                      int test_if_locked)
2761 {
2762   int error;
2763   DBUG_ENTER("handler::ha_open");
2764   DBUG_PRINT("enter",
2765              ("name: %s  db_type: %d  db_stat: %d  mode: %d  lock_test: %d",
2766               name, ht->db_type, table_arg->db_stat, mode,
2767               test_if_locked));
2768 
2769   table= table_arg;
2770   assert(table->s == table_share);
2771   assert(m_lock_type == F_UNLCK);
2772   DBUG_PRINT("info", ("old m_lock_type: %d F_UNLCK %d", m_lock_type, F_UNLCK));
2773   assert(alloc_root_inited(&table->mem_root));
2774 
2775   if ((error=open(name,mode,test_if_locked)))
2776   {
2777     if ((error == EACCES || error == EROFS) && mode == O_RDWR &&
2778 	(table->db_stat & HA_TRY_READ_ONLY))
2779     {
2780       table->db_stat|=HA_READ_ONLY;
2781       error=open(name,O_RDONLY,test_if_locked);
2782     }
2783   }
2784   if (error)
2785   {
2786     set_my_errno(error);                            /* Safeguard */
2787     DBUG_PRINT("error",("error: %d  errno: %d",error,errno));
2788   }
2789   else
2790   {
2791     assert(m_psi == NULL);
2792     assert(table_share != NULL);
2793 #ifdef HAVE_PSI_TABLE_INTERFACE
2794     /*
2795       Do not call this for partitions handlers, since it may take too much
2796       resources.
2797       So only use the m_psi on table level, not for individual partitions.
2798     */
2799     if (!(test_if_locked & HA_OPEN_NO_PSI_CALL))
2800     {
2801       PSI_table_share *share_psi= ha_table_share_psi(table_share);
2802       m_psi= PSI_TABLE_CALL(open_table)(share_psi, this);
2803     }
2804 #endif
2805 
2806     if (table->s->db_options_in_use & HA_OPTION_READ_ONLY_DATA)
2807       table->db_stat|=HA_READ_ONLY;
2808     (void) extra(HA_EXTRA_NO_READCHECK);	// Not needed in SQL
2809 
2810     /* ref is already allocated for us if we're called from handler::clone() */
2811     if (!ref && !(ref= (uchar*) alloc_root(&table->mem_root,
2812                                           ALIGN_SIZE(ref_length)*2)))
2813     {
2814       ha_close();
2815       error=HA_ERR_OUT_OF_MEM;
2816     }
2817     else
2818       dup_ref=ref+ALIGN_SIZE(ref_length);
2819     cached_table_flags= table_flags();
2820   }
2821   DBUG_RETURN(error);
2822 }
2823 
2824 
2825 /**
2826   Close handler.
2827 */
2828 
ha_close(void)2829 int handler::ha_close(void)
2830 {
2831   DBUG_ENTER("handler::ha_close");
2832 #ifdef HAVE_PSI_TABLE_INTERFACE
2833   PSI_TABLE_CALL(close_table)(table_share, m_psi);
2834   m_psi= NULL; /* instrumentation handle, invalid after close_table() */
2835   assert(m_psi_batch_mode == PSI_BATCH_MODE_NONE);
2836   assert(m_psi_locker == NULL);
2837 #endif
2838   // TODO: set table= NULL to mark the handler as closed?
2839   assert(m_psi == NULL);
2840   assert(m_lock_type == F_UNLCK);
2841   assert(inited == NONE);
2842   DBUG_RETURN(close());
2843 }
2844 
2845 
2846 /**
2847   Initialize use of index.
2848 
2849   @param idx     Index to use
2850   @param sorted  Use sorted order
2851 
2852   @return Operation status
2853     @retval 0     Success
2854     @retval != 0  Error (error code returned)
2855 */
2856 
ha_index_init(uint idx,bool sorted)2857 int handler::ha_index_init(uint idx, bool sorted)
2858 {
2859   DBUG_EXECUTE_IF("ha_index_init_fail", return HA_ERR_TABLE_DEF_CHANGED;);
2860   int result;
2861   DBUG_ENTER("ha_index_init");
2862   assert(table_share->tmp_table != NO_TMP_TABLE ||
2863          m_lock_type != F_UNLCK);
2864   assert(inited == NONE);
2865   if (!(result= index_init(idx, sorted)))
2866     inited= INDEX;
2867   end_range= NULL;
2868   DBUG_RETURN(result);
2869 }
2870 
2871 
2872 /**
2873   End use of index.
2874 
2875   @return Operation status
2876     @retval 0     Success
2877     @retval != 0  Error (error code returned)
2878 */
2879 
ha_index_end()2880 int handler::ha_index_end()
2881 {
2882   DBUG_ENTER("ha_index_end");
2883   /* SQL HANDLER function can call this without having it locked. */
2884   assert(table->open_by_handler ||
2885          table_share->tmp_table != NO_TMP_TABLE ||
2886          m_lock_type != F_UNLCK);
2887   assert(inited == INDEX);
2888   inited= NONE;
2889   end_range= NULL;
2890   DBUG_RETURN(index_end());
2891 }
2892 
2893 
2894 /**
2895   Initialize table for random read or scan.
2896 
2897   @param scan  if true: Initialize for random scans through rnd_next()
2898                if false: Initialize for random reads through rnd_pos()
2899 
2900   @return Operation status
2901     @retval 0     Success
2902     @retval != 0  Error (error code returned)
2903 */
2904 
ha_rnd_init(bool scan)2905 int handler::ha_rnd_init(bool scan)
2906 {
2907   DBUG_EXECUTE_IF("ha_rnd_init_fail", return HA_ERR_TABLE_DEF_CHANGED;);
2908   int result;
2909   DBUG_ENTER("ha_rnd_init");
2910   assert(table_share->tmp_table != NO_TMP_TABLE ||
2911          m_lock_type != F_UNLCK);
2912   assert(inited == NONE || (inited == RND && scan));
2913   inited= (result= rnd_init(scan)) ? NONE : RND;
2914   end_range= NULL;
2915   DBUG_RETURN(result);
2916 }
2917 
2918 
2919 /**
2920   End use of random access.
2921 
2922   @return Operation status
2923     @retval 0     Success
2924     @retval != 0  Error (error code returned)
2925 */
2926 
ha_rnd_end()2927 int handler::ha_rnd_end()
2928 {
2929   DBUG_ENTER("ha_rnd_end");
2930   /* SQL HANDLER function can call this without having it locked. */
2931   assert(table->open_by_handler ||
2932          table_share->tmp_table != NO_TMP_TABLE ||
2933          m_lock_type != F_UNLCK);
2934   assert(inited == RND);
2935   inited= NONE;
2936   end_range= NULL;
2937   DBUG_RETURN(rnd_end());
2938 }
2939 
2940 
2941 /**
2942   Read next row via random scan.
2943 
2944   @param buf  Buffer to read the row into
2945 
2946   @return Operation status
2947     @retval 0     Success
2948     @retval != 0  Error (error code returned)
2949 */
2950 
ha_rnd_next(uchar * buf)2951 int handler::ha_rnd_next(uchar *buf)
2952 {
2953   int result;
2954   DBUG_EXECUTE_IF("ha_rnd_next_deadlock", return HA_ERR_LOCK_DEADLOCK;);
2955   DBUG_ENTER("handler::ha_rnd_next");
2956   assert(table_share->tmp_table != NO_TMP_TABLE ||
2957          m_lock_type != F_UNLCK);
2958   assert(inited == RND);
2959 
2960   // Set status for the need to update generated fields
2961   m_update_generated_read_fields= table->has_gcol();
2962 
2963   MYSQL_TABLE_IO_WAIT(PSI_TABLE_FETCH_ROW, MAX_KEY, result,
2964     { result= rnd_next(buf); })
2965   if (!result && m_update_generated_read_fields)
2966   {
2967     result= update_generated_read_fields(buf, table);
2968     m_update_generated_read_fields= false;
2969   }
2970   DBUG_RETURN(result);
2971 }
2972 
2973 
2974 /**
2975   Read row via random scan from position.
2976 
2977   @param[out] buf  Buffer to read the row into
2978   @param      pos  Position from position() call
2979 
2980   @return Operation status
2981     @retval 0     Success
2982     @retval != 0  Error (error code returned)
2983 */
2984 
ha_rnd_pos(uchar * buf,uchar * pos)2985 int handler::ha_rnd_pos(uchar *buf, uchar *pos)
2986 {
2987   int result;
2988   DBUG_ENTER("handler::ha_rnd_pos");
2989   assert(table_share->tmp_table != NO_TMP_TABLE ||
2990          m_lock_type != F_UNLCK);
2991   /* TODO: Find out how to solve ha_rnd_pos when finding duplicate update. */
2992   /* assert(inited == RND); */
2993 
2994   // Set status for the need to update generated fields
2995   m_update_generated_read_fields= table->has_gcol();
2996 
2997   MYSQL_TABLE_IO_WAIT(PSI_TABLE_FETCH_ROW, MAX_KEY, result,
2998     { result= rnd_pos(buf, pos); })
2999   if (!result && m_update_generated_read_fields)
3000   {
3001     result= update_generated_read_fields(buf, table);
3002     m_update_generated_read_fields= false;
3003   }
3004   DBUG_RETURN(result);
3005 }
3006 
3007 
3008 /**
3009   Read [part of] row via [part of] index.
3010   @param[out] buf          buffer where store the data
3011   @param      key          Key to search for
3012   @param      keypart_map  Which part of key to use
3013   @param      find_flag    Direction/condition on key usage
3014 
3015   @returns Operation status
3016     @retval  0                   Success (found a record, and function has
3017                                  set table->status to 0)
3018     @retval  HA_ERR_END_OF_FILE  Row not found (function has set table->status
3019                                  to STATUS_NOT_FOUND). End of index passed.
3020     @retval  HA_ERR_KEY_NOT_FOUND Row not found (function has set table->status
3021                                  to STATUS_NOT_FOUND). Index cursor positioned.
3022     @retval  != 0                Error
3023 
3024   @note Positions an index cursor to the index specified in the handle.
3025   Fetches the row if available. If the key value is null,
3026   begin at the first key of the index.
3027   ha_index_read_map can be restarted without calling index_end on the previous
3028   index scan and without calling ha_index_init. In this case the
3029   ha_index_read_map is on the same index as the previous ha_index_scan.
3030   This is particularly used in conjunction with multi read ranges.
3031 */
3032 
ha_index_read_map(uchar * buf,const uchar * key,key_part_map keypart_map,enum ha_rkey_function find_flag)3033 int handler::ha_index_read_map(uchar *buf, const uchar *key,
3034                                key_part_map keypart_map,
3035                                enum ha_rkey_function find_flag)
3036 {
3037   int result;
3038   DBUG_ENTER("handler::ha_index_read_map");
3039   assert(table_share->tmp_table != NO_TMP_TABLE ||
3040          m_lock_type != F_UNLCK);
3041   assert(inited == INDEX);
3042   assert(!pushed_idx_cond || buf == table->record[0]);
3043 
3044   // Set status for the need to update generated fields
3045   m_update_generated_read_fields= table->has_gcol();
3046 
3047   MYSQL_TABLE_IO_WAIT(PSI_TABLE_FETCH_ROW, active_index, result,
3048     { result= index_read_map(buf, key, keypart_map, find_flag); })
3049   if (!result && m_update_generated_read_fields)
3050   {
3051     result= update_generated_read_fields(buf, table, active_index);
3052     m_update_generated_read_fields= false;
3053   }
3054   DBUG_RETURN(result);
3055 }
3056 
ha_index_read_last_map(uchar * buf,const uchar * key,key_part_map keypart_map)3057 int handler::ha_index_read_last_map(uchar *buf, const uchar *key,
3058                                     key_part_map keypart_map)
3059 {
3060   int result;
3061   DBUG_ENTER("handler::ha_index_read_last_map");
3062   assert(table_share->tmp_table != NO_TMP_TABLE ||
3063          m_lock_type != F_UNLCK);
3064   assert(inited == INDEX);
3065   assert(!pushed_idx_cond || buf == table->record[0]);
3066 
3067   // Set status for the need to update generated fields
3068   m_update_generated_read_fields= table->has_gcol();
3069 
3070   MYSQL_TABLE_IO_WAIT(PSI_TABLE_FETCH_ROW, active_index, result,
3071     { result= index_read_last_map(buf, key, keypart_map); })
3072   if (!result && m_update_generated_read_fields)
3073   {
3074     result= update_generated_read_fields(buf, table, active_index);
3075     m_update_generated_read_fields= false;
3076   }
3077   DBUG_RETURN(result);
3078 }
3079 
3080 /**
3081   Initializes an index and read it.
3082 
3083   @see handler::ha_index_read_map.
3084 */
3085 
ha_index_read_idx_map(uchar * buf,uint index,const uchar * key,key_part_map keypart_map,enum ha_rkey_function find_flag)3086 int handler::ha_index_read_idx_map(uchar *buf, uint index, const uchar *key,
3087                                    key_part_map keypart_map,
3088                                    enum ha_rkey_function find_flag)
3089 {
3090   int result;
3091   assert(table_share->tmp_table != NO_TMP_TABLE ||
3092          m_lock_type != F_UNLCK);
3093   assert(end_range == NULL);
3094   assert(!pushed_idx_cond || buf == table->record[0]);
3095 
3096   // Set status for the need to update generated fields
3097   m_update_generated_read_fields= table->has_gcol();
3098 
3099   MYSQL_TABLE_IO_WAIT(PSI_TABLE_FETCH_ROW, index, result,
3100     { result= index_read_idx_map(buf, index, key, keypart_map, find_flag); })
3101   if (!result && m_update_generated_read_fields)
3102   {
3103     result= update_generated_read_fields(buf, table, index);
3104     m_update_generated_read_fields= false;
3105   }
3106   return result;
3107 }
3108 
3109 
3110 /**
3111   Reads the next row via index.
3112 
3113   @param[out] buf  Row data
3114 
3115   @return Operation status.
3116     @retval  0                   Success
3117     @retval  HA_ERR_END_OF_FILE  Row not found
3118     @retval  != 0                Error
3119 */
3120 
ha_index_next(uchar * buf)3121 int handler::ha_index_next(uchar * buf)
3122 {
3123   int result;
3124   DBUG_ENTER("handler::ha_index_next");
3125   assert(table_share->tmp_table != NO_TMP_TABLE ||
3126          m_lock_type != F_UNLCK);
3127   assert(inited == INDEX);
3128   assert(!pushed_idx_cond || buf == table->record[0]);
3129 
3130   // Set status for the need to update generated fields
3131   m_update_generated_read_fields= table->has_gcol();
3132 
3133   MYSQL_TABLE_IO_WAIT(PSI_TABLE_FETCH_ROW, active_index, result,
3134     { result= index_next(buf); })
3135   if (!result && m_update_generated_read_fields)
3136   {
3137     result= update_generated_read_fields(buf, table, active_index);
3138     m_update_generated_read_fields= false;
3139   }
3140   DBUG_RETURN(result);
3141 }
3142 
3143 
3144 /**
3145   Reads the previous row via index.
3146 
3147   @param[out] buf  Row data
3148 
3149   @return Operation status.
3150     @retval  0                   Success
3151     @retval  HA_ERR_END_OF_FILE  Row not found
3152     @retval  != 0                Error
3153 */
3154 
ha_index_prev(uchar * buf)3155 int handler::ha_index_prev(uchar * buf)
3156 {
3157   int result;
3158   DBUG_ENTER("handler::ha_index_prev");
3159   assert(table_share->tmp_table != NO_TMP_TABLE ||
3160          m_lock_type != F_UNLCK);
3161   assert(inited == INDEX);
3162   assert(!pushed_idx_cond || buf == table->record[0]);
3163 
3164   // Set status for the need to update generated fields
3165   m_update_generated_read_fields= table->has_gcol();
3166 
3167   MYSQL_TABLE_IO_WAIT(PSI_TABLE_FETCH_ROW, active_index, result,
3168     { result= index_prev(buf); })
3169   if (!result && m_update_generated_read_fields)
3170   {
3171     result= update_generated_read_fields(buf, table, active_index);
3172     m_update_generated_read_fields= false;
3173   }
3174   DBUG_RETURN(result);
3175 }
3176 
3177 
3178 /**
3179   Reads the first row via index.
3180 
3181   @param[out] buf  Row data
3182 
3183   @return Operation status.
3184     @retval  0                   Success
3185     @retval  HA_ERR_END_OF_FILE  Row not found
3186     @retval  != 0                Error
3187 */
3188 
ha_index_first(uchar * buf)3189 int handler::ha_index_first(uchar * buf)
3190 {
3191   int result;
3192   DBUG_ENTER("handler::ha_index_first");
3193   assert(table_share->tmp_table != NO_TMP_TABLE ||
3194          m_lock_type != F_UNLCK);
3195   assert(inited == INDEX);
3196   assert(!pushed_idx_cond || buf == table->record[0]);
3197 
3198   // Set status for the need to update generated fields
3199   m_update_generated_read_fields= table->has_gcol();
3200 
3201   MYSQL_TABLE_IO_WAIT(PSI_TABLE_FETCH_ROW, active_index, result,
3202     { result= index_first(buf); })
3203   if (!result && m_update_generated_read_fields)
3204   {
3205     result= update_generated_read_fields(buf, table, active_index);
3206     m_update_generated_read_fields= false;
3207   }
3208   DBUG_RETURN(result);
3209 }
3210 
3211 
3212 /**
3213   Reads the last row via index.
3214 
3215   @param[out] buf  Row data
3216 
3217   @return Operation status.
3218     @retval  0                   Success
3219     @retval  HA_ERR_END_OF_FILE  Row not found
3220     @retval  != 0                Error
3221 */
3222 
ha_index_last(uchar * buf)3223 int handler::ha_index_last(uchar * buf)
3224 {
3225   int result;
3226   DBUG_ENTER("handler::ha_index_last");
3227   assert(table_share->tmp_table != NO_TMP_TABLE ||
3228          m_lock_type != F_UNLCK);
3229   assert(inited == INDEX);
3230   assert(!pushed_idx_cond || buf == table->record[0]);
3231 
3232   // Set status for the need to update generated fields
3233   m_update_generated_read_fields= table->has_gcol();
3234 
3235   MYSQL_TABLE_IO_WAIT(PSI_TABLE_FETCH_ROW, active_index, result,
3236     { result= index_last(buf); })
3237   if (!result && m_update_generated_read_fields)
3238   {
3239     result= update_generated_read_fields(buf, table, active_index);
3240     m_update_generated_read_fields= false;
3241   }
3242   DBUG_RETURN(result);
3243 }
3244 
3245 
3246 /**
3247   Reads the next same row via index.
3248 
3249   @param[out] buf     Row data
3250   @param      key     Key to search for
3251   @param      keylen  Length of key
3252 
3253   @return Operation status.
3254     @retval  0                   Success
3255     @retval  HA_ERR_END_OF_FILE  Row not found
3256     @retval  != 0                Error
3257 */
3258 
ha_index_next_same(uchar * buf,const uchar * key,uint keylen)3259 int handler::ha_index_next_same(uchar *buf, const uchar *key, uint keylen)
3260 {
3261   int result;
3262   DBUG_ENTER("handler::ha_index_next_same");
3263   assert(table_share->tmp_table != NO_TMP_TABLE ||
3264          m_lock_type != F_UNLCK);
3265   assert(inited == INDEX);
3266   assert(!pushed_idx_cond || buf == table->record[0]);
3267 
3268   // Set status for the need to update generated fields
3269   m_update_generated_read_fields= table->has_gcol();
3270 
3271   MYSQL_TABLE_IO_WAIT(PSI_TABLE_FETCH_ROW, active_index, result,
3272     { result= index_next_same(buf, key, keylen); })
3273   if (!result && m_update_generated_read_fields)
3274   {
3275     result= update_generated_read_fields(buf, table, active_index);
3276     m_update_generated_read_fields= false;
3277   }
3278   DBUG_RETURN(result);
3279 }
3280 
3281 
3282 /**
3283   Read first row (only) from a table.
3284 
3285   This is never called for InnoDB tables, as these table types
3286   has the HA_STATS_RECORDS_IS_EXACT set.
3287 */
read_first_row(uchar * buf,uint primary_key)3288 int handler::read_first_row(uchar * buf, uint primary_key)
3289 {
3290   int error;
3291   DBUG_ENTER("handler::read_first_row");
3292 
3293   ha_statistic_increment(&SSV::ha_read_first_count);
3294 
3295   /*
3296     If there is very few deleted rows in the table, find the first row by
3297     scanning the table.
3298     TODO remove the test for HA_READ_ORDER
3299   */
3300   if (stats.deleted < 10 || primary_key >= MAX_KEY ||
3301       !(index_flags(primary_key, 0, 0) & HA_READ_ORDER))
3302   {
3303     if (!(error= ha_rnd_init(1)))
3304     {
3305       while ((error= ha_rnd_next(buf)) == HA_ERR_RECORD_DELETED)
3306         /* skip deleted row */;
3307       const int end_error= ha_rnd_end();
3308       if (!error)
3309         error= end_error;
3310     }
3311   }
3312   else
3313   {
3314     /* Find the first row through the primary key */
3315     if (!(error= ha_index_init(primary_key, 0)))
3316     {
3317       error= ha_index_first(buf);
3318       const int end_error= ha_index_end();
3319       if (!error)
3320         error= end_error;
3321     }
3322   }
3323   DBUG_RETURN(error);
3324 }
3325 
3326 /**
3327   Generate the next auto-increment number based on increment and offset.
3328   computes the lowest number
3329   - strictly greater than "nr"
3330   - of the form: auto_increment_offset + N * auto_increment_increment
3331   If overflow happened then return MAX_ULONGLONG value as an
3332   indication of overflow.
3333   In most cases increment= offset= 1, in which case we get:
3334   @verbatim 1,2,3,4,5,... @endverbatim
3335     If increment=10 and offset=5 and previous number is 1, we get:
3336   @verbatim 1,5,15,25,35,... @endverbatim
3337 */
3338 inline ulonglong
compute_next_insert_id(ulonglong nr,struct system_variables * variables)3339 compute_next_insert_id(ulonglong nr,struct system_variables *variables)
3340 {
3341   const ulonglong save_nr= nr;
3342 
3343   if (variables->auto_increment_increment == 1)
3344     nr= nr + 1; // optimization of the formula below
3345   else
3346   {
3347     nr= (((nr+ variables->auto_increment_increment -
3348            variables->auto_increment_offset)) /
3349          (ulonglong) variables->auto_increment_increment);
3350     nr= (nr* (ulonglong) variables->auto_increment_increment +
3351          variables->auto_increment_offset);
3352   }
3353 
3354   if (unlikely(nr <= save_nr))
3355     return ULLONG_MAX;
3356 
3357   return nr;
3358 }
3359 
3360 
adjust_next_insert_id_after_explicit_value(ulonglong nr)3361 void handler::adjust_next_insert_id_after_explicit_value(ulonglong nr)
3362 {
3363   /*
3364     If we have set THD::next_insert_id previously and plan to insert an
3365     explicitely-specified value larger than this, we need to increase
3366     THD::next_insert_id to be greater than the explicit value.
3367   */
3368   if ((next_insert_id > 0) && (nr >= next_insert_id))
3369     set_next_insert_id(compute_next_insert_id(nr, &table->in_use->variables));
3370 }
3371 
3372 
3373 /** @brief
3374   Computes the largest number X:
3375   - smaller than or equal to "nr"
3376   - of the form: auto_increment_offset + N * auto_increment_increment
3377   where N>=0.
3378 
3379   SYNOPSIS
3380     prev_insert_id
3381       nr            Number to "round down"
3382       variables     variables struct containing auto_increment_increment and
3383                     auto_increment_offset
3384 
3385   RETURN
3386     The number X if it exists, "nr" otherwise.
3387 */
3388 inline ulonglong
prev_insert_id(ulonglong nr,struct system_variables * variables)3389 prev_insert_id(ulonglong nr, struct system_variables *variables)
3390 {
3391   if (unlikely(nr < variables->auto_increment_offset))
3392   {
3393     /*
3394       There's nothing good we can do here. That is a pathological case, where
3395       the offset is larger than the column's max possible value, i.e. not even
3396       the first sequence value may be inserted. User will receive warning.
3397     */
3398     DBUG_PRINT("info",("auto_increment: nr: %lu cannot honour "
3399                        "auto_increment_offset: %lu",
3400                        (ulong) nr, variables->auto_increment_offset));
3401     return nr;
3402   }
3403   if (variables->auto_increment_increment == 1)
3404     return nr; // optimization of the formula below
3405   nr= (((nr - variables->auto_increment_offset)) /
3406        (ulonglong) variables->auto_increment_increment);
3407   return (nr * (ulonglong) variables->auto_increment_increment +
3408           variables->auto_increment_offset);
3409 }
3410 
3411 
3412 /**
3413   Update the auto_increment field if necessary.
3414 
3415   Updates columns with type NEXT_NUMBER if:
3416 
3417   - If column value is set to NULL (in which case
3418     auto_increment_field_not_null is 0)
3419   - If column is set to 0 and (sql_mode & MODE_NO_AUTO_VALUE_ON_ZERO) is not
3420     set. In the future we will only set NEXT_NUMBER fields if one sets them
3421     to NULL (or they are not included in the insert list).
3422 
3423     In those cases, we check if the currently reserved interval still has
3424     values we have not used. If yes, we pick the smallest one and use it.
3425     Otherwise:
3426 
3427   - If a list of intervals has been provided to the statement via SET
3428     INSERT_ID or via an Intvar_log_event (in a replication slave), we pick the
3429     first unused interval from this list, consider it as reserved.
3430 
  - Otherwise we set the column for the first row to the value
    next_insert_id(get_auto_increment(column)) which is usually
    max-used-column-value+1.
3434     We call get_auto_increment() for the first row in a multi-row
3435     statement. get_auto_increment() will tell us the interval of values it
3436     reserved for us.
3437 
3438   - In both cases, for the following rows we use those reserved values without
3439     calling the handler again (we just progress in the interval, computing
3440     each new value from the previous one). Until we have exhausted them, then
3441     we either take the next provided interval or call get_auto_increment()
3442     again to reserve a new interval.
3443 
3444   - In both cases, the reserved intervals are remembered in
3445     thd->auto_inc_intervals_in_cur_stmt_for_binlog if statement-based
3446     binlogging; the last reserved interval is remembered in
3447     auto_inc_interval_for_cur_row. The number of reserved intervals is
3448     remembered in auto_inc_intervals_count. It differs from the number of
3449     elements in thd->auto_inc_intervals_in_cur_stmt_for_binlog() because the
3450     latter list is cumulative over all statements forming one binlog event
3451     (when stored functions and triggers are used), and collapses two
3452     contiguous intervals in one (see its append() method).
3453 
3454     The idea is that generated auto_increment values are predictable and
3455     independent of the column values in the table.  This is needed to be
3456     able to replicate into a table that already has rows with a higher
3457     auto-increment value than the one that is inserted.
3458 
3459     After we have already generated an auto-increment number and the user
3460     inserts a column with a higher value than the last used one, we will
3461     start counting from the inserted value.
3462 
3463     This function's "outputs" are: the table's auto_increment field is filled
3464     with a value, thd->next_insert_id is filled with the value to use for the
3465     next row, if a value was autogenerated for the current row it is stored in
3466     thd->insert_id_for_cur_row, if get_auto_increment() was called
3467     thd->auto_inc_interval_for_cur_row is modified, if that interval is not
3468     present in thd->auto_inc_intervals_in_cur_stmt_for_binlog it is added to
3469     this list.
3470 
3471   @todo
3472     Replace all references to "next number" or NEXT_NUMBER to
3473     "auto_increment", everywhere (see below: there is
3474     table->auto_increment_field_not_null, and there also exists
3475     table->next_number_field, it's not consistent).
3476 
3477   @retval
3478     0	ok
3479   @retval
3480     HA_ERR_AUTOINC_READ_FAILED  get_auto_increment() was called and
3481     returned ~(ulonglong) 0
3482   @retval
3483     HA_ERR_AUTOINC_ERANGE storing value in field caused strict mode
3484     failure.
3485 */
3486 
3487 #define AUTO_INC_DEFAULT_NB_ROWS 1 // Some prefer 1024 here
3488 #define AUTO_INC_DEFAULT_NB_MAX_BITS 16
3489 #define AUTO_INC_DEFAULT_NB_MAX ((1 << AUTO_INC_DEFAULT_NB_MAX_BITS) - 1)
3490 
update_auto_increment()3491 int handler::update_auto_increment()
3492 {
3493   ulonglong nr, nb_reserved_values;
3494   bool append= FALSE;
3495   THD *thd= table->in_use;
3496   struct system_variables *variables= &thd->variables;
3497   assert(table_share->tmp_table != NO_TMP_TABLE ||
3498          m_lock_type != F_UNLCK);
3499   DBUG_ENTER("handler::update_auto_increment");
3500 
3501   /*
3502     next_insert_id is a "cursor" into the reserved interval, it may go greater
3503     than the interval, but not smaller.
3504   */
3505   assert(next_insert_id >= auto_inc_interval_for_cur_row.minimum());
3506 
3507   if ((nr= table->next_number_field->val_int()) != 0 ||
3508       (table->auto_increment_field_not_null &&
3509       thd->variables.sql_mode & MODE_NO_AUTO_VALUE_ON_ZERO))
3510   {
3511     /*
3512       Update next_insert_id if we had already generated a value in this
3513       statement (case of INSERT VALUES(null),(3763),(null):
3514       the last NULL needs to insert 3764, not the value of the first NULL plus
3515       1).
3516       Also we should take into account the the sign of the value.
3517       Since auto_increment value can't have negative value we should update
3518       next_insert_id only in case when we INSERTing explicit positive value.
3519       It means that for a table that has SIGNED INTEGER column when we execute
3520       the following statement
3521       INSERT INTO t1 VALUES( NULL), (-1), (NULL)
3522       we shouldn't call adjust_next_insert_id_after_explicit_value()
3523       and the result row will be (1, -1, 2) (for new opened connection
3524       to the server). On the other hand, for the statement
3525       INSERT INTO t1 VALUES( NULL), (333), (NULL)
3526       we should call adjust_next_insert_id_after_explicit_value()
3527       and result row will be (1, 333, 334).
3528     */
3529     if (((Field_num*)table->next_number_field)->unsigned_flag ||
3530         ((longlong)nr) > 0)
3531       adjust_next_insert_id_after_explicit_value(nr);
3532 
3533     insert_id_for_cur_row= 0; // didn't generate anything
3534     DBUG_RETURN(0);
3535   }
3536 
3537   if (next_insert_id > table->next_number_field->get_max_int_value())
3538     DBUG_RETURN(HA_ERR_AUTOINC_READ_FAILED);
3539 
3540   if ((nr= next_insert_id) >= auto_inc_interval_for_cur_row.maximum())
3541   {
3542     /* next_insert_id is beyond what is reserved, so we reserve more. */
3543     const Discrete_interval *forced=
3544       thd->auto_inc_intervals_forced.get_next();
3545     if (forced != NULL)
3546     {
3547       nr= forced->minimum();
3548       /*
3549         In a multi insert statement when the number of affected rows is known
3550         then reserve those many number of auto increment values. So that
3551         interval will be starting value to starting value + number of affected
3552         rows * increment of auto increment.
3553        */
3554       nb_reserved_values= (estimation_rows_to_insert > 0) ?
3555         estimation_rows_to_insert : forced->values();
3556     }
3557     else
3558     {
3559       /*
3560         handler::estimation_rows_to_insert was set by
3561         handler::ha_start_bulk_insert(); if 0 it means "unknown".
3562       */
3563       ulonglong nb_desired_values;
3564       /*
3565         If an estimation was given to the engine:
3566         - use it.
3567         - if we already reserved numbers, it means the estimation was
3568         not accurate, then we'll reserve 2*AUTO_INC_DEFAULT_NB_ROWS the 2nd
3569         time, twice that the 3rd time etc.
3570         If no estimation was given, use those increasing defaults from the
3571         start, starting from AUTO_INC_DEFAULT_NB_ROWS.
3572         Don't go beyond a max to not reserve "way too much" (because
3573         reservation means potentially losing unused values).
3574         Note that in prelocked mode no estimation is given.
3575       */
3576 
3577       if ((auto_inc_intervals_count == 0) && (estimation_rows_to_insert > 0))
3578         nb_desired_values= estimation_rows_to_insert;
3579       else if ((auto_inc_intervals_count == 0) &&
3580                (thd->lex->bulk_insert_row_cnt > 0))
3581       {
3582         /*
3583           For multi-row inserts, if the bulk inserts cannot be started, the
3584           handler::estimation_rows_to_insert will not be set. But we still
3585           want to reserve the autoinc values.
3586         */
3587         nb_desired_values= thd->lex->bulk_insert_row_cnt;
3588       }
3589       else /* go with the increasing defaults */
3590       {
3591         /* avoid overflow in formula, with this if() */
3592         if (auto_inc_intervals_count <= AUTO_INC_DEFAULT_NB_MAX_BITS)
3593         {
3594           nb_desired_values= AUTO_INC_DEFAULT_NB_ROWS *
3595             (1 << auto_inc_intervals_count);
3596           set_if_smaller(nb_desired_values, AUTO_INC_DEFAULT_NB_MAX);
3597         }
3598         else
3599           nb_desired_values= AUTO_INC_DEFAULT_NB_MAX;
3600       }
3601       /* This call ignores all its parameters but nr, currently */
3602       get_auto_increment(variables->auto_increment_offset,
3603                          variables->auto_increment_increment,
3604                          nb_desired_values, &nr,
3605                          &nb_reserved_values);
3606       if (nr == ULLONG_MAX)
3607         DBUG_RETURN(HA_ERR_AUTOINC_READ_FAILED);  // Mark failure
3608 
3609       /*
3610         That rounding below should not be needed when all engines actually
3611         respect offset and increment in get_auto_increment(). But they don't
3612         so we still do it. Wonder if for the not-first-in-index we should do
3613         it. Hope that this rounding didn't push us out of the interval; even
3614         if it did we cannot do anything about it (calling the engine again
3615         will not help as we inserted no row).
3616       */
3617       nr= compute_next_insert_id(nr-1, variables);
3618     }
3619 
3620     if (table->s->next_number_keypart == 0)
3621     {
3622       /* We must defer the appending until "nr" has been possibly truncated */
3623       append= TRUE;
3624     }
3625     else
3626     {
3627       /*
3628         For such auto_increment there is no notion of interval, just a
3629         singleton. The interval is not even stored in
3630         thd->auto_inc_interval_for_cur_row, so we are sure to call the engine
3631         for next row.
3632       */
3633       DBUG_PRINT("info",("auto_increment: special not-first-in-index"));
3634     }
3635   }
3636 
3637   if (unlikely(nr == ULLONG_MAX))
3638       DBUG_RETURN(HA_ERR_AUTOINC_ERANGE);
3639 
3640   DBUG_PRINT("info",("auto_increment: %lu", (ulong) nr));
3641 
3642   if (unlikely(table->next_number_field->store((longlong) nr, TRUE)))
3643   {
3644     /*
3645       first test if the query was aborted due to strict mode constraints
3646     */
3647     if (thd->killed == THD::KILL_BAD_DATA)
3648       DBUG_RETURN(HA_ERR_AUTOINC_ERANGE);
3649 
3650     /*
3651       field refused this value (overflow) and truncated it, use the result of
3652       the truncation (which is going to be inserted); however we try to
3653       decrease it to honour auto_increment_* variables.
3654       That will shift the left bound of the reserved interval, we don't
3655       bother shifting the right bound (anyway any other value from this
3656       interval will cause a duplicate key).
3657     */
3658     nr= prev_insert_id(table->next_number_field->val_int(), variables);
3659     if (unlikely(table->next_number_field->store((longlong) nr, TRUE)))
3660       nr= table->next_number_field->val_int();
3661   }
3662   if (append)
3663   {
3664     auto_inc_interval_for_cur_row.replace(nr, nb_reserved_values,
3665                                           variables->auto_increment_increment);
3666     auto_inc_intervals_count++;
3667     /* Row-based replication does not need to store intervals in binlog */
3668     if (mysql_bin_log.is_open() && !thd->is_current_stmt_binlog_format_row())
3669         thd->auto_inc_intervals_in_cur_stmt_for_binlog.append(auto_inc_interval_for_cur_row.minimum(),
3670                                                               auto_inc_interval_for_cur_row.values(),
3671                                                               variables->auto_increment_increment);
3672   }
3673 
3674   /*
3675     Record this autogenerated value. If the caller then
3676     succeeds to insert this value, it will call
3677     record_first_successful_insert_id_in_cur_stmt()
3678     which will set first_successful_insert_id_in_cur_stmt if it's not
3679     already set.
3680   */
3681   insert_id_for_cur_row= nr;
3682   /*
3683     Set next insert id to point to next auto-increment value to be able to
3684     handle multi-row statements.
3685   */
3686   set_next_insert_id(compute_next_insert_id(nr, variables));
3687 
3688   DBUG_RETURN(0);
3689 }
3690 
3691 
3692 /** @brief
3693   MySQL signal that it changed the column bitmap
3694 
3695   USAGE
3696     This is for handlers that needs to setup their own column bitmaps.
3697     Normally the handler should set up their own column bitmaps in
3698     index_init() or rnd_init() and in any column_bitmaps_signal() call after
3699     this.
3700 
3701     The handler is allowd to do changes to the bitmap after a index_init or
3702     rnd_init() call is made as after this, MySQL will not use the bitmap
3703     for any program logic checking.
3704 */
column_bitmaps_signal()3705 void handler::column_bitmaps_signal()
3706 {
3707   DBUG_ENTER("column_bitmaps_signal");
3708   DBUG_PRINT("info", ("read_set: 0x%lx  write_set: 0x%lx", (long) table->read_set,
3709                       (long)table->write_set));
3710   DBUG_VOID_RETURN;
3711 }
3712 
3713 
3714 /**
3715   Reserves an interval of auto_increment values from the handler.
3716 
3717   @param       offset              offset (modulus increment)
3718   @param       increment           increment between calls
3719   @param       nb_desired_values   how many values we want
3720   @param[out]  first_value         the first value reserved by the handler
3721   @param[out]  nb_reserved_values  how many values the handler reserved
3722 
3723   offset and increment means that we want values to be of the form
3724   offset + N * increment, where N>=0 is integer.
3725   If the function sets *first_value to ULLONG_MAX it means an error.
3726   If the function sets *nb_reserved_values to ULLONG_MAX it means it has
3727   reserved to "positive infinite".
3728 */
3729 
get_auto_increment(ulonglong offset,ulonglong increment,ulonglong nb_desired_values,ulonglong * first_value,ulonglong * nb_reserved_values)3730 void handler::get_auto_increment(ulonglong offset, ulonglong increment,
3731                                  ulonglong nb_desired_values,
3732                                  ulonglong *first_value,
3733                                  ulonglong *nb_reserved_values)
3734 {
3735   ulonglong nr;
3736   int error;
3737   DBUG_ENTER("handler::get_auto_increment");
3738 
3739   (void) extra(HA_EXTRA_KEYREAD);
3740   table->mark_columns_used_by_index_no_reset(table->s->next_number_index,
3741                                         table->read_set);
3742   column_bitmaps_signal();
3743 
3744   if (ha_index_init(table->s->next_number_index, 1))
3745   {
3746     /* This should never happen, assert in debug, and fail in release build */
3747     assert(0);
3748     *first_value= ULLONG_MAX;
3749     DBUG_VOID_RETURN;
3750   }
3751 
3752   if (table->s->next_number_keypart == 0)
3753   {						// Autoincrement at key-start
3754     error= ha_index_last(table->record[1]);
3755     /*
3756       MySQL implicitely assumes such method does locking (as MySQL decides to
3757       use nr+increment without checking again with the handler, in
3758       handler::update_auto_increment()), so reserves to infinite.
3759     */
3760     *nb_reserved_values= ULLONG_MAX;
3761   }
3762   else
3763   {
3764     uchar key[MAX_KEY_LENGTH];
3765     key_copy(key, table->record[0],
3766              table->key_info + table->s->next_number_index,
3767              table->s->next_number_key_offset);
3768     error= ha_index_read_map(table->record[1], key,
3769                              make_prev_keypart_map(table->s->next_number_keypart),
3770                              HA_READ_PREFIX_LAST);
3771     /*
3772       MySQL needs to call us for next row: assume we are inserting ("a",null)
3773       here, we return 3, and next this statement will want to insert
3774       ("b",null): there is no reason why ("b",3+1) would be the good row to
3775       insert: maybe it already exists, maybe 3+1 is too large...
3776     */
3777     *nb_reserved_values= 1;
3778   }
3779 
3780   if (error)
3781   {
3782     if (error == HA_ERR_END_OF_FILE || error == HA_ERR_KEY_NOT_FOUND)
3783     {
3784       /* No entry found, start with 1. */
3785       nr= 1;
3786     }
3787     else
3788     {
3789       assert(0);
3790       nr= ULLONG_MAX;
3791     }
3792   }
3793   else
3794     nr= ((ulonglong) table->next_number_field->
3795          val_int_offset(table->s->rec_buff_length)+1);
3796   ha_index_end();
3797   (void) extra(HA_EXTRA_NO_KEYREAD);
3798   *first_value= nr;
3799   DBUG_VOID_RETURN;
3800 }
3801 
3802 
ha_release_auto_increment()3803 void handler::ha_release_auto_increment()
3804 {
3805   assert(table_share->tmp_table != NO_TMP_TABLE ||
3806          m_lock_type != F_UNLCK ||
3807          (!next_insert_id && !insert_id_for_cur_row));
3808   DEBUG_SYNC(ha_thd(), "release_auto_increment");
3809   release_auto_increment();
3810   insert_id_for_cur_row= 0;
3811   auto_inc_interval_for_cur_row.replace(0, 0, 0);
3812   auto_inc_intervals_count= 0;
3813   if (next_insert_id > 0)
3814   {
3815     next_insert_id= 0;
3816     /*
3817       this statement used forced auto_increment values if there were some,
3818       wipe them away for other statements.
3819     */
3820     table->in_use->auto_inc_intervals_forced.empty();
3821   }
3822 }
3823 
3824 
3825 /**
3826   Construct and emit duplicate key error message using information
3827   from table's record buffer.
3828 
3829   @param table    TABLE object which record buffer should be used as
3830                   source for column values.
3831   @param key      Key description.
3832   @param msg      Error message template to which key value should be
3833                   added.
3834   @param errflag  Flags for my_error() call.
3835 */
3836 
print_keydup_error(TABLE * table,KEY * key,const char * msg,myf errflag)3837 void print_keydup_error(TABLE *table, KEY *key, const char *msg, myf errflag)
3838 {
3839   /* Write the duplicated key in the error message */
3840   char key_buff[MAX_KEY_LENGTH];
3841   String str(key_buff,sizeof(key_buff),system_charset_info);
3842 
3843   if (key == NULL)
3844   {
3845     /* Key is unknown */
3846     str.copy("", 0, system_charset_info);
3847     my_printf_error(ER_DUP_ENTRY, msg, errflag, str.c_ptr(), "*UNKNOWN*");
3848   }
3849   else
3850   {
3851     /* Table is opened and defined at this point */
3852     key_unpack(&str,table, key);
3853     size_t max_length= MYSQL_ERRMSG_SIZE - strlen(msg);
3854     if (str.length() >= max_length)
3855     {
3856       str.length(max_length-4);
3857       str.append(STRING_WITH_LEN("..."));
3858     }
3859     my_printf_error(ER_DUP_ENTRY, msg, errflag, str.c_ptr_safe(), key->name);
3860   }
3861 }
3862 
3863 
3864 /**
3865   Construct and emit duplicate key error message using information
3866   from table's record buffer.
3867 
3868   @sa print_keydup_error(table, key, msg, errflag).
3869 */
3870 
print_keydup_error(TABLE * table,KEY * key,myf errflag)3871 void print_keydup_error(TABLE *table, KEY *key, myf errflag)
3872 {
3873   print_keydup_error(table, key, ER(ER_DUP_ENTRY_WITH_KEY_NAME), errflag);
3874 }
3875 
3876 
3877 /**
3878   This method is used to analyse the error to see whether the error
3879   is ignorable or not. Further comments in header file.
3880 */
3881 
is_ignorable_error(int error)3882 bool handler::is_ignorable_error(int error)
3883 {
3884   DBUG_ENTER("is_ignorable_error");
3885 
3886   // Catch errors that are ignorable
3887   switch (error)
3888   {
3889     // Error code 0 is not an error.
3890     case 0:
3891     // Dup key errors may be explicitly ignored.
3892     case HA_ERR_FOUND_DUPP_KEY:
3893     case HA_ERR_FOUND_DUPP_UNIQUE:
3894     // Foreign key constraint violations are ignorable.
3895     case HA_ERR_ROW_IS_REFERENCED:
3896     case HA_ERR_NO_REFERENCED_ROW:
3897       DBUG_RETURN(true);
3898   }
3899 
3900   // Default is that an error is not ignorable.
3901   DBUG_RETURN(false);
3902 }
3903 
3904 
3905 /**
3906   This method is used to analyse the error to see whether the error
3907   is fatal or not. Further comments in header file.
3908 */
3909 
is_fatal_error(int error)3910 bool handler::is_fatal_error(int error)
3911 {
3912   DBUG_ENTER("is_fatal_error");
3913 
3914   // No ignorable errors are fatal
3915   if (is_ignorable_error(error))
3916     DBUG_RETURN(false);
3917 
3918   // Catch errors that are not fatal
3919   switch (error)
3920   {
3921     /*
3922       Deadlock and lock timeout cause transaction/statement rollback so that
3923       THD::is_fatal_sub_stmt_error will be set. This means that they will not
3924       be possible to handle by stored program handlers inside stored functions
3925       and triggers even if non-fatal.
3926     */
3927     case HA_ERR_LOCK_WAIT_TIMEOUT:
3928     case HA_ERR_LOCK_DEADLOCK:
3929       DBUG_RETURN(false);
3930 
3931     case HA_ERR_NULL_IN_SPATIAL:
3932       DBUG_RETURN(false);
3933   }
3934 
3935   // Default is that an error is fatal
3936   DBUG_RETURN(true);
3937 }
3938 
3939 
3940 /**
3941   Print error that we got from handler function.
3942 
3943   @note
3944     In case of delete table it's only safe to use the following parts of
3945     the 'table' structure:
3946     - table->s->path
3947     - table->alias
3948 */
print_error(int error,myf errflag)3949 void handler::print_error(int error, myf errflag)
3950 {
3951   DBUG_ENTER("handler::print_error");
3952   DBUG_PRINT("enter",("error: %d",error));
3953 
3954   int textno=ER_GET_ERRNO;
3955   switch (error) {
3956   case EACCES:
3957     textno=ER_OPEN_AS_READONLY;
3958     break;
3959   case EAGAIN:
3960     textno=ER_FILE_USED;
3961     break;
3962   case ENOENT:
3963     {
3964       char errbuf[MYSYS_STRERROR_SIZE];
3965       textno=ER_FILE_NOT_FOUND;
3966       my_error(textno, errflag, table_share->table_name.str,
3967                error, my_strerror(errbuf, sizeof(errbuf), error));
3968     }
3969     break;
3970   case HA_ERR_KEY_NOT_FOUND:
3971   case HA_ERR_NO_ACTIVE_RECORD:
3972   case HA_ERR_RECORD_DELETED:
3973   case HA_ERR_END_OF_FILE:
3974     textno=ER_KEY_NOT_FOUND;
3975     break;
3976   case HA_ERR_WRONG_MRG_TABLE_DEF:
3977     textno=ER_WRONG_MRG_TABLE;
3978     break;
3979   case HA_ERR_FOUND_DUPP_KEY:
3980   {
3981     uint key_nr= table ? get_dup_key(error) : -1;
3982     if ((int) key_nr >= 0)
3983     {
3984       print_keydup_error(table,
3985                          key_nr == MAX_KEY ? NULL : &table->key_info[key_nr],
3986                          errflag);
3987       DBUG_VOID_RETURN;
3988     }
3989     textno=ER_DUP_KEY;
3990     break;
3991   }
3992   case HA_ERR_FOREIGN_DUPLICATE_KEY:
3993   {
3994     assert(table_share->tmp_table != NO_TMP_TABLE ||
3995            m_lock_type != F_UNLCK);
3996 
3997     char rec_buf[MAX_KEY_LENGTH];
3998     String rec(rec_buf, sizeof(rec_buf), system_charset_info);
3999     /* Table is opened and defined at this point */
4000 
4001     /*
4002       Just print the subset of fields that are part of the first index,
4003       printing the whole row from there is not easy.
4004     */
4005     key_unpack(&rec, table, &table->key_info[0]);
4006 
4007     char child_table_name[NAME_LEN + 1];
4008     char child_key_name[NAME_LEN + 1];
4009     if (get_foreign_dup_key(child_table_name, sizeof(child_table_name),
4010                             child_key_name, sizeof(child_key_name)))
4011     {
4012       my_error(ER_FOREIGN_DUPLICATE_KEY_WITH_CHILD_INFO, errflag,
4013                table_share->table_name.str, rec.c_ptr_safe(),
4014                child_table_name, child_key_name);
4015     }
4016     else
4017     {
4018       my_error(ER_FOREIGN_DUPLICATE_KEY_WITHOUT_CHILD_INFO, errflag,
4019                table_share->table_name.str, rec.c_ptr_safe());
4020     }
4021     DBUG_VOID_RETURN;
4022   }
4023   case HA_ERR_NULL_IN_SPATIAL:
4024     my_error(ER_CANT_CREATE_GEOMETRY_OBJECT, errflag);
4025     DBUG_VOID_RETURN;
4026   case HA_ERR_FOUND_DUPP_UNIQUE:
4027     textno=ER_DUP_UNIQUE;
4028     break;
4029   case HA_ERR_RECORD_CHANGED:
4030     textno=ER_CHECKREAD;
4031     break;
4032   case HA_ERR_CRASHED:
4033     textno=ER_NOT_KEYFILE;
4034     break;
4035   case HA_ERR_WRONG_IN_RECORD:
4036     textno= ER_CRASHED_ON_USAGE;
4037     break;
4038   case HA_ERR_CRASHED_ON_USAGE:
4039     textno=ER_CRASHED_ON_USAGE;
4040     break;
4041   case HA_ERR_NOT_A_TABLE:
4042     textno= error;
4043     break;
4044   case HA_ERR_CRASHED_ON_REPAIR:
4045     textno=ER_CRASHED_ON_REPAIR;
4046     break;
4047   case HA_ERR_OUT_OF_MEM:
4048     textno=ER_OUT_OF_RESOURCES;
4049     break;
4050   case HA_ERR_SE_OUT_OF_MEMORY:
4051     my_error(ER_ENGINE_OUT_OF_MEMORY, errflag,
4052              table->file->table_type());
4053     DBUG_VOID_RETURN;
4054   case HA_ERR_WRONG_COMMAND:
4055     textno=ER_ILLEGAL_HA;
4056     break;
4057   case HA_ERR_OLD_FILE:
4058     textno=ER_OLD_KEYFILE;
4059     break;
4060   case HA_ERR_UNSUPPORTED:
4061     textno=ER_UNSUPPORTED_EXTENSION;
4062     break;
4063   case HA_ERR_RECORD_FILE_FULL:
4064   case HA_ERR_INDEX_FILE_FULL:
4065   {
4066     textno=ER_RECORD_FILE_FULL;
4067     /* Write the error message to error log */
4068     errflag|= ME_ERRORLOG;
4069     break;
4070   }
4071   case HA_ERR_LOCK_WAIT_TIMEOUT:
4072     textno=ER_LOCK_WAIT_TIMEOUT;
4073     break;
4074   case HA_ERR_LOCK_TABLE_FULL:
4075     textno=ER_LOCK_TABLE_FULL;
4076     break;
4077   case HA_ERR_LOCK_DEADLOCK:
4078     textno=ER_LOCK_DEADLOCK;
4079     break;
4080   case HA_ERR_READ_ONLY_TRANSACTION:
4081     textno=ER_READ_ONLY_TRANSACTION;
4082     break;
4083   case HA_ERR_CANNOT_ADD_FOREIGN:
4084     textno=ER_CANNOT_ADD_FOREIGN;
4085     break;
4086   case HA_ERR_ROW_IS_REFERENCED:
4087   {
4088     String str;
4089     get_error_message(error, &str);
4090     my_error(ER_ROW_IS_REFERENCED_2, errflag, str.c_ptr_safe());
4091     DBUG_VOID_RETURN;
4092   }
4093   case HA_ERR_NO_REFERENCED_ROW:
4094   {
4095     String str;
4096     get_error_message(error, &str);
4097     my_error(ER_NO_REFERENCED_ROW_2, errflag, str.c_ptr_safe());
4098     DBUG_VOID_RETURN;
4099   }
4100   case HA_ERR_TABLE_DEF_CHANGED:
4101     textno=ER_TABLE_DEF_CHANGED;
4102     break;
4103   case HA_ERR_NO_SUCH_TABLE:
4104     my_error(ER_NO_SUCH_TABLE, errflag, table_share->db.str,
4105              table_share->table_name.str);
4106     DBUG_VOID_RETURN;
4107   case HA_ERR_RBR_LOGGING_FAILED:
4108     textno= ER_BINLOG_ROW_LOGGING_FAILED;
4109     break;
4110   case HA_ERR_DROP_INDEX_FK:
4111   {
4112     const char *ptr= "???";
4113     uint key_nr= table ? get_dup_key(error) : -1;
4114     if ((int) key_nr >= 0 && key_nr != MAX_KEY)
4115       ptr= table->key_info[key_nr].name;
4116     my_error(ER_DROP_INDEX_FK, errflag, ptr);
4117     DBUG_VOID_RETURN;
4118   }
4119   case HA_ERR_TABLE_NEEDS_UPGRADE:
4120     textno=ER_TABLE_NEEDS_UPGRADE;
4121     break;
4122   case HA_ERR_NO_PARTITION_FOUND:
4123     textno=ER_WRONG_PARTITION_NAME;
4124     break;
4125   case HA_ERR_TABLE_READONLY:
4126     textno= ER_OPEN_AS_READONLY;
4127     break;
4128   case HA_ERR_AUTOINC_READ_FAILED:
4129     textno= ER_AUTOINC_READ_FAILED;
4130     break;
4131   case HA_ERR_AUTOINC_ERANGE:
4132     textno= ER_WARN_DATA_OUT_OF_RANGE;
4133     break;
4134   case HA_ERR_TOO_MANY_CONCURRENT_TRXS:
4135     textno= ER_TOO_MANY_CONCURRENT_TRXS;
4136     break;
4137   case HA_ERR_INDEX_COL_TOO_LONG:
4138     textno= ER_INDEX_COLUMN_TOO_LONG;
4139     break;
4140   case HA_ERR_NOT_IN_LOCK_PARTITIONS:
4141     textno=ER_ROW_DOES_NOT_MATCH_GIVEN_PARTITION_SET;
4142     break;
4143   case HA_ERR_INDEX_CORRUPT:
4144     textno= ER_INDEX_CORRUPT;
4145     break;
4146   case HA_ERR_UNDO_REC_TOO_BIG:
4147     textno= ER_UNDO_RECORD_TOO_BIG;
4148     break;
4149   case HA_ERR_TABLE_IN_FK_CHECK:
4150     textno= ER_TABLE_IN_FK_CHECK;
4151     break;
4152   case HA_WRONG_CREATE_OPTION:
4153     textno= ER_ILLEGAL_HA;
4154     break;
4155   case HA_MISSING_CREATE_OPTION:
4156   {
4157     const char* engine= table_type();
4158     my_error(ER_MISSING_HA_CREATE_OPTION, errflag, engine);
4159     DBUG_VOID_RETURN;
4160   }
4161   case HA_ERR_TOO_MANY_FIELDS:
4162     textno= ER_TOO_MANY_FIELDS;
4163     break;
4164   case HA_ERR_INNODB_READ_ONLY:
4165     textno= ER_INNODB_READ_ONLY;
4166     break;
4167   case HA_ERR_TEMP_FILE_WRITE_FAILURE:
4168     textno= ER_TEMP_FILE_WRITE_FAILURE;
4169     break;
4170   case HA_ERR_INNODB_FORCED_RECOVERY:
4171     textno= ER_INNODB_FORCED_RECOVERY;
4172     break;
4173   case HA_ERR_TABLE_CORRUPT:
4174     my_error(ER_TABLE_CORRUPT, errflag, table_share->db.str,
4175              table_share->table_name.str);
4176     DBUG_VOID_RETURN;
4177   case HA_ERR_QUERY_INTERRUPTED:
4178     textno= ER_QUERY_INTERRUPTED;
4179     break;
4180   case HA_ERR_TABLESPACE_MISSING:
4181   {
4182     char errbuf[MYSYS_STRERROR_SIZE];
4183     my_snprintf(errbuf, MYSYS_STRERROR_SIZE, "`%s`.`%s`", table_share->db.str,
4184     table_share->table_name.str);
4185     my_error(ER_TABLESPACE_MISSING, errflag, errbuf, error);
4186     DBUG_VOID_RETURN;
4187   }
4188   case HA_ERR_TABLESPACE_IS_NOT_EMPTY:
4189     my_error(ER_TABLESPACE_IS_NOT_EMPTY, errflag, table_share->db.str,
4190              table_share->table_name.str);
4191     DBUG_VOID_RETURN;
4192   case HA_ERR_WRONG_FILE_NAME:
4193     my_error(ER_WRONG_FILE_NAME, errflag, table_share->table_name.str);
4194     DBUG_VOID_RETURN;
4195   case HA_ERR_NOT_ALLOWED_COMMAND:
4196     textno=ER_NOT_ALLOWED_COMMAND;
4197     break;
4198   default:
4199     {
4200       /* The error was "unknown" to this function.
4201 	 Ask handler if it has got a message for this error */
4202       String str;
4203       bool temporary= get_error_message(error, &str);
4204       if (!str.is_empty())
4205       {
4206 	const char* engine= table_type();
4207 	if (temporary)
4208 	  my_error(ER_GET_TEMPORARY_ERRMSG, errflag, error, str.ptr(), engine);
4209 	else
4210 	  my_error(ER_GET_ERRMSG, errflag, error, str.ptr(), engine);
4211       }
4212       else
4213 	my_error(ER_GET_ERRNO,errflag,error);
4214       DBUG_VOID_RETURN;
4215     }
4216   }
4217   if (textno != ER_FILE_NOT_FOUND)
4218     my_error(textno, errflag, table_share->table_name.str, error);
4219   DBUG_VOID_RETURN;
4220 }
4221 
4222 
4223 /**
4224   Return an error message specific to this handler.
4225 
4226   @param error  error code previously returned by handler
4227   @param buf    pointer to String where to add error message
4228 
4229   @return
4230     Returns true if this is a temporary error
4231 */
get_error_message(int error,String * buf)4232 bool handler::get_error_message(int error, String* buf)
4233 {
4234   return FALSE;
4235 }
4236 
4237 
4238 /**
4239   Check for incompatible collation changes.
4240 
4241   @retval
4242     HA_ADMIN_NEEDS_UPGRADE   Table may have data requiring upgrade.
4243   @retval
4244     0                        No upgrade required.
4245 */
4246 
check_collation_compatibility()4247 int handler::check_collation_compatibility()
4248 {
4249   ulong mysql_version= table->s->mysql_version;
4250 
4251   if (mysql_version < 50124)
4252   {
4253     KEY *key= table->key_info;
4254     KEY *key_end= key + table->s->keys;
4255     for (; key < key_end; key++)
4256     {
4257       KEY_PART_INFO *key_part= key->key_part;
4258       KEY_PART_INFO *key_part_end= key_part + key->user_defined_key_parts;
4259       for (; key_part < key_part_end; key_part++)
4260       {
4261         if (!key_part->fieldnr)
4262           continue;
4263         Field *field= table->field[key_part->fieldnr - 1];
4264         uint cs_number= field->charset()->number;
4265         if ((mysql_version < 50048 &&
4266              (cs_number == 11 || /* ascii_general_ci - bug #29499, bug #27562 */
4267               cs_number == 41 || /* latin7_general_ci - bug #29461 */
4268               cs_number == 42 || /* latin7_general_cs - bug #29461 */
4269               cs_number == 20 || /* latin7_estonian_cs - bug #29461 */
4270               cs_number == 21 || /* latin2_hungarian_ci - bug #29461 */
4271               cs_number == 22 || /* koi8u_general_ci - bug #29461 */
4272               cs_number == 23 || /* cp1251_ukrainian_ci - bug #29461 */
4273               cs_number == 26)) || /* cp1250_general_ci - bug #29461 */
4274              (mysql_version < 50124 &&
4275              (cs_number == 33 || /* utf8_general_ci - bug #27877 */
4276               cs_number == 35))) /* ucs2_general_ci - bug #27877 */
4277           return HA_ADMIN_NEEDS_UPGRADE;
4278       }
4279     }
4280   }
4281   return 0;
4282 }
4283 
4284 
ha_check_for_upgrade(HA_CHECK_OPT * check_opt)4285 int handler::ha_check_for_upgrade(HA_CHECK_OPT *check_opt)
4286 {
4287   int error;
4288   KEY *keyinfo, *keyend;
4289   KEY_PART_INFO *keypart, *keypartend;
4290 
4291   if (!table->s->mysql_version)
4292   {
4293     /* check for blob-in-key error */
4294     keyinfo= table->key_info;
4295     keyend= table->key_info + table->s->keys;
4296     for (; keyinfo < keyend; keyinfo++)
4297     {
4298       keypart= keyinfo->key_part;
4299       keypartend= keypart + keyinfo->user_defined_key_parts;
4300       for (; keypart < keypartend; keypart++)
4301       {
4302         if (!keypart->fieldnr)
4303           continue;
4304         Field *field= table->field[keypart->fieldnr-1];
4305         if (field->type() == MYSQL_TYPE_BLOB)
4306         {
4307           if (check_opt->sql_flags & TT_FOR_UPGRADE)
4308             check_opt->flags= T_MEDIUM;
4309           return HA_ADMIN_NEEDS_CHECK;
4310         }
4311       }
4312     }
4313   }
4314   if (table->s->frm_version != FRM_VER_TRUE_VARCHAR)
4315     return HA_ADMIN_NEEDS_ALTER;
4316 
4317   if ((error= check_collation_compatibility()))
4318     return error;
4319 
4320   return check_for_upgrade(check_opt);
4321 }
4322 
4323 
check_old_types()4324 int handler::check_old_types()
4325 {
4326   Field** field;
4327 
4328   for (field= table->field; (*field); field++)
4329   {
4330     if (table->s->mysql_version == 0) // prior to MySQL 5.0
4331     {
4332       /* check for bad DECIMAL field */
4333       if ((*field)->type() == MYSQL_TYPE_NEWDECIMAL)
4334       {
4335         return HA_ADMIN_NEEDS_ALTER;
4336       }
4337       if ((*field)->type() == MYSQL_TYPE_VAR_STRING)
4338       {
4339         return HA_ADMIN_NEEDS_ALTER;
4340       }
4341     }
4342 
4343     /*
4344       Check for old DECIMAL field.
4345 
4346       Above check does not take into account for pre 5.0 decimal types which can
4347       be present in the data directory if user did in-place upgrade from
4348       mysql-4.1 to mysql-5.0.
4349     */
4350     if ((*field)->type() == MYSQL_TYPE_DECIMAL)
4351     {
4352       return HA_ADMIN_NEEDS_DUMP_UPGRADE;
4353     }
4354 
4355     if ((*field)->type() == MYSQL_TYPE_YEAR && (*field)->field_length == 2)
4356       return HA_ADMIN_NEEDS_ALTER; // obsolete YEAR(2) type
4357 
4358     //Check for old temporal format if avoid_temporal_upgrade is disabled.
4359     mysql_mutex_lock(&LOCK_global_system_variables);
4360     bool check_temporal_upgrade= !avoid_temporal_upgrade;
4361     mysql_mutex_unlock(&LOCK_global_system_variables);
4362 
4363     if (check_temporal_upgrade)
4364     {
4365       if (((*field)->real_type() == MYSQL_TYPE_TIME) ||
4366           ((*field)->real_type() == MYSQL_TYPE_DATETIME) ||
4367           ((*field)->real_type() == MYSQL_TYPE_TIMESTAMP))
4368         return HA_ADMIN_NEEDS_ALTER;
4369     }
4370   }
4371   return 0;
4372 }
4373 
4374 
update_frm_version(TABLE * table)4375 static bool update_frm_version(TABLE *table)
4376 {
4377   char path[FN_REFLEN];
4378   File file;
4379   int result= 1;
4380   DBUG_ENTER("update_frm_version");
4381 
4382   /*
4383     No need to update frm version in case table was created or checked
4384     by server with the same version. This also ensures that we do not
4385     update frm version for temporary tables as this code doesn't support
4386     temporary tables.
4387   */
4388   if (table->s->mysql_version == MYSQL_VERSION_ID)
4389     DBUG_RETURN(0);
4390 
4391   strxmov(path, table->s->normalized_path.str, reg_ext, NullS);
4392 
4393   if ((file= mysql_file_open(key_file_frm,
4394                              path, O_RDWR|O_BINARY, MYF(MY_WME))) >= 0)
4395   {
4396     uchar version[4];
4397 
4398     int4store(version, MYSQL_VERSION_ID);
4399 
4400     if ((result= mysql_file_pwrite(file, (uchar*) version, 4, 51L, MYF_RW)))
4401       goto err;
4402 
4403     table->s->mysql_version= MYSQL_VERSION_ID;
4404   }
4405 err:
4406   if (file >= 0)
4407     (void) mysql_file_close(file, MYF(MY_WME));
4408   DBUG_RETURN(result);
4409 }
4410 
4411 
4412 
4413 /**
4414   @return
4415     key if error because of duplicated keys
4416 */
get_dup_key(int error)4417 uint handler::get_dup_key(int error)
4418 {
4419   assert(table_share->tmp_table != NO_TMP_TABLE ||
4420          m_lock_type != F_UNLCK);
4421   DBUG_ENTER("handler::get_dup_key");
4422   table->file->errkey  = (uint) -1;
4423   if (error == HA_ERR_FOUND_DUPP_KEY ||
4424       error == HA_ERR_FOUND_DUPP_UNIQUE || error == HA_ERR_NULL_IN_SPATIAL ||
4425       error == HA_ERR_DROP_INDEX_FK)
4426     table->file->info(HA_STATUS_ERRKEY | HA_STATUS_NO_LOCK);
4427   DBUG_RETURN(table->file->errkey);
4428 }
4429 
4430 
4431 /**
4432   Delete all files with extension from bas_ext().
4433 
4434   @param name		Base name of table
4435 
4436   @note
4437     We assume that the handler may return more extensions than
4438     was actually used for the file.
4439 
4440   @retval
4441     0   If we successfully deleted at least one file from base_ext and
4442     didn't get any other errors than ENOENT
4443   @retval
4444     !0  Error
4445 */
delete_table(const char * name)4446 int handler::delete_table(const char *name)
4447 {
4448   int saved_error= 0;
4449   int error= 0;
4450   int enoent_or_zero= ENOENT;                   // Error if no file was deleted
4451   char buff[FN_REFLEN];
4452   assert(m_lock_type == F_UNLCK);
4453 
4454   for (const char **ext=bas_ext(); *ext ; ext++)
4455   {
4456     fn_format(buff, name, "", *ext, MY_UNPACK_FILENAME|MY_APPEND_EXT);
4457     if (mysql_file_delete_with_symlink(key_file_misc, buff, MYF(0)))
4458     {
4459       if (my_errno() != ENOENT)
4460       {
4461         /*
4462           If error on the first existing file, return the error.
4463           Otherwise delete as much as possible.
4464         */
4465         if (enoent_or_zero)
4466           return my_errno();
4467 	saved_error= my_errno();
4468       }
4469     }
4470     else
4471       enoent_or_zero= 0;                        // No error for ENOENT
4472     error= enoent_or_zero;
4473   }
4474   return saved_error ? saved_error : error;
4475 }
4476 
4477 
rename_table(const char * from,const char * to)4478 int handler::rename_table(const char * from, const char * to)
4479 {
4480   int error= 0;
4481   const char **ext, **start_ext;
4482   start_ext= bas_ext();
4483   for (ext= start_ext; *ext ; ext++)
4484   {
4485     if (rename_file_ext(from, to, *ext))
4486     {
4487       error= my_errno();
4488       if (error != ENOENT)
4489 	break;
4490       error= 0;
4491     }
4492   }
4493   if (error)
4494   {
4495     /* Try to revert the rename. Ignore errors. */
4496     for (; ext >= start_ext; ext--)
4497       rename_file_ext(to, from, *ext);
4498   }
4499   return error;
4500 }
4501 
4502 
drop_table(const char * name)4503 void handler::drop_table(const char *name)
4504 {
4505   close();
4506   delete_table(name);
4507 }
4508 
4509 
4510 /**
4511   Performs checks upon the table.
4512 
4513   @param thd                thread doing CHECK TABLE operation
4514   @param check_opt          options from the parser
4515 
4516   @retval
4517     HA_ADMIN_OK               Successful upgrade
4518   @retval
4519     HA_ADMIN_NEEDS_UPGRADE    Table has structures requiring upgrade
4520   @retval
4521     HA_ADMIN_NEEDS_ALTER      Table has structures requiring ALTER TABLE
4522   @retval
4523     HA_ADMIN_NOT_IMPLEMENTED
4524 */
ha_check(THD * thd,HA_CHECK_OPT * check_opt)4525 int handler::ha_check(THD *thd, HA_CHECK_OPT *check_opt)
4526 {
4527   int error;
4528   bool skip_version_update = false;
4529   bool is_upgrade = check_opt->sql_flags & TT_FOR_UPGRADE;
4530 
4531   assert(table_share->tmp_table != NO_TMP_TABLE ||
4532          m_lock_type != F_UNLCK);
4533 
4534   if ((table->s->mysql_version >= MYSQL_VERSION_ID) &&
4535       (check_opt->sql_flags & TT_FOR_UPGRADE))
4536     return 0;
4537 
4538   if (table->s->mysql_version < MYSQL_VERSION_ID)
4539   {
4540     if ((error= check_old_types()))
4541       return error;
4542 
4543     error= ha_check_for_upgrade(check_opt);
4544     switch (error)
4545     {
4546       case HA_ADMIN_NEEDS_UPG_PART:
4547         /* Skip version update as the table needs upgrade. */
4548         skip_version_update= true;
4549         /* Fall through */
4550       case HA_ADMIN_OK:
4551         if (is_upgrade)
4552           return error;
4553         /* Fall through */
4554       case HA_ADMIN_NEEDS_CHECK:
4555         break;
4556       default:
4557         return error;
4558     }
4559   }
4560 
4561   if ((error= check(thd, check_opt)))
4562     return error;
4563   /* Skip updating frm version if not main handler. */
4564   if (table->file != this || skip_version_update)
4565     return error;
4566   return update_frm_version(table);
4567 }
4568 
4569 void
mark_trx_noop_dml()4570 handler::mark_trx_noop_dml()
4571 {
4572   Ha_trx_info *ha_info= &ha_thd()->ha_data[ht->slot].ha_info[0];
4573   /*
4574     When a storage engine method is called, the transaction must
4575     have been started, unless it's a DDL call, for which the
4576     storage engine starts the transaction internally, and commits
4577     it internally, without registering in the ha_list.
4578     Unfortunately here we can't know for sure if the engine
4579     has registered the transaction or not, so we must check.
4580   */
4581   if (ha_info->is_started())
4582   {
4583     assert(has_transactions());
4584     /*
4585       table_share can be NULL in ha_delete_table(). See implementation
4586       of standalone function ha_delete_table() in sql_base.cc.
4587     */
4588     if (table_share == NULL || table_share->tmp_table == NO_TMP_TABLE)
4589       ha_info->set_trx_noop_read_write();
4590   }
4591 }
4592 
4593 /**
4594   A helper function to mark a transaction read-write,
4595   if it is started.
4596 */
4597 
4598 void
mark_trx_read_write()4599 handler::mark_trx_read_write()
4600 {
4601   Ha_trx_info *ha_info= &ha_thd()->ha_data[ht->slot].ha_info[0];
4602   /*
4603     When a storage engine method is called, the transaction must
4604     have been started, unless it's a DDL call, for which the
4605     storage engine starts the transaction internally, and commits
4606     it internally, without registering in the ha_list.
4607     Unfortunately here we can't know for sure if the engine
4608     has registered the transaction or not, so we must check.
4609   */
4610   if (ha_info->is_started())
4611   {
4612     assert(has_transactions());
4613     /*
4614       table_share can be NULL in ha_delete_table(). See implementation
4615       of standalone function ha_delete_table() in sql_base.cc.
4616     */
4617     if (table_share == NULL || table_share->tmp_table == NO_TMP_TABLE)
4618       ha_info->set_trx_read_write();
4619   }
4620 }
4621 
4622 
4623 /**
4624   Repair table: public interface.
4625 
4626   @sa handler::repair()
4627 */
4628 
ha_repair(THD * thd,HA_CHECK_OPT * check_opt)4629 int handler::ha_repair(THD* thd, HA_CHECK_OPT* check_opt)
4630 {
4631   int result;
4632   mark_trx_read_write();
4633 
4634   result= repair(thd, check_opt);
4635   assert(result == HA_ADMIN_NOT_IMPLEMENTED ||
4636          ha_table_flags() & HA_CAN_REPAIR);
4637 
4638   int old_types_error= check_old_types();
4639 
4640   if (old_types_error != HA_ADMIN_NEEDS_DUMP_UPGRADE && result == HA_ADMIN_OK)
4641     result= update_frm_version(table);
4642   return result;
4643 }
4644 
4645 
4646 /**
4647   Start bulk insert.
4648 
4649   Allow the handler to optimize for multiple row insert.
4650 
4651   @param rows  Estimated rows to insert
4652 */
4653 
ha_start_bulk_insert(ha_rows rows)4654 void handler::ha_start_bulk_insert(ha_rows rows)
4655 {
4656   DBUG_ENTER("handler::ha_start_bulk_insert");
4657   assert(table_share->tmp_table != NO_TMP_TABLE ||
4658          m_lock_type == F_WRLCK);
4659   estimation_rows_to_insert= rows;
4660   start_bulk_insert(rows);
4661   DBUG_VOID_RETURN;
4662 }
4663 
4664 
4665 /**
4666   End bulk insert.
4667 
4668   @return Operation status
4669     @retval 0     Success
4670     @retval != 0  Failure (error code returned)
4671 */
4672 
ha_end_bulk_insert()4673 int handler::ha_end_bulk_insert()
4674 {
4675   DBUG_ENTER("handler::ha_end_bulk_insert");
4676   assert(table_share->tmp_table != NO_TMP_TABLE ||
4677          m_lock_type == F_WRLCK);
4678   estimation_rows_to_insert= 0;
4679   DBUG_RETURN(end_bulk_insert());
4680 }
4681 
4682 
4683 /**
4684   Bulk update row: public interface.
4685 
4686   @sa handler::bulk_update_row()
4687 */
4688 
4689 int
ha_bulk_update_row(const uchar * old_data,uchar * new_data,uint * dup_key_found)4690 handler::ha_bulk_update_row(const uchar *old_data, uchar *new_data,
4691                             uint *dup_key_found)
4692 {
4693   assert(table_share->tmp_table != NO_TMP_TABLE ||
4694          m_lock_type == F_WRLCK);
4695   mark_trx_read_write();
4696 
4697   return bulk_update_row(old_data, new_data, dup_key_found);
4698 }
4699 
4700 
4701 /**
4702   Delete all rows: public interface.
4703 
4704   @sa handler::delete_all_rows()
4705 */
4706 
4707 int
ha_delete_all_rows()4708 handler::ha_delete_all_rows()
4709 {
4710   assert(table_share->tmp_table != NO_TMP_TABLE ||
4711          m_lock_type == F_WRLCK);
4712   mark_trx_read_write();
4713 
4714   return delete_all_rows();
4715 }
4716 
4717 
4718 /**
4719   Truncate table: public interface.
4720 
4721   @sa handler::truncate()
4722 */
4723 
4724 int
ha_truncate()4725 handler::ha_truncate()
4726 {
4727   assert(table_share->tmp_table != NO_TMP_TABLE ||
4728          m_lock_type == F_WRLCK);
4729   mark_trx_read_write();
4730 
4731   return truncate();
4732 }
4733 
4734 
4735 /**
4736   Optimize table: public interface.
4737 
4738   @sa handler::optimize()
4739 */
4740 
4741 int
ha_optimize(THD * thd,HA_CHECK_OPT * check_opt)4742 handler::ha_optimize(THD* thd, HA_CHECK_OPT* check_opt)
4743 {
4744   assert(table_share->tmp_table != NO_TMP_TABLE ||
4745          m_lock_type == F_WRLCK);
4746   mark_trx_read_write();
4747 
4748   return optimize(thd, check_opt);
4749 }
4750 
4751 
4752 /**
4753   Analyze table: public interface.
4754 
4755   @sa handler::analyze()
4756 */
4757 
4758 int
ha_analyze(THD * thd,HA_CHECK_OPT * check_opt)4759 handler::ha_analyze(THD* thd, HA_CHECK_OPT* check_opt)
4760 {
4761   assert(table_share->tmp_table != NO_TMP_TABLE ||
4762          m_lock_type != F_UNLCK);
4763   mark_trx_read_write();
4764 
4765   return analyze(thd, check_opt);
4766 }
4767 
4768 
4769 /**
4770   Check and repair table: public interface.
4771 
4772   @sa handler::check_and_repair()
4773 */
4774 
4775 bool
ha_check_and_repair(THD * thd)4776 handler::ha_check_and_repair(THD *thd)
4777 {
4778   assert(table_share->tmp_table != NO_TMP_TABLE ||
4779          m_lock_type == F_UNLCK);
4780   mark_trx_read_write();
4781 
4782   return check_and_repair(thd);
4783 }
4784 
4785 
4786 /**
4787   Disable indexes: public interface.
4788 
4789   @sa handler::disable_indexes()
4790 */
4791 
4792 int
ha_disable_indexes(uint mode)4793 handler::ha_disable_indexes(uint mode)
4794 {
4795   assert(table_share->tmp_table != NO_TMP_TABLE ||
4796          m_lock_type != F_UNLCK);
4797   mark_trx_read_write();
4798 
4799   return disable_indexes(mode);
4800 }
4801 
4802 
4803 /**
4804   Enable indexes: public interface.
4805 
4806   @sa handler::enable_indexes()
4807 */
4808 
4809 int
ha_enable_indexes(uint mode)4810 handler::ha_enable_indexes(uint mode)
4811 {
4812   assert(table_share->tmp_table != NO_TMP_TABLE ||
4813          m_lock_type != F_UNLCK);
4814   mark_trx_read_write();
4815 
4816   return enable_indexes(mode);
4817 }
4818 
4819 
4820 /**
4821   Discard or import tablespace: public interface.
4822 
4823   @sa handler::discard_or_import_tablespace()
4824 */
4825 
4826 int
ha_discard_or_import_tablespace(my_bool discard)4827 handler::ha_discard_or_import_tablespace(my_bool discard)
4828 {
4829   assert(table_share->tmp_table != NO_TMP_TABLE ||
4830          m_lock_type == F_WRLCK);
4831   mark_trx_read_write();
4832 
4833   return discard_or_import_tablespace(discard);
4834 }
4835 
4836 
ha_prepare_inplace_alter_table(TABLE * altered_table,Alter_inplace_info * ha_alter_info)4837 bool handler::ha_prepare_inplace_alter_table(TABLE *altered_table,
4838                                              Alter_inplace_info *ha_alter_info)
4839 {
4840   assert(table_share->tmp_table != NO_TMP_TABLE ||
4841          m_lock_type != F_UNLCK);
4842   mark_trx_read_write();
4843 
4844   return prepare_inplace_alter_table(altered_table, ha_alter_info);
4845 }
4846 
4847 
ha_commit_inplace_alter_table(TABLE * altered_table,Alter_inplace_info * ha_alter_info,bool commit)4848 bool handler::ha_commit_inplace_alter_table(TABLE *altered_table,
4849                                             Alter_inplace_info *ha_alter_info,
4850                                             bool commit)
4851 {
4852    /*
4853      At this point we should have an exclusive metadata lock on the table.
4854      The exception is if we're about to roll back changes (commit= false).
4855      In this case, we might be rolling back after a failed lock upgrade,
4856      so we could be holding the same lock level as for inplace_alter_table().
4857    */
4858   assert(ha_thd()->mdl_context.owns_equal_or_stronger_lock(MDL_key::TABLE,
4859                                                            table->s->db.str,
4860                                                            table->s->table_name.str,
4861                                                            MDL_EXCLUSIVE) ||
4862          !commit);
4863 
4864    return commit_inplace_alter_table(altered_table, ha_alter_info, commit);
4865 }
4866 
4867 
4868 /*
4869    Default implementation to support in-place alter table
4870    and old online add/drop index API
4871 */
4872 
4873 enum_alter_inplace_result
check_if_supported_inplace_alter(TABLE * altered_table,Alter_inplace_info * ha_alter_info)4874 handler::check_if_supported_inplace_alter(TABLE *altered_table,
4875                                           Alter_inplace_info *ha_alter_info)
4876 {
4877   DBUG_ENTER("check_if_supported_alter");
4878 
4879   HA_CREATE_INFO *create_info= ha_alter_info->create_info;
4880 
4881   Alter_inplace_info::HA_ALTER_FLAGS inplace_offline_operations=
4882     Alter_inplace_info::ALTER_COLUMN_EQUAL_PACK_LENGTH |
4883     Alter_inplace_info::ALTER_COLUMN_NAME |
4884     Alter_inplace_info::ALTER_COLUMN_DEFAULT |
4885     Alter_inplace_info::CHANGE_CREATE_OPTION |
4886     Alter_inplace_info::ALTER_RENAME |
4887     Alter_inplace_info::RENAME_INDEX |
4888     Alter_inplace_info::ALTER_INDEX_COMMENT |
4889     Alter_inplace_info::ALTER_COLUMN_INDEX_LENGTH;
4890 
4891   /* Is there at least one operation that requires copy algorithm? */
4892   if (ha_alter_info->handler_flags & ~inplace_offline_operations)
4893     DBUG_RETURN(HA_ALTER_INPLACE_NOT_SUPPORTED);
4894 
4895   /*
4896     ALTER TABLE tbl_name CONVERT TO CHARACTER SET .. and
4897     ALTER TABLE table_name DEFAULT CHARSET = .. most likely
4898     change column charsets and so not supported in-place through
4899     old API.
4900 
4901     Changing of PACK_KEYS, MAX_ROWS and ROW_FORMAT options were
4902     not supported as in-place operations in old API either.
4903   */
4904   if (create_info->used_fields & (HA_CREATE_USED_CHARSET |
4905                                   HA_CREATE_USED_DEFAULT_CHARSET |
4906                                   HA_CREATE_USED_PACK_KEYS |
4907                                   HA_CREATE_USED_MAX_ROWS) ||
4908       (table->s->row_type != create_info->row_type))
4909     DBUG_RETURN(HA_ALTER_INPLACE_NOT_SUPPORTED);
4910 
4911   uint table_changes= (ha_alter_info->handler_flags &
4912                        Alter_inplace_info::ALTER_COLUMN_EQUAL_PACK_LENGTH) ?
4913     IS_EQUAL_PACK_LENGTH : IS_EQUAL_YES;
4914   if (table->file->check_if_incompatible_data(create_info, table_changes)
4915       == COMPATIBLE_DATA_YES)
4916     DBUG_RETURN(HA_ALTER_INPLACE_EXCLUSIVE_LOCK);
4917 
4918   DBUG_RETURN(HA_ALTER_INPLACE_NOT_SUPPORTED);
4919 }
4920 
4921 
4922 /*
4923    Default implementation to support in-place alter table
4924    and old online add/drop index API
4925 */
4926 
notify_table_changed()4927 void handler::notify_table_changed()
4928 {
4929   ha_create_handler_files(table->s->path.str, NULL, CHF_INDEX_FLAG, NULL);
4930 }
4931 
4932 
report_unsupported_error(const char * not_supported,const char * try_instead)4933 void Alter_inplace_info::report_unsupported_error(const char *not_supported,
4934                                                   const char *try_instead)
4935 {
4936   if (unsupported_reason == NULL)
4937     my_error(ER_ALTER_OPERATION_NOT_SUPPORTED, MYF(0),
4938              not_supported, try_instead);
4939   else
4940     my_error(ER_ALTER_OPERATION_NOT_SUPPORTED_REASON, MYF(0),
4941              not_supported, unsupported_reason, try_instead);
4942 }
4943 
4944 
4945 /**
4946   Rename table: public interface.
4947 
4948   @sa handler::rename_table()
4949 */
4950 
4951 int
ha_rename_table(const char * from,const char * to)4952 handler::ha_rename_table(const char *from, const char *to)
4953 {
4954   assert(m_lock_type == F_UNLCK);
4955   mark_trx_read_write();
4956 
4957   return rename_table(from, to);
4958 }
4959 
4960 
4961 /**
4962   Delete table: public interface.
4963 
4964   @sa handler::delete_table()
4965 */
4966 
4967 int
ha_delete_table(const char * name)4968 handler::ha_delete_table(const char *name)
4969 {
4970   assert(m_lock_type == F_UNLCK);
4971   mark_trx_read_write();
4972 
4973   return delete_table(name);
4974 }
4975 
4976 
4977 /**
4978   Drop table in the engine: public interface.
4979 
4980   @sa handler::drop_table()
4981 */
4982 
4983 void
ha_drop_table(const char * name)4984 handler::ha_drop_table(const char *name)
4985 {
4986   assert(m_lock_type == F_UNLCK);
4987   mark_trx_read_write();
4988 
4989   return drop_table(name);
4990 }
4991 
4992 
4993 /**
4994   Create a table in the engine: public interface.
4995 
4996   @sa handler::create()
4997 */
4998 
4999 int
ha_create(const char * name,TABLE * form,HA_CREATE_INFO * info)5000 handler::ha_create(const char *name, TABLE *form, HA_CREATE_INFO *info)
5001 {
5002   assert(m_lock_type == F_UNLCK);
5003   mark_trx_read_write();
5004 
5005   return create(name, form, info);
5006 }
5007 
5008 
5009 /**
5010   Create handler files for CREATE TABLE: public interface.
5011 
5012   @sa handler::create_handler_files()
5013 */
5014 
5015 int
ha_create_handler_files(const char * name,const char * old_name,int action_flag,HA_CREATE_INFO * info)5016 handler::ha_create_handler_files(const char *name, const char *old_name,
5017                         int action_flag, HA_CREATE_INFO *info)
5018 {
5019   /*
5020     Normally this is done when unlocked, but in fast_alter_partition_table,
5021     it is done on an already locked handler when preparing to alter/rename
5022     partitions.
5023   */
5024   assert(m_lock_type == F_UNLCK ||
5025          (!old_name && strcmp(name, table_share->path.str)));
5026   mark_trx_read_write();
5027 
5028   return create_handler_files(name, old_name, action_flag, info);
5029 }
5030 
5031 
5032 /**
5033   Tell the storage engine that it is allowed to "disable transaction" in the
5034   handler. It is a hint that ACID is not required - it is used in NDB for
5035   ALTER TABLE, for example, when data are copied to temporary table.
5036   A storage engine may treat this hint any way it likes. NDB for example
5037   starts to commit every now and then automatically.
5038   This hint can be safely ignored.
5039 */
ha_enable_transaction(THD * thd,bool on)5040 int ha_enable_transaction(THD *thd, bool on)
5041 {
5042   int error=0;
5043   DBUG_ENTER("ha_enable_transaction");
5044   DBUG_PRINT("enter", ("on: %d", (int) on));
5045 
5046   if ((thd->get_transaction()->m_flags.enabled= on))
5047   {
5048     /*
5049       Now all storage engines should have transaction handling enabled.
5050       But some may have it enabled all the time - "disabling" transactions
5051       is an optimization hint that storage engine is free to ignore.
5052       So, let's commit an open transaction (if any) now.
5053     */
5054     if (!(error= ha_commit_trans(thd, 0)))
5055       error= trans_commit_implicit(thd);
5056   }
5057   DBUG_RETURN(error);
5058 }
5059 
index_next_same(uchar * buf,const uchar * key,uint keylen)5060 int handler::index_next_same(uchar *buf, const uchar *key, uint keylen)
5061 {
5062   int error;
5063   DBUG_ENTER("index_next_same");
5064   if (!(error=index_next(buf)))
5065   {
5066     my_ptrdiff_t ptrdiff= buf - table->record[0];
5067     uchar *save_record_0= NULL;
5068     KEY *key_info= NULL;
5069     KEY_PART_INFO *key_part= NULL;
5070     KEY_PART_INFO *key_part_end= NULL;
5071 
5072     /*
5073       key_cmp_if_same() compares table->record[0] against 'key'.
5074       In parts it uses table->record[0] directly, in parts it uses
5075       field objects with their local pointers into table->record[0].
5076       If 'buf' is distinct from table->record[0], we need to move
5077       all record references. This is table->record[0] itself and
5078       the field pointers of the fields used in this key.
5079     */
5080     if (ptrdiff)
5081     {
5082       save_record_0= table->record[0];
5083       table->record[0]= buf;
5084       key_info= table->key_info + active_index;
5085       key_part= key_info->key_part;
5086       key_part_end= key_part + key_info->user_defined_key_parts;
5087       for (; key_part < key_part_end; key_part++)
5088       {
5089         assert(key_part->field);
5090         key_part->field->move_field_offset(ptrdiff);
5091       }
5092     }
5093 
5094     if (key_cmp_if_same(table, key, active_index, keylen))
5095     {
5096       table->status=STATUS_NOT_FOUND;
5097       error=HA_ERR_END_OF_FILE;
5098     }
5099 
5100     /* Move back if necessary. */
5101     if (ptrdiff)
5102     {
5103       table->record[0]= save_record_0;
5104       for (key_part= key_info->key_part; key_part < key_part_end; key_part++)
5105         key_part->field->move_field_offset(-ptrdiff);
5106     }
5107   }
5108   DBUG_RETURN(error);
5109 }
5110 
5111 /****************************************************************************
** Some general functions that aren't in the handler class
5113 ****************************************************************************/
5114 
5115 /**
5116   Initiates table-file and calls appropriate database-creator.
5117 
5118   @retval
5119    0  ok
5120   @retval
5121    1  error
5122 */
ha_create_table(THD * thd,const char * path,const char * db,const char * table_name,HA_CREATE_INFO * create_info,bool update_create_info,bool is_temp_table)5123 int ha_create_table(THD *thd, const char *path,
5124                     const char *db, const char *table_name,
5125                     HA_CREATE_INFO *create_info,
5126                     bool update_create_info,
5127                     bool is_temp_table)
5128 {
5129   int error= 1;
5130   TABLE table;
5131   char name_buff[FN_REFLEN];
5132   const char *name;
5133   TABLE_SHARE share;
5134 #ifdef HAVE_PSI_TABLE_INTERFACE
5135   bool temp_table = is_temp_table ||
5136     (create_info->options & HA_LEX_CREATE_TMP_TABLE) ||
5137     (strstr(path, tmp_file_prefix) != NULL);
5138 #endif
5139   DBUG_ENTER("ha_create_table");
5140 
5141   init_tmp_table_share(thd, &share, db, 0, table_name, path);
5142   if (open_table_def(thd, &share, 0))
5143     goto err;
5144 
5145 #ifdef HAVE_PSI_TABLE_INTERFACE
5146   share.m_psi= PSI_TABLE_CALL(get_table_share)(temp_table, &share);
5147 #endif
5148 
5149   if (open_table_from_share(thd, &share, "", 0, (uint) READ_ALL, 0, &table,
5150                             TRUE))
5151   {
5152 #ifdef HAVE_PSI_TABLE_INTERFACE
5153     PSI_TABLE_CALL(drop_table_share)
5154       (temp_table, db, strlen(db), table_name, strlen(table_name));
5155 #endif
5156     goto err;
5157   }
5158 
5159   if (update_create_info)
5160     update_create_info_from_table(create_info, &table);
5161 
5162   name= get_canonical_filename(table.file, share.path.str, name_buff);
5163 
5164   error= table.file->ha_create(name, &table, create_info);
5165   if (error)
5166   {
5167     table.file->print_error(error, MYF(0));
5168 #ifdef HAVE_PSI_TABLE_INTERFACE
5169     PSI_TABLE_CALL(drop_table_share)
5170       (temp_table, db, strlen(db), table_name, strlen(table_name));
5171 #endif
5172   }
5173   (void) closefrm(&table, 0);
5174 err:
5175   free_table_share(&share);
5176   DBUG_RETURN(error != 0);
5177 }
5178 
5179 /**
5180   Try to discover table from engine.
5181 
5182   @note
5183     If found, write the frm file to disk.
5184 
5185   @retval
5186   -1    Table did not exists
5187   @retval
5188    0    Table created ok
5189   @retval
5190    > 0  Error, table existed but could not be created
5191 */
ha_create_table_from_engine(THD * thd,const char * db,const char * name)5192 int ha_create_table_from_engine(THD* thd, const char *db, const char *name)
5193 {
5194   int error;
5195   uchar *frmblob;
5196   size_t frmlen;
5197   char path[FN_REFLEN + 1];
5198   HA_CREATE_INFO create_info;
5199   TABLE table;
5200   TABLE_SHARE share;
5201   DBUG_ENTER("ha_create_table_from_engine");
5202   DBUG_PRINT("enter", ("name '%s'.'%s'", db, name));
5203 
5204   if ((error= ha_discover(thd, db, name, &frmblob, &frmlen)))
5205   {
5206     /* Table could not be discovered and thus not created */
5207     DBUG_RETURN(error);
5208   }
5209 
5210   /*
5211     Table exists in handler and could be discovered
5212     frmblob and frmlen are set, write the frm to disk
5213   */
5214 
5215   build_table_filename(path, sizeof(path) - 1, db, name, "", 0);
5216   // Save the frm file
5217   error= writefrm(path, frmblob, frmlen);
5218   my_free(frmblob);
5219   if (error)
5220     DBUG_RETURN(2);
5221 
5222   init_tmp_table_share(thd, &share, db, 0, name, path);
5223   if (open_table_def(thd, &share, 0))
5224   {
5225     DBUG_RETURN(3);
5226   }
5227 
5228 #ifdef HAVE_PSI_TABLE_INTERFACE
5229   /*
5230     Table discovery is not instrumented.
5231     Once discovered, the table will be opened normally,
5232     and instrumented normally.
5233   */
5234 #endif
5235 
5236   if (open_table_from_share(thd, &share, "" ,0, 0, 0, &table, FALSE))
5237   {
5238     free_table_share(&share);
5239     DBUG_RETURN(3);
5240   }
5241 
5242   update_create_info_from_table(&create_info, &table);
5243   create_info.table_options|= HA_OPTION_CREATE_FROM_ENGINE;
5244 
5245   get_canonical_filename(table.file, path, path);
5246   error=table.file->ha_create(path, &table, &create_info);
5247   (void) closefrm(&table, 1);
5248 
5249   DBUG_RETURN(error != 0);
5250 }
5251 
5252 
5253 /**
5254   Try to find a table in a storage engine.
5255 
5256   @param db   Normalized table schema name
5257   @param name Normalized table name.
5258   @param[out] exists Only valid if the function succeeded.
5259 
5260   @retval TRUE   An error is found
5261   @retval FALSE  Success, check *exists
5262 */
5263 
5264 bool
ha_check_if_table_exists(THD * thd,const char * db,const char * name,bool * exists)5265 ha_check_if_table_exists(THD* thd, const char *db, const char *name,
5266                          bool *exists)
5267 {
5268   uchar *frmblob= NULL;
5269   size_t frmlen;
5270   DBUG_ENTER("ha_check_if_table_exists");
5271 
5272   *exists= ! ha_discover(thd, db, name, &frmblob, &frmlen);
5273   if (*exists)
5274     my_free(frmblob);
5275 
5276   DBUG_RETURN(FALSE);
5277 }
5278 
5279 
5280 /**
5281   @brief Check if a given table is a user table or a valid system table or
5282          a valid system table that a SE supports.
5283 
5284   @param   hton                  Handlerton of new engine.
5285   @param   db                    Database name.
5286   @param   table_name            Table name to be checked.
5287 
5288   @retval  st_sys_tbl_chk_params::enum_status
5289 */
5290 static st_sys_tbl_chk_params::enum_status
ha_get_system_table_check_status(handlerton * hton,const char * db,const char * table_name)5291 ha_get_system_table_check_status(handlerton *hton, const char *db,
5292                                    const char *table_name)
5293 {
5294   DBUG_ENTER("ha_get_system_table_check_status");
5295   st_sys_tbl_chk_params check_params;
5296   check_params.status= st_sys_tbl_chk_params::USER_TABLE;
5297   bool is_system_database= false;
5298   const char **names;
5299   st_handler_tablename *systab;
5300 
5301   // Check if we have a system database name in the command.
5302   assert(known_system_databases != NULL);
5303   names= known_system_databases;
5304   while (names && *names)
5305   {
5306     if (strcmp(*names, db) == 0)
5307     {
5308       /* Used to compare later, will be faster */
5309       check_params.db= *names;
5310       is_system_database= true;
5311       break;
5312     }
5313     names++;
5314   }
5315   if (!is_system_database)
5316     DBUG_RETURN(st_sys_tbl_chk_params::USER_TABLE);
5317 
5318   // Check if this is SQL layer system tables.
5319   systab= mysqld_system_tables;
5320   check_params.is_sql_layer_system_table= false;
5321   while (systab && systab->db)
5322   {
5323     if (systab->db == check_params.db &&
5324         strcmp(systab->tablename, table_name) == 0)
5325     {
5326       check_params.is_sql_layer_system_table= true;
5327       break;
5328     }
5329     systab++;
5330   }
5331 
5332   // Check if this is a system table and if some engine supports it.
5333   check_params.status= check_params.is_sql_layer_system_table ?
5334     st_sys_tbl_chk_params::SYSTEM_TABLE :
5335     st_sys_tbl_chk_params::USER_TABLE;
5336   check_params.db_type= hton->db_type;
5337   check_params.table_name= table_name;
5338   plugin_foreach(NULL, check_engine_system_table_handlerton,
5339                  MYSQL_STORAGE_ENGINE_PLUGIN, &check_params);
5340 
5341   DBUG_RETURN(check_params.status);
5342 }
5343 
5344 
5345 /**
5346   @brief Check if a given table is a system table supported by a SE.
5347 
5348   @todo There is another function called is_system_table_name() used by
5349         get_table_category(), which is used to set TABLE_SHARE table_category.
5350         It checks only a subset of table name like proc, event and time*.
5351         We cannot use below function in get_table_category(),
5352         as that affects locking mechanism. If we need to
5353         unify these functions, we need to fix locking issues generated.
5354 
5355   @param   hton                  Handlerton of new engine.
5356   @param   db                    Database name.
5357   @param   table_name            Table name to be checked.
5358 
5359   @return Operation status
5360     @retval  true                If the table name is a valid system table
5361                                  that is supported by a SE.
5362 
5363     @retval  false               Not a system table.
5364 */
ha_is_supported_system_table(handlerton * hton,const char * db,const char * table_name)5365 bool ha_is_supported_system_table(handlerton *hton, const char *db,
5366                                   const char *table_name)
5367 {
5368   DBUG_ENTER("ha_is_supported_system_table");
5369   st_sys_tbl_chk_params::enum_status status=
5370     ha_get_system_table_check_status(hton, db, table_name);
5371 
5372   // It's a valid SE supported system table.
5373   DBUG_RETURN(status == st_sys_tbl_chk_params::SE_SUPPORTED_SYSTEM_TABLE);
5374 }
5375 
5376 
5377 /**
5378   @brief Check if a given table is a system table that belongs
5379   to some SE or a user table.
5380 
5381   @details The primary purpose of introducing this function is to stop system
5382   tables to be created or being moved to undesired storage engines.
5383 
5384   @todo There is another function called is_system_table_name() used by
5385         get_table_category(), which is used to set TABLE_SHARE table_category.
5386         It checks only a subset of table name like proc, event and time*.
5387         We cannot use below function in get_table_category(),
5388         as that affects locking mechanism. If we need to
5389         unify these functions, we need to fix locking issues generated.
5390 
5391   @param   hton                  Handlerton of new engine.
5392   @param   db                    Database name.
5393   @param   table_name            Table name to be checked.
5394 
5395   @return Operation status
5396     @retval  true                If the table name is a valid system table
5397                                  or if its a valid user table.
5398 
5399     @retval  false               If the table name is a system table name
5400                                  and does not belong to engine specified
5401                                  in the command.
5402 */
ha_is_valid_system_or_user_table(handlerton * hton,const char * db,const char * table_name)5403 bool ha_is_valid_system_or_user_table(handlerton *hton, const char *db,
5404                                       const char *table_name)
5405 {
5406   DBUG_ENTER("ha_is_valid_system_or_user_table");
5407 
5408   st_sys_tbl_chk_params::enum_status status=
5409     ha_get_system_table_check_status(hton, db, table_name);
5410 
5411   // It's a user table or a valid SE supported system table.
5412   DBUG_RETURN(status == st_sys_tbl_chk_params::USER_TABLE ||
5413               status == st_sys_tbl_chk_params::SE_SUPPORTED_SYSTEM_TABLE);
5414 }
5415 
5416 
5417 /**
5418   @brief Called for each SE to check if given db, tablename is a system table.
5419 
5420   @details The primary purpose of introducing this function is to stop system
5421   tables to be created or being moved to undesired storage engines.
5422 
5423   @param   unused  unused THD*
5424   @param   plugin  Points to specific SE.
5425   @param   arg     Is of type struct st_sys_tbl_chk_params.
5426 
5427   @note
5428     args->status   Indicates OUT param,
5429                    see struct st_sys_tbl_chk_params definition for more info.
5430 
5431   @return Operation status
5432     @retval  true  There was a match found.
5433                    This will stop doing checks with other SE's.
5434 
5435     @retval  false There was no match found.
5436                    Other SE's will be checked to find a match.
5437 */
check_engine_system_table_handlerton(THD * unused,plugin_ref plugin,void * arg)5438 static my_bool check_engine_system_table_handlerton(THD *unused,
5439                                                     plugin_ref plugin,
5440                                                     void *arg)
5441 {
5442   st_sys_tbl_chk_params *check_params= (st_sys_tbl_chk_params*) arg;
5443   handlerton *hton= plugin_data<handlerton*>(plugin);
5444 
5445   // Do we already know that the table is a system table?
5446   if (check_params->status == st_sys_tbl_chk_params::SYSTEM_TABLE)
5447   {
5448     /*
5449       If this is the same SE specified in the command, we can
5450       simply ask the SE if it supports it stop the search regardless.
5451     */
5452     if (hton->db_type == check_params->db_type)
5453     {
5454       if (hton->is_supported_system_table &&
5455           hton->is_supported_system_table(check_params->db,
5456                                        check_params->table_name,
5457                                        check_params->is_sql_layer_system_table))
5458         check_params->status=
5459           st_sys_tbl_chk_params::SE_SUPPORTED_SYSTEM_TABLE;
5460       return TRUE;
5461     }
5462     /*
5463       If this is a different SE, there is no point in asking the SE
5464       since we already know it's a system table and we don't care
5465       if it is supported or not.
5466     */
5467     return FALSE;
5468   }
5469 
5470   /*
5471     We don't yet know if the table is a system table or not.
5472     We therefore must always ask the SE.
5473   */
5474   if (hton->is_supported_system_table &&
5475       hton->is_supported_system_table(check_params->db,
5476                                       check_params->table_name,
5477                                       check_params->is_sql_layer_system_table))
5478   {
5479     /*
5480       If this is the same SE specified in the command, we know it's a
5481       supported system table and can stop the search.
5482     */
5483     if (hton->db_type == check_params->db_type)
5484     {
5485       check_params->status= st_sys_tbl_chk_params::SE_SUPPORTED_SYSTEM_TABLE;
5486       return TRUE;
5487     }
5488     else
5489       check_params->status= st_sys_tbl_chk_params::SYSTEM_TABLE;
5490   }
5491 
5492   return FALSE;
5493 }
5494 
5495 /*
5496   Prepare list of all known system database names
5497   current we just have 'mysql' as system database name.
5498 
5499   Later ndbcluster, innodb SE's can define some new database
5500   name which can store system tables specific to SE.
5501 */
ha_known_system_databases(void)5502 const char** ha_known_system_databases(void)
5503 {
5504   list<const char*> found_databases;
5505   const char **databases, **database;
5506 
5507   // Get mysqld system database name.
5508   found_databases.push_back((char*) mysqld_system_database);
5509 
5510   // Get system database names from every specific storage engine.
5511   plugin_foreach(NULL, system_databases_handlerton,
5512                  MYSQL_STORAGE_ENGINE_PLUGIN, &found_databases);
5513 
5514   databases= (const char **) my_once_alloc(sizeof(char *)*
5515                                      (found_databases.size()+1),
5516                                      MYF(MY_WME | MY_FAE));
5517   assert(databases != NULL);
5518 
5519   list<const char*>::iterator it;
5520   database= databases;
5521   for (it= found_databases.begin(); it != found_databases.end(); it++)
5522     *database++= *it;
5523   *database= 0; // Last element.
5524 
5525   return databases;
5526 }
5527 
5528 /**
5529   @brief Fetch system database name specific to SE.
5530 
5531   @details This function is invoked by plugin_foreach() from
5532            ha_known_system_databases(), for each storage engine.
5533 */
system_databases_handlerton(THD * unused,plugin_ref plugin,void * arg)5534 static my_bool system_databases_handlerton(THD *unused, plugin_ref plugin,
5535                                            void *arg)
5536 {
5537   list<const char*> *found_databases= (list<const char*> *) arg;
5538   const char *db;
5539 
5540   handlerton *hton= plugin_data<handlerton*>(plugin);
5541   if (hton->system_database)
5542   {
5543     db= hton->system_database();
5544     if (db)
5545       found_databases->push_back(db);
5546   }
5547 
5548   return FALSE;
5549 }
5550 
init()5551 void st_ha_check_opt::init()
5552 {
5553   flags= sql_flags= 0;
5554 }
5555 
5556 
5557 /*****************************************************************************
5558   Key cache handling.
5559 
5560   This code is only relevant for ISAM/MyISAM tables
5561 
  key_cache->cache may be 0 only in the case where a key cache is not
  initialized or when we were not able to init the key cache in a previous
  call to ha_init_key_cache() (probably out of memory)
5565 *****************************************************************************/
5566 
5567 /**
5568   Init a key cache if it has not been initied before.
5569 */
ha_init_key_cache(const char * name,KEY_CACHE * key_cache)5570 int ha_init_key_cache(const char *name, KEY_CACHE *key_cache)
5571 {
5572   DBUG_ENTER("ha_init_key_cache");
5573 
5574   if (!key_cache->key_cache_inited)
5575   {
5576     mysql_mutex_lock(&LOCK_global_system_variables);
5577     size_t tmp_buff_size= (size_t) key_cache->param_buff_size;
5578     ulonglong tmp_block_size= key_cache->param_block_size;
5579     ulonglong division_limit= key_cache->param_division_limit;
5580     ulonglong age_threshold=  key_cache->param_age_threshold;
5581     mysql_mutex_unlock(&LOCK_global_system_variables);
5582     DBUG_RETURN(!init_key_cache(key_cache,
5583 				tmp_block_size,
5584 				tmp_buff_size,
5585 				division_limit, age_threshold));
5586   }
5587   DBUG_RETURN(0);
5588 }
5589 
5590 
5591 /**
5592   Resize key cache.
5593 */
ha_resize_key_cache(KEY_CACHE * key_cache)5594 int ha_resize_key_cache(KEY_CACHE *key_cache)
5595 {
5596   DBUG_ENTER("ha_resize_key_cache");
5597 
5598   if (key_cache->key_cache_inited)
5599   {
5600     mysql_mutex_lock(&LOCK_global_system_variables);
5601     size_t tmp_buff_size= (size_t) key_cache->param_buff_size;
5602     ulonglong tmp_block_size= key_cache->param_block_size;
5603     ulonglong division_limit= key_cache->param_division_limit;
5604     ulonglong age_threshold=  key_cache->param_age_threshold;
5605     mysql_mutex_unlock(&LOCK_global_system_variables);
5606     const int retval= resize_key_cache(key_cache,
5607                                        keycache_thread_var(),
5608                                        tmp_block_size,
5609                                        tmp_buff_size,
5610                                        division_limit, age_threshold);
5611     DBUG_RETURN(!retval);
5612   }
5613   DBUG_RETURN(0);
5614 }
5615 
5616 
5617 /**
5618   Move all tables from one key cache to another one.
5619 */
ha_change_key_cache(KEY_CACHE * old_key_cache,KEY_CACHE * new_key_cache)5620 int ha_change_key_cache(KEY_CACHE *old_key_cache,
5621 			KEY_CACHE *new_key_cache)
5622 {
5623   mi_change_key_cache(old_key_cache, new_key_cache);
5624   return 0;
5625 }
5626 
5627 
5628 /**
5629   Try to discover one table from handler(s).
5630 
5631   @retval
    -1   Table does not exist
5633   @retval
5634     0   OK. In this case *frmblob and *frmlen are set
5635   @retval
5636     >0   error.  frmblob and frmlen may not be set
5637 */
/*
  Argument bundle that ha_discover() passes through plugin_foreach() to
  discover_handlerton(). Every member is forwarded unchanged to the
  engine's discover callback.
*/
struct st_discover_args
{
  const char *db;      // Schema name of the table to discover
  const char *name;    // Name of the table to discover
  uchar **frmblob;     // Caller's frm blob pointer, handed to the engine
  size_t *frmlen;      // Caller's frm length, handed to the engine
};
5645 
discover_handlerton(THD * thd,plugin_ref plugin,void * arg)5646 static my_bool discover_handlerton(THD *thd, plugin_ref plugin,
5647                                    void *arg)
5648 {
5649   st_discover_args *vargs= (st_discover_args *)arg;
5650   handlerton *hton= plugin_data<handlerton*>(plugin);
5651   if (hton->state == SHOW_OPTION_YES && hton->discover &&
5652       (!(hton->discover(hton, thd, vargs->db, vargs->name,
5653                         vargs->frmblob,
5654                         vargs->frmlen))))
5655     return TRUE;
5656 
5657   return FALSE;
5658 }
5659 
ha_discover(THD * thd,const char * db,const char * name,uchar ** frmblob,size_t * frmlen)5660 int ha_discover(THD *thd, const char *db, const char *name,
5661 		uchar **frmblob, size_t *frmlen)
5662 {
5663   int error= -1; // Table does not exist in any handler
5664   DBUG_ENTER("ha_discover");
5665   DBUG_PRINT("enter", ("db: %s, name: %s", db, name));
5666   st_discover_args args= {db, name, frmblob, frmlen};
5667 
5668   if (is_prefix(name,tmp_file_prefix)) /* skip temporary tables */
5669     DBUG_RETURN(error);
5670 
5671   if (plugin_foreach(thd, discover_handlerton,
5672                  MYSQL_STORAGE_ENGINE_PLUGIN, &args))
5673     error= 0;
5674 
5675   if (!error)
5676   {
5677     assert(!thd->status_var_aggregated);
5678     thd->status_var.ha_discover_count++;
5679   }
5680   DBUG_RETURN(error);
5681 }
5682 
5683 
5684 /**
  Call this function in order to give the handler the possibility
5686   to ask engine if there are any new tables that should be written to disk
5687   or any dropped tables that need to be removed from disk
5688 */
/*
  Argument bundle that ha_find_files() passes through plugin_foreach() to
  find_files_handlerton(). Every member is forwarded unchanged to the
  engine's find_files callback.
*/
struct st_find_files_args
{
  const char *db;            // 'db' argument of ha_find_files()
  const char *path;          // 'path' argument of ha_find_files()
  const char *wild;          // 'wild' argument; may be NULL
  bool dir;                  // 'dir' argument of ha_find_files()
  List<LEX_STRING> *files;   // 'files' list argument of ha_find_files()
};
5697 
find_files_handlerton(THD * thd,plugin_ref plugin,void * arg)5698 static my_bool find_files_handlerton(THD *thd, plugin_ref plugin,
5699                                    void *arg)
5700 {
5701   st_find_files_args *vargs= (st_find_files_args *)arg;
5702   handlerton *hton= plugin_data<handlerton*>(plugin);
5703 
5704 
5705   if (hton->state == SHOW_OPTION_YES && hton->find_files)
5706       if (hton->find_files(hton, thd, vargs->db, vargs->path, vargs->wild,
5707                           vargs->dir, vargs->files))
5708         return TRUE;
5709 
5710   return FALSE;
5711 }
5712 
5713 int
ha_find_files(THD * thd,const char * db,const char * path,const char * wild,bool dir,List<LEX_STRING> * files)5714 ha_find_files(THD *thd,const char *db,const char *path,
5715 	      const char *wild, bool dir, List<LEX_STRING> *files)
5716 {
5717   int error= 0;
5718   DBUG_ENTER("ha_find_files");
5719   DBUG_PRINT("enter", ("db: '%s'  path: '%s'  wild: '%s'  dir: %d",
5720 		       db, path, wild ? wild : "NULL", dir));
5721   st_find_files_args args= {db, path, wild, dir, files};
5722 
5723   plugin_foreach(thd, find_files_handlerton,
5724                  MYSQL_STORAGE_ENGINE_PLUGIN, &args);
5725   /* The return value is not currently used */
5726   DBUG_RETURN(error);
5727 }
5728 
5729 /**
5730   Ask handler if the table exists in engine.
5731   @retval
5732     HA_ERR_NO_SUCH_TABLE     Table does not exist
5733   @retval
5734     HA_ERR_TABLE_EXIST       Table exists
5735   @retval
5736     \#                  Error code
5737 */
/*
  Argument bundle that ha_table_exists_in_engine() passes through
  plugin_foreach() to table_exists_in_engine_handlerton().
*/
struct st_table_exists_in_engine_args
{
  const char *db;     // Schema name forwarded to the engine callback
  const char *name;   // Table name forwarded to the engine callback
  int err;            // Out: result stored by the most recently visited engine
};
5744 
table_exists_in_engine_handlerton(THD * thd,plugin_ref plugin,void * arg)5745 static my_bool table_exists_in_engine_handlerton(THD *thd, plugin_ref plugin,
5746                                    void *arg)
5747 {
5748   st_table_exists_in_engine_args *vargs= (st_table_exists_in_engine_args *)arg;
5749   handlerton *hton= plugin_data<handlerton*>(plugin);
5750 
5751   int err= HA_ERR_NO_SUCH_TABLE;
5752 
5753   if (hton->state == SHOW_OPTION_YES && hton->table_exists_in_engine)
5754     err = hton->table_exists_in_engine(hton, thd, vargs->db, vargs->name);
5755 
5756   vargs->err = err;
5757   if (vargs->err == HA_ERR_TABLE_EXIST)
5758     return TRUE;
5759 
5760   return FALSE;
5761 }
5762 
ha_table_exists_in_engine(THD * thd,const char * db,const char * name)5763 int ha_table_exists_in_engine(THD* thd, const char* db, const char* name)
5764 {
5765   DBUG_ENTER("ha_table_exists_in_engine");
5766   DBUG_PRINT("enter", ("db: %s, name: %s", db, name));
5767   st_table_exists_in_engine_args args= {db, name, HA_ERR_NO_SUCH_TABLE};
5768   plugin_foreach(thd, table_exists_in_engine_handlerton,
5769                  MYSQL_STORAGE_ENGINE_PLUGIN, &args);
5770   DBUG_PRINT("exit", ("error: %d", args.err));
5771   DBUG_RETURN(args.err);
5772 }
5773 
5774 /**
5775   Prepare (sub-) sequences of joins in this statement
5776   which may be pushed to each storage engine for execution.
5777 */
/*
  Argument bundle that ha_make_pushed_joins() passes through
  plugin_foreach() to make_pushed_join_handlerton().
*/
struct st_make_pushed_join_args
{
  const AQP::Join_plan* plan; // Query plan provided by optimizer
  int err;                    // Error code to return; set by the callback
                              // only when an engine reports an error.
};
5783 
make_pushed_join_handlerton(THD * thd,plugin_ref plugin,void * arg)5784 static my_bool make_pushed_join_handlerton(THD *thd, plugin_ref plugin,
5785                                    void *arg)
5786 {
5787   st_make_pushed_join_args *vargs= (st_make_pushed_join_args *)arg;
5788   handlerton *hton= plugin_data<handlerton*>(plugin);
5789 
5790   if (hton && hton->make_pushed_join)
5791   {
5792     const int error= hton->make_pushed_join(hton, thd, vargs->plan);
5793     if (unlikely(error))
5794     {
5795       vargs->err = error;
5796       return TRUE;
5797     }
5798   }
5799   return FALSE;
5800 }
5801 
ha_make_pushed_joins(THD * thd,const AQP::Join_plan * plan)5802 int ha_make_pushed_joins(THD *thd, const AQP::Join_plan* plan)
5803 {
5804   DBUG_ENTER("ha_make_pushed_joins");
5805   st_make_pushed_join_args args= {plan, 0};
5806   plugin_foreach(thd, make_pushed_join_handlerton,
5807                  MYSQL_STORAGE_ENGINE_PLUGIN, &args);
5808   DBUG_PRINT("exit", ("error: %d", args.err));
5809   DBUG_RETURN(args.err);
5810 }
5811 
5812 /*
5813   TODO: change this into a dynamic struct
5814   List<handlerton> does not work as
5815   1. binlog_end is called when MEM_ROOT is gone
5816   2. cannot work with thd MEM_ROOT as memory should be freed
5817 */
/* Number of slots in hton_list_st::hton. */
#define MAX_HTON_LIST_ST 63
/*
  Fixed-size list of handlertons collected by binlog_func_list().
  Note that binlog_func_list() stops adding entries once 'sz' reaches
  MAX_HTON_LIST_ST-1.
*/
struct hton_list_st
{
  handlerton *hton[MAX_HTON_LIST_ST];  // Collected handlertons, [0, sz)
  uint sz;                             // Number of entries in use
};
5824 
/*
  Describes one binlog request; binlog_func_foreach() passes both members
  to the binlog_func callback of every collected handlerton.
*/
struct binlog_func_st
{
  enum_binlog_func fn;  // Which binlog function is requested
  void *arg;            // Argument for that function; 0 when unused
};
5830 
5831 /** @brief
5832   Listing handlertons first to avoid recursive calls and deadlock
5833 */
binlog_func_list(THD * thd,plugin_ref plugin,void * arg)5834 static my_bool binlog_func_list(THD *thd, plugin_ref plugin, void *arg)
5835 {
5836   hton_list_st *hton_list= (hton_list_st *)arg;
5837   handlerton *hton= plugin_data<handlerton*>(plugin);
5838   if (hton->state == SHOW_OPTION_YES && hton->binlog_func)
5839   {
5840     uint sz= hton_list->sz;
5841     if (sz == MAX_HTON_LIST_ST-1)
5842     {
5843       /* list full */
5844       return FALSE;
5845     }
5846     hton_list->hton[sz]= hton;
5847     hton_list->sz= sz+1;
5848   }
5849   return FALSE;
5850 }
5851 
binlog_func_foreach(THD * thd,binlog_func_st * bfn)5852 static my_bool binlog_func_foreach(THD *thd, binlog_func_st *bfn)
5853 {
5854   hton_list_st hton_list;
5855   uint i, sz;
5856 
5857   hton_list.sz= 0;
5858   plugin_foreach(thd, binlog_func_list,
5859                  MYSQL_STORAGE_ENGINE_PLUGIN, &hton_list);
5860 
5861   for (i= 0, sz= hton_list.sz; i < sz ; i++)
5862     hton_list.hton[i]->binlog_func(hton_list.hton[i], thd, bfn->fn, bfn->arg);
5863   return FALSE;
5864 }
5865 
5866 
ha_reset_logs(THD * thd)5867 int ha_reset_logs(THD *thd)
5868 {
5869   binlog_func_st bfn= {BFN_RESET_LOGS, 0};
5870   binlog_func_foreach(thd, &bfn);
5871   return 0;
5872 }
5873 
ha_reset_slave(THD * thd)5874 void ha_reset_slave(THD* thd)
5875 {
5876   binlog_func_st bfn= {BFN_RESET_SLAVE, 0};
5877   binlog_func_foreach(thd, &bfn);
5878 }
5879 
ha_binlog_wait(THD * thd)5880 void ha_binlog_wait(THD* thd)
5881 {
5882   binlog_func_st bfn= {BFN_BINLOG_WAIT, 0};
5883   binlog_func_foreach(thd, &bfn);
5884 }
5885 
ha_binlog_index_purge_file(THD * thd,const char * file)5886 int ha_binlog_index_purge_file(THD *thd, const char *file)
5887 {
5888   binlog_func_st bfn= {BFN_BINLOG_PURGE_FILE, (void *)file};
5889   binlog_func_foreach(thd, &bfn);
5890   return 0;
5891 }
5892 
/*
  Argument bundle built by ha_binlog_log_query(); every member is
  forwarded unchanged to an engine's binlog_log_query callback.
*/
struct binlog_log_query_st
{
  enum_binlog_command binlog_command;  // Command being logged
  const char *query;                   // Query text
  size_t query_length;                 // Length passed along with 'query'
  const char *db;                      // Schema name
  const char *table_name;              // Table name
};
5901 
binlog_log_query_handlerton2(THD * thd,handlerton * hton,void * args)5902 static my_bool binlog_log_query_handlerton2(THD *thd,
5903                                             handlerton *hton,
5904                                             void *args)
5905 {
5906   struct binlog_log_query_st *b= (struct binlog_log_query_st*)args;
5907   if (hton->state == SHOW_OPTION_YES && hton->binlog_log_query)
5908     hton->binlog_log_query(hton, thd,
5909                            b->binlog_command,
5910                            b->query,
5911                            b->query_length,
5912                            b->db,
5913                            b->table_name);
5914   return FALSE;
5915 }
5916 
binlog_log_query_handlerton(THD * thd,plugin_ref plugin,void * args)5917 static my_bool binlog_log_query_handlerton(THD *thd,
5918                                            plugin_ref plugin,
5919                                            void *args)
5920 {
5921   return binlog_log_query_handlerton2(thd,
5922                                       plugin_data<handlerton*>(plugin), args);
5923 }
5924 
ha_binlog_log_query(THD * thd,handlerton * hton,enum_binlog_command binlog_command,const char * query,size_t query_length,const char * db,const char * table_name)5925 void ha_binlog_log_query(THD *thd, handlerton *hton,
5926                          enum_binlog_command binlog_command,
5927                          const char *query, size_t query_length,
5928                          const char *db, const char *table_name)
5929 {
5930   struct binlog_log_query_st b;
5931   b.binlog_command= binlog_command;
5932   b.query= query;
5933   b.query_length= query_length;
5934   b.db= db;
5935   b.table_name= table_name;
5936   if (hton == 0)
5937     plugin_foreach(thd, binlog_log_query_handlerton,
5938                    MYSQL_STORAGE_ENGINE_PLUGIN, &b);
5939   else
5940     binlog_log_query_handlerton2(thd, hton, &b);
5941 }
5942 
ha_binlog_end(THD * thd)5943 int ha_binlog_end(THD* thd)
5944 {
5945   binlog_func_st bfn= {BFN_BINLOG_END, 0};
5946   binlog_func_foreach(thd, &bfn);
5947   return 0;
5948 }
5949 
5950 /**
5951   Calculate cost of 'index only' scan for given index and number of records
5952 
5953   @param keynr    Index number
5954   @param records  Estimated number of records to be retrieved
5955 
5956   @note
5957     It is assumed that we will read trough the whole key range and that all
5958     key blocks are half full (normally things are much better). It is also
5959     assumed that each time we read the next key from the index, the handler
5960     performs a random seek, thus the cost is proportional to the number of
5961     blocks read.
5962 
5963   @return
5964     Estimated cost of 'index only' scan
5965 */
5966 
index_only_read_time(uint keynr,double records)5967 double handler::index_only_read_time(uint keynr, double records)
5968 {
5969   double read_time;
5970   uint keys_per_block= (stats.block_size/2/
5971                         (table_share->key_info[keynr].key_length + ref_length) +
5972                         1);
5973   read_time=((double) (records + keys_per_block-1) /
5974              (double) keys_per_block);
5975   return read_time;
5976 }
5977 
5978 
table_in_memory_estimate() const5979 double handler::table_in_memory_estimate() const
5980 {
5981   assert(stats.table_in_mem_estimate == IN_MEMORY_ESTIMATE_UNKNOWN ||
5982          (stats.table_in_mem_estimate >= 0.0 &&
5983           stats.table_in_mem_estimate <= 1.0));
5984 
5985   /*
5986     If the storage engine has supplied information about how much of the
5987     table that is currently in a memory buffer, then use this estimate.
5988   */
5989   if (stats.table_in_mem_estimate != IN_MEMORY_ESTIMATE_UNKNOWN)
5990     return stats.table_in_mem_estimate;
5991 
5992   /*
5993     The storage engine has not provided any information about how much of
5994     this index is in memory, use an heuristic to produce an estimate.
5995   */
5996   return estimate_in_memory_buffer(stats.data_file_length);
5997 }
5998 
5999 
index_in_memory_estimate(uint keyno) const6000 double handler::index_in_memory_estimate(uint keyno) const
6001 {
6002   const KEY *key= &table->key_info[keyno];
6003 
6004   /*
6005     If the storage engine has supplied information about how much of the
6006     index that is currently in a memory buffer, then use this estimate.
6007   */
6008   const double est= key->in_memory_estimate();
6009   if (est != IN_MEMORY_ESTIMATE_UNKNOWN)
6010     return est;
6011 
6012   /*
6013     The storage engine has not provided any information about how much of
6014     this index is in memory, use an heuristic to produce an estimate.
6015   */
6016   ulonglong file_length;
6017 
6018   /*
6019     If the index is a clustered primary index, then use the data file
6020     size as estimate for how large the index is.
6021   */
6022   if (keyno == table->s->primary_key && primary_key_is_clustered())
6023     file_length= stats.data_file_length;
6024   else
6025     file_length= stats.index_file_length;
6026 
6027   return estimate_in_memory_buffer(file_length);
6028 }
6029 
6030 
estimate_in_memory_buffer(ulonglong table_index_size) const6031 double handler::estimate_in_memory_buffer(ulonglong table_index_size) const
6032 {
6033   /*
6034     The storage engine has not provided any information about how much of
6035     the table/index is in memory. In this case we use a heuristic:
6036 
6037     - if the size of the table/index is less than 20 percent (pick any
6038       number) of the memory buffer, then the entire table/index is likely in
6039       memory.
6040     - if the size of the table/index is larger than the memory buffer, then
6041       assume nothing of the table/index is in memory.
6042     - if the size of the table/index is larger than 20 percent but less than
6043       the memory buffer size, then use a linear function of the table/index
6044       size that goes from 1.0 to 0.0.
6045   */
6046 
6047   /*
6048     If the storage engine has information about the size of its
6049     memory buffer, then use this. Otherwise, assume that at least 100 MB
6050     of data can be chached in memory.
6051   */
6052   longlong memory_buf_size= get_memory_buffer_size();
6053   if (memory_buf_size <= 0)
6054     memory_buf_size= 100 * 1024 * 1024;    // 100 MB
6055 
6056   /*
6057     Upper limit for the relative size of a table to be considered
6058     entirely available in a memory buffer. If the actual table size is
6059     less than this we assume it is complete cached in a memory buffer.
6060   */
6061   const double table_index_in_memory_limit= 0.2;
6062 
6063   /*
6064     Estimate for how much of the total memory buffer this table/index
6065     can occupy.
6066   */
6067   const double percent_of_mem= static_cast<double>(table_index_size) /
6068     memory_buf_size;
6069 
6070   double in_mem_est;
6071 
6072   if (percent_of_mem < table_index_in_memory_limit) // Less than 20 percent
6073     in_mem_est= 1.0;
6074   else if (percent_of_mem > 1.0)                // Larger than buffer
6075     in_mem_est= 0.0;
6076   else
6077   {
6078     /*
6079       The size of the table/index is larger than
6080       "table_index_in_memory_limit" * "memory_buf_size" but less than
6081       the total size of the memory buffer.
6082     */
6083     in_mem_est= 1.0 - (percent_of_mem - table_index_in_memory_limit) /
6084       (1.0 - table_index_in_memory_limit);
6085   }
6086   assert(in_mem_est >= 0.0 && in_mem_est <= 1.0);
6087 
6088   return in_mem_est;
6089 }
6090 
6091 
table_scan_cost()6092 Cost_estimate handler::table_scan_cost()
6093 {
6094   /*
6095     This function returns a Cost_estimate object. The function should be
6096     implemented in a way that allows the compiler to use "return value
6097     optimization" to avoid creating the temporary object for the return value
6098     and use of the copy constructor.
6099   */
6100 
6101   const double io_cost= scan_time() * table->cost_model()->page_read_cost(1.0);
6102   Cost_estimate cost;
6103   cost.add_io(io_cost);
6104   return cost;
6105 }
6106 
6107 
index_scan_cost(uint index,double ranges,double rows)6108 Cost_estimate handler::index_scan_cost(uint index, double ranges, double rows)
6109 {
6110   /*
6111     This function returns a Cost_estimate object. The function should be
6112     implemented in a way that allows the compiler to use "return value
6113     optimization" to avoid creating the temporary object for the return value
6114     and use of the copy constructor.
6115   */
6116 
6117   assert(ranges >= 0.0);
6118   assert(rows >= 0.0);
6119 
6120   const double io_cost= index_only_read_time(index, rows) *
6121     table->cost_model()->page_read_cost_index(index, 1.0);
6122   Cost_estimate cost;
6123   cost.add_io(io_cost);
6124   return cost;
6125 }
6126 
6127 
read_cost(uint index,double ranges,double rows)6128 Cost_estimate handler::read_cost(uint index, double ranges, double rows)
6129 {
6130   /*
6131     This function returns a Cost_estimate object. The function should be
6132     implemented in a way that allows the compiler to use "return value
6133     optimization" to avoid creating the temporary object for the return value
6134     and use of the copy constructor.
6135   */
6136 
6137   assert(ranges >= 0.0);
6138   assert(rows >= 0.0);
6139 
6140   const double io_cost= read_time(index, static_cast<uint>(ranges),
6141                                   static_cast<ha_rows>(rows)) *
6142                         table->cost_model()->page_read_cost(1.0);
6143   Cost_estimate cost;
6144   cost.add_io(io_cost);
6145   return cost;
6146 }
6147 
6148 
6149 /**
6150   Check if key has partially-covered columns
6151 
6152   We can't use DS-MRR to perform range scans when the ranges are over
6153   partially-covered keys, because we'll not have full key part values
6154   (we'll have their prefixes from the index) and will not be able to check
6155   if we've reached the end the range.
6156 
6157   @param keyno  Key to check
6158 
6159   @todo
6160     Allow use of DS-MRR in cases where the index has partially-covered
6161     components but they are not used for scanning.
6162 
6163   @retval TRUE   Yes
6164   @retval FALSE  No
6165 */
6166 
key_uses_partial_cols(TABLE * table,uint keyno)6167 bool key_uses_partial_cols(TABLE *table, uint keyno)
6168 {
6169   KEY_PART_INFO *kp= table->key_info[keyno].key_part;
6170   KEY_PART_INFO *kp_end= kp + table->key_info[keyno].user_defined_key_parts;
6171   for (; kp != kp_end; kp++)
6172   {
6173     if (!kp->field->part_of_key.is_set(keyno))
6174       return TRUE;
6175   }
6176   return FALSE;
6177 }
6178 
6179 /****************************************************************************
6180  * Default MRR implementation (MRR to non-MRR converter)
6181  ***************************************************************************/
6182 
6183 /**
6184   Get cost and other information about MRR scan over a known list of ranges
6185 
6186   Calculate estimated cost and other information about an MRR scan for given
6187   sequence of ranges.
6188 
6189   @param keyno           Index number
6190   @param seq             Range sequence to be traversed
6191   @param seq_init_param  First parameter for seq->init()
6192   @param n_ranges_arg    Number of ranges in the sequence, or 0 if the caller
6193                          can't efficiently determine it
6194   @param bufsz[in,out]   IN:  Size of the buffer available for use
6195                          OUT: Size of the buffer that is expected to be actually
6196                               used, or 0 if buffer is not needed.
6197   @param flags[in,out]   A combination of HA_MRR_* flags
6198   @param cost[out]       Estimated cost of MRR access
6199 
6200   @note
6201     This method (or an overriding one in a derived class) must check for
6202     thd->killed and return HA_POS_ERROR if it is not zero. This is required
6203     for a user to be able to interrupt the calculation by killing the
6204     connection/query.
6205 
6206   @retval
6207     HA_POS_ERROR  Error or the engine is unable to perform the requested
6208                   scan. Values of OUT parameters are undefined.
6209   @retval
6210     other         OK, *cost contains cost of the scan, *bufsz and *flags
6211                   contain scan parameters.
6212 */
6213 
6214 ha_rows
multi_range_read_info_const(uint keyno,RANGE_SEQ_IF * seq,void * seq_init_param,uint n_ranges_arg,uint * bufsz,uint * flags,Cost_estimate * cost)6215 handler::multi_range_read_info_const(uint keyno, RANGE_SEQ_IF *seq,
6216                                      void *seq_init_param, uint n_ranges_arg,
6217                                      uint *bufsz, uint *flags,
6218                                      Cost_estimate *cost)
6219 {
6220   KEY_MULTI_RANGE range;
6221   range_seq_t seq_it;
6222   ha_rows rows, total_rows= 0;
6223   uint n_ranges=0;
6224   THD *thd= current_thd;
6225 
6226   /* Default MRR implementation doesn't need buffer */
6227   *bufsz= 0;
6228 
6229   DBUG_EXECUTE_IF("bug13822652_2", thd->killed= THD::KILL_QUERY;);
6230 
6231   seq_it= seq->init(seq_init_param, n_ranges, *flags);
6232   while (!seq->next(seq_it, &range))
6233   {
6234     if (unlikely(thd->killed != 0))
6235       return HA_POS_ERROR;
6236 
6237     n_ranges++;
6238     key_range *min_endp, *max_endp;
6239     if (range.range_flag & GEOM_FLAG)
6240     {
6241       min_endp= &range.start_key;
6242       max_endp= NULL;
6243     }
6244     else
6245     {
6246       min_endp= range.start_key.length? &range.start_key : NULL;
6247       max_endp= range.end_key.length? &range.end_key : NULL;
6248     }
6249     /*
6250       Get the number of rows in the range. This is done by calling
6251       records_in_range() unless:
6252 
6253         1) The range is an equality range and the index is unique.
6254            There cannot be more than one matching row, so 1 is
6255            assumed. Note that it is possible that the correct number
6256            is actually 0, so the row estimate may be too high in this
6257            case. Also note: ranges of the form "x IS NULL" may have more
6258            than 1 mathing row so records_in_range() is called for these.
6259         2) a) The range is an equality range but the index is either
6260               not unique or all of the keyparts are not used.
6261            b) The user has requested that index statistics should be used
6262               for equality ranges to avoid the incurred overhead of
6263               index dives in records_in_range().
6264            c) Index statistics is available.
6265            Ranges of the form "x IS NULL" will not use index statistics
6266            because the number of rows with this value are likely to be
6267            very different than the values in the index statistics.
6268     */
6269     int keyparts_used= 0;
6270     if ((range.range_flag & UNIQUE_RANGE) &&                        // 1)
6271         !(range.range_flag & NULL_RANGE))
6272       rows= 1; /* there can be at most one row */
6273     else if ((range.range_flag & EQ_RANGE) &&                       // 2a)
6274              (range.range_flag & USE_INDEX_STATISTICS) &&           // 2b)
6275              (keyparts_used= my_count_bits(range.start_key.keypart_map)) &&
6276              table->
6277                key_info[keyno].has_records_per_key(keyparts_used-1) && // 2c)
6278              !(range.range_flag & NULL_RANGE))
6279     {
6280       rows= static_cast<ha_rows>(
6281         table->key_info[keyno].records_per_key(keyparts_used - 1));
6282     }
6283     else
6284     {
6285       DBUG_EXECUTE_IF("crash_records_in_range", DBUG_SUICIDE(););
6286       assert(min_endp || max_endp);
6287       if (HA_POS_ERROR == (rows= this->records_in_range(keyno, min_endp,
6288                                                         max_endp)))
6289       {
6290         /* Can't scan one range => can't do MRR scan at all */
6291         total_rows= HA_POS_ERROR;
6292         break;
6293       }
6294     }
6295     total_rows += rows;
6296   }
6297 
6298   if (total_rows != HA_POS_ERROR)
6299   {
6300     const Cost_model_table *const cost_model= table->cost_model();
6301 
6302     /* The following calculation is the same as in multi_range_read_info(): */
6303     *flags|= HA_MRR_USE_DEFAULT_IMPL;
6304     *flags|= HA_MRR_SUPPORT_SORTED;
6305 
6306     assert(cost->is_zero());
6307     if (*flags & HA_MRR_INDEX_ONLY)
6308       *cost= index_scan_cost(keyno, static_cast<double>(n_ranges),
6309                              static_cast<double>(total_rows));
6310     else
6311       *cost= read_cost(keyno, static_cast<double>(n_ranges),
6312                        static_cast<double>(total_rows));
6313     cost->add_cpu(cost_model->row_evaluate_cost(
6314       static_cast<double>(total_rows)) + 0.01);
6315   }
6316   return total_rows;
6317 }
6318 
6319 
6320 /**
6321   Get cost and other information about MRR scan over some sequence of ranges
6322 
6323   Calculate estimated cost and other information about an MRR scan for some
6324   sequence of ranges.
6325 
6326   The ranges themselves will be known only at execution phase. When this
6327   function is called we only know number of ranges and a (rough) E(#records)
6328   within those ranges.
6329 
6330   Currently this function is only called for "n-keypart singlepoint" ranges,
6331   i.e. each range is "keypart1=someconst1 AND ... AND keypartN=someconstN"
6332 
6333   The flags parameter is a combination of those flags: HA_MRR_SORTED,
6334   HA_MRR_INDEX_ONLY, HA_MRR_NO_ASSOCIATION, HA_MRR_LIMITS.
6335 
6336   @param keyno           Index number
6337   @param n_ranges        Estimated number of ranges (i.e. intervals) in the
6338                          range sequence.
6339   @param n_rows          Estimated total number of records contained within all
6340                          of the ranges
6341   @param bufsz[in,out]   IN:  Size of the buffer available for use
6342                          OUT: Size of the buffer that will be actually used, or
6343                               0 if buffer is not needed.
6344   @param flags[in,out]   A combination of HA_MRR_* flags
6345   @param cost[out]       Estimated cost of MRR access
6346 
6347   @retval
6348     0     OK, *cost contains cost of the scan, *bufsz and *flags contain scan
6349           parameters.
6350   @retval
6351     other Error or can't perform the requested scan
6352 */
6353 
multi_range_read_info(uint keyno,uint n_ranges,uint n_rows,uint * bufsz,uint * flags,Cost_estimate * cost)6354 ha_rows handler::multi_range_read_info(uint keyno, uint n_ranges, uint n_rows,
6355                                        uint *bufsz, uint *flags,
6356                                        Cost_estimate *cost)
6357 {
6358   *bufsz= 0; /* Default implementation doesn't need a buffer */
6359 
6360   *flags|= HA_MRR_USE_DEFAULT_IMPL;
6361   *flags|= HA_MRR_SUPPORT_SORTED;
6362 
6363   assert(cost->is_zero());
6364 
6365   /* Produce the same cost as non-MRR code does */
6366   if (*flags & HA_MRR_INDEX_ONLY)
6367     *cost= index_scan_cost(keyno, n_ranges, n_rows);
6368   else
6369     *cost= read_cost(keyno, n_ranges, n_rows);
6370   return 0;
6371 }
6372 
6373 
6374 /**
6375   Initialize the MRR scan
6376 
6377   Initialize the MRR scan. This function may do heavyweight scan
6378   initialization like row prefetching/sorting/etc (NOTE: but better not do
6379   it here as we may not need it, e.g. if we never satisfy WHERE clause on
6380   previous tables. For many implementations it would be natural to do such
6381   initializations in the first multi_read_range_next() call)
6382 
6383   mode is a combination of the following flags: HA_MRR_SORTED,
6384   HA_MRR_INDEX_ONLY, HA_MRR_NO_ASSOCIATION
6385 
6386   @param seq             Range sequence to be traversed
6387   @param seq_init_param  First parameter for seq->init()
6388   @param n_ranges        Number of ranges in the sequence
6389   @param mode            Flags, see the description section for the details
6390   @param buf             INOUT: memory buffer to be used
6391 
6392   @note
6393     One must have called index_init() before calling this function. Several
6394     multi_range_read_init() calls may be made in course of one query.
6395 
6396     Until WL#2623 is done (see its text, section 3.2), the following will
6397     also hold:
6398     The caller will guarantee that if "seq->init == mrr_ranges_array_init"
6399     then seq_init_param is an array of n_ranges KEY_MULTI_RANGE structures.
6400     This property will only be used by NDB handler until WL#2623 is done.
6401 
6402     Buffer memory management is done according to the following scenario:
6403     The caller allocates the buffer and provides it to the callee by filling
6404     the members of HANDLER_BUFFER structure.
6405     The callee consumes all or some fraction of the provided buffer space, and
6406     sets the HANDLER_BUFFER members accordingly.
6407     The callee may use the buffer memory until the next multi_range_read_init()
6408     call is made, all records have been read, or until index_end() call is
6409     made, whichever comes first.
6410 
6411   @retval 0  OK
6412   @retval 1  Error
6413 */
6414 
6415 int
multi_range_read_init(RANGE_SEQ_IF * seq_funcs,void * seq_init_param,uint n_ranges,uint mode,HANDLER_BUFFER * buf)6416 handler::multi_range_read_init(RANGE_SEQ_IF *seq_funcs, void *seq_init_param,
6417                                uint n_ranges, uint mode, HANDLER_BUFFER *buf)
6418 {
6419   DBUG_ENTER("handler::multi_range_read_init");
6420   mrr_iter= seq_funcs->init(seq_init_param, n_ranges, mode);
6421   mrr_funcs= *seq_funcs;
6422   mrr_is_output_sorted= MY_TEST(mode & HA_MRR_SORTED);
6423   mrr_have_range= FALSE;
6424   DBUG_RETURN(0);
6425 }
6426 
6427 
/**
  Get next record in MRR scan

  Default MRR implementation: read the next record. The current range is
  continued with read_range_next(); when it is exhausted, following ranges
  are fetched with mrr_funcs.next() and started with read_range_first()
  until one of them produces a result other than HA_ERR_END_OF_FILE or the
  range sequence ends.

  @param range_info  OUT  Undefined if HA_MRR_NO_ASSOCIATION flag is in effect
                          Otherwise, the opaque value associated with the range
                          that contains the returned record.

  @retval 0      OK
  @retval other  Error code
*/

int handler::multi_range_read_next(char **range_info)
{
  /*
    Start from "end of file" so that a current range that is skipped below
    (a unique equality range) is treated as exhausted.
  */
  int result= HA_ERR_END_OF_FILE;
  /* Result of the last mrr_funcs.next() call; non-zero ends the loops */
  int range_res;
  DBUG_ENTER("handler::multi_range_read_next");

  // Set status for the need to update generated fields
  m_update_generated_read_fields= table->has_gcol();

  /*
    First call after multi_range_read_init(): there is no current range to
    continue, so go straight to fetching the first range.
  */
  if (!mrr_have_range)
  {
    mrr_have_range= TRUE;
    goto start;
  }

  do
  {
    /* Save a call if there can be only one row in range. */
    if (mrr_cur_range.range_flag != (UNIQUE_RANGE | EQ_RANGE))
    {
      result= read_range_next();
      /* On success or non-EOF errors jump to the end. */
      if (result != HA_ERR_END_OF_FILE)
        break;
    }
    else
    {
      /* After a semi-consistent read the same range is read once more. */
      if (was_semi_consistent_read())
        goto scan_it_again;
    }

start:
    /* Try the next range(s) until one matches a record. */
    while (!(range_res= mrr_funcs.next(mrr_iter, &mrr_cur_range)))
    {
scan_it_again:
      /* An end point whose keypart_map is zero is passed as "no key". */
      result= read_range_first(mrr_cur_range.start_key.keypart_map ?
                                 &mrr_cur_range.start_key : 0,
                               mrr_cur_range.end_key.keypart_map ?
                                 &mrr_cur_range.end_key : 0,
                               MY_TEST(mrr_cur_range.range_flag & EQ_RANGE),
                               mrr_is_output_sorted);
      if (result != HA_ERR_END_OF_FILE)
        break;
    }
  }
  while ((result == HA_ERR_END_OF_FILE) && !range_res);

  /* Report the value associated with the current range to the caller */
  *range_info= mrr_cur_range.ptr;

  /* Update virtual generated fields */
  if (!result && m_update_generated_read_fields)
  {
    result= update_generated_read_fields(table->record[0], table, active_index);
    m_update_generated_read_fields= false;
  }

  DBUG_PRINT("exit",("handler::multi_range_read_next result %d", result));
  DBUG_RETURN(result);
}
6501 
6502 
6503 /****************************************************************************
6504  * DS-MRR implementation
6505  ***************************************************************************/
6506 
/**
  DS-MRR: Initialize and start MRR scan

  Initialize and start the MRR scan. Depending on the mode parameter, this
  may use default or DS-MRR implementation.

  The DS-MRR implementation will use a second handler object (h2) for
  doing scan on the index:
  - on the first call to this function the h2 handler will be created
    and h2 will be opened using the same index as the main handler
    is set to use. The index scan on the main index will be closed
    and it will be re-opened to read records from the table using either
    no key or the primary key. The h2 handler will be deleted when
    reset() is called (which should happen on the end of the statement).
  - when dsmrr_close() is called the index scan on h2 is closed.
  - on following calls to this function one of the following must be valid:
    a. if dsmrr_close has been called:
       the main handler (h) must be open on an index, h2 will be opened
       using this index, and the index on h will be closed and
       h will be re-opened to read records from the table using either
       no key or the primary key.
    b. dsmrr_close has not been called:
       h2 will already be open, the main handler h must be set up
       to read records from the table (handler->inited is RND) either
       using the primary index or using no index at all.

  @param h_arg           Table handler to be used
  @param seq_funcs       Interval sequence enumeration functions
  @param seq_init_param  Interval sequence enumeration parameter
  @param n_ranges        Number of ranges in the sequence.
  @param mode            HA_MRR_* modes to use
  @param buf             INOUT Buffer to use

  @retval 0     Ok, Scan started.
  @retval other Error
*/

int DsMrr_impl::dsmrr_init(handler *h_arg, RANGE_SEQ_IF *seq_funcs,
                           void *seq_init_param, uint n_ranges, uint mode,
                           HANDLER_BUFFER *buf)
{
  uint elem_size;
  int retval= 0;
  DBUG_ENTER("DsMrr_impl::dsmrr_init");
  THD *thd= h_arg->table->in_use;     // current THD

  /*
    index_merge may invoke a scan on an object for which dsmrr_info[_const]
    has not been called, so set the owner handler here as well.
  */
  h= h_arg;

  /*
    Fall back to the default MRR implementation when MRR is not enabled for
    this key, or when the caller requested the default implementation or
    sorted output.
  */
  if (!hint_key_state(thd, h->table, h->active_index,
                      MRR_HINT_ENUM, OPTIMIZER_SWITCH_MRR) ||
      mode & (HA_MRR_USE_DEFAULT_IMPL | HA_MRR_SORTED)) // DS-MRR doesn't sort
  {
    use_default_impl= TRUE;
    retval= h->handler::multi_range_read_init(seq_funcs, seq_init_param,
                                              n_ranges, mode, buf);
    DBUG_RETURN(retval);
  }

  /*
    This assert will hit if we have pushed an index condition to the
    primary key index and then "change our mind" and use a different
    index for retrieving data with MRR. One of the following criteria
    must be true:
      1. We have not pushed an index condition on this handler.
      2. We have pushed an index condition and this is on the currently used
         index.
      3. We have pushed an index condition but this is not for the primary key.
      4. We have pushed an index condition and this has been transferred to
         the clone (h2) of the handler object.
  */
  assert(!h->pushed_idx_cond ||
         h->pushed_idx_cond_keyno == h->active_index ||
         h->pushed_idx_cond_keyno != table->s->primary_key ||
         (h2 && h->pushed_idx_cond_keyno == h2->active_index));

  rowids_buf= buf->buffer;

  is_mrr_assoc= !MY_TEST(mode & HA_MRR_NO_ASSOCIATION);

  if (is_mrr_assoc)
  {
    assert(!thd->status_var_aggregated);
    table->in_use->status_var.ha_multi_range_read_init_count++;
  }

  /*
    Each buffer element is a rowid, followed by a range association pointer
    when associations are in use. Trim the end of the buffer so that it
    holds a whole number of elements.
  */
  rowids_buf_end= buf->buffer_end;
  elem_size= h->ref_length + (int)is_mrr_assoc * sizeof(void*);
  rowids_buf_last= rowids_buf +
                      ((rowids_buf_end - rowids_buf)/ elem_size)*
                      elem_size;
  rowids_buf_end= rowids_buf_last;

  /*
    The DS-MRR scan uses a second handler object (h2) for doing the
    index scan. Create this by cloning the primary handler
    object. The h2 handler object is deleted when DsMrr_impl::reset()
    is called.
  */
  if (!h2)
  {
    handler *new_h2;
    /*
      ::clone() takes up a lot of stack, especially on 64 bit platforms.
      The constant 5 is an empiric result.
      @todo Is this still the case? Leave it as it is for now but could
            likely be removed?
    */
    if (check_stack_overrun(thd, 5*STACK_MIN_SIZE, (uchar*) &new_h2))
      DBUG_RETURN(1);

    if (!(new_h2= h->clone(h->table->s->normalized_path.str, thd->mem_root)))
      DBUG_RETURN(1);
    h2= new_h2; /* Ok, now can put it into h2 */
    table->prepare_for_position();
  }

  /*
    Open the index scan on h2 using the key from the primary handler.
  */
  if (h2->active_index == MAX_KEY)
  {
    assert(h->active_index != MAX_KEY);
    const uint mrr_keyno= h->active_index;

    if ((retval= h2->ha_external_lock(thd, h->m_lock_type)))
      goto error;

    if ((retval= h2->extra(HA_EXTRA_KEYREAD)))
      goto error;

    if ((retval= h2->ha_index_init(mrr_keyno, false)))
      goto error;

    // Transfer ICP from h to h2
    if (mrr_keyno == h->pushed_idx_cond_keyno)
    {
      if (h2->idx_cond_push(mrr_keyno, h->pushed_idx_cond))
      {
        retval= 1;
        goto error;
      }
    }
    else
    {
      // Cancel any potentially previously pushed index conditions
      h2->cancel_pushed_idx_cond();
    }
  }
  else
  {
    /*
      h2 has already an open index. This happens when the DS-MRR scan
      is re-started without closing it first. In this case the primary
      handler must be used for reading records from the table, ie. it
      must not be opened for doing a new range scan. In this case
      the active_index must either not be set or be the primary key.
    */
    assert(h->inited == handler::RND);
    assert(h->active_index == MAX_KEY ||
           h->active_index == table->s->primary_key);
  }

  /*
    The index scan is now transferred to h2 and we can close the open
    index scan on the primary handler.
  */
  if (h->inited == handler::INDEX)
  {
    /*
      Calling h->ha_index_end() will invoke dsmrr_close() for this object,
      which will close the index scan on h2. We need to keep it open, so
      temporarily move h2 out of the DsMrr object.
    */
    handler *save_h2= h2;
    h2= NULL;
    retval= h->ha_index_end();
    // Restore h2 before any error handling, which expects it to be set
    h2= save_h2;
    if (retval)
      goto error;
  }

  /*
    Verify consistency between h and h2.
  */
  assert(h->inited != handler::INDEX);
  assert(h->active_index == MAX_KEY ||
         h->active_index == table->s->primary_key);
  assert(h2->inited == handler::INDEX);
  assert(h2->active_index != MAX_KEY);
  assert(h->m_lock_type == h2->m_lock_type);

  /* Start the range sequence traversal on h2 */
  if ((retval= h2->handler::multi_range_read_init(seq_funcs, seq_init_param,
                                                  n_ranges, mode, buf)))
    goto error;

  /* Collect and sort the first portion of rowids */
  if ((retval= dsmrr_fill_buffer()))
    goto error;

  /*
    If the above call has scanned through all intervals in *seq, then
    adjust *buf to indicate that the remaining buffer space will not be used.
  */
  if (dsmrr_eof)
    buf->end_of_used_area= rowids_buf_last;

  /*
     h->inited == INDEX may occur when 'range checked for each record' is
     used.
  */
  if ((h->inited != handler::RND) &&
      ((h->inited==handler::INDEX? h->ha_index_end(): FALSE) ||
       (h->ha_rnd_init(FALSE))))
  {
    retval= 1;
    goto error;
  }

  use_default_impl= FALSE;
  h->mrr_funcs= *seq_funcs;

  DBUG_RETURN(0);
error:
  /*
    Every jump to this label happens after h2 has been created (and, where
    it was temporarily cleared, restored): end its scan, release its lock,
    close it and delete it.
  */
  h2->ha_index_or_rnd_end();
  h2->ha_external_lock(thd, F_UNLCK);
  h2->ha_close();
  delete h2;
  h2= NULL;
  assert(retval != 0);
  DBUG_RETURN(retval);
}
6741 
6742 
dsmrr_close()6743 void DsMrr_impl::dsmrr_close()
6744 {
6745   DBUG_ENTER("DsMrr_impl::dsmrr_close");
6746 
6747   // If there is an open index on h2, then close it
6748   if (h2 && h2->active_index != MAX_KEY)
6749   {
6750     h2->ha_index_or_rnd_end();
6751     h2->ha_external_lock(current_thd, F_UNLCK);
6752   }
6753   use_default_impl= true;
6754   DBUG_VOID_RETURN;
6755 }
6756 
6757 
reset()6758 void DsMrr_impl::reset()
6759 {
6760   DBUG_ENTER("DsMrr_impl::reset");
6761 
6762   if (h2)
6763   {
6764     // Close any ongoing DS-MRR scan
6765     dsmrr_close();
6766 
6767     // Close and delete the h2 handler
6768     h2->ha_close();
6769     delete h2;
6770     h2= NULL;
6771   }
6772   DBUG_VOID_RETURN;
6773 }
6774 
6775 
rowid_cmp(void * h,uchar * a,uchar * b)6776 static int rowid_cmp(void *h, uchar *a, uchar *b)
6777 {
6778   return ((handler*)h)->cmp_ref(a, b);
6779 }
6780 
6781 
6782 /**
6783   DS-MRR: Fill the buffer with rowids and sort it by rowid
6784 
6785   {This is an internal function of DiskSweep MRR implementation}
6786   Scan the MRR ranges and collect ROWIDs (or {ROWID, range_id} pairs) into
6787   buffer. When the buffer is full or scan is completed, sort the buffer by
6788   rowid and return.
6789 
6790   The function assumes that rowids buffer is empty when it is invoked.
6791 
6792   @param h  Table handler
6793 
6794   @retval 0      OK, the next portion of rowids is in the buffer,
6795                  properly ordered
6796   @retval other  Error
6797 */
6798 
dsmrr_fill_buffer()6799 int DsMrr_impl::dsmrr_fill_buffer()
6800 {
6801   char *range_info;
6802   int res= 0;
6803   DBUG_ENTER("DsMrr_impl::dsmrr_fill_buffer");
6804   assert(rowids_buf < rowids_buf_end);
6805 
6806   /*
6807     Set key_read to TRUE since we only read fields from the index.
6808     This ensures that any virtual columns are read from index and are not
6809     attempted to be evaluated from base columns.
6810     (Do not use TABLE::set_keyread() since the MRR implementation operates
6811     with two handler objects, and set_keyread() would manipulate the keyread
6812     property of the wrong handler. MRR sets the handlers' keyread properties
6813     when initializing the MRR operation, independent of this call).
6814   */
6815   assert(table->key_read == FALSE);
6816   table->key_read= TRUE;
6817 
6818   rowids_buf_cur= rowids_buf;
6819   while ((rowids_buf_cur < rowids_buf_end) &&
6820          !(res= h2->handler::multi_range_read_next(&range_info)))
6821   {
6822     KEY_MULTI_RANGE *curr_range= &h2->handler::mrr_cur_range;
6823     if (h2->mrr_funcs.skip_index_tuple &&
6824         h2->mrr_funcs.skip_index_tuple(h2->mrr_iter, curr_range->ptr))
6825       continue;
6826 
6827     /* Put rowid, or {rowid, range_id} pair into the buffer */
6828     h2->position(table->record[0]);
6829     memcpy(rowids_buf_cur, h2->ref, h2->ref_length);
6830     rowids_buf_cur += h2->ref_length;
6831 
6832     if (is_mrr_assoc)
6833     {
6834       memcpy(rowids_buf_cur, &range_info, sizeof(void*));
6835       rowids_buf_cur += sizeof(void*);
6836     }
6837   }
6838 
6839   // Restore key_read since the next read operation will read complete rows
6840   table->key_read= FALSE;
6841 
6842   if (res && res != HA_ERR_END_OF_FILE)
6843     DBUG_RETURN(res);
6844   dsmrr_eof= MY_TEST(res == HA_ERR_END_OF_FILE);
6845 
6846   /* Sort the buffer contents by rowid */
6847   uint elem_size= h->ref_length + (int)is_mrr_assoc * sizeof(void*);
6848   size_t n_rowids= (rowids_buf_cur - rowids_buf) / elem_size;
6849 
6850   my_qsort2(rowids_buf, n_rowids, elem_size, (qsort2_cmp)rowid_cmp,
6851             (void*)h);
6852   rowids_buf_last= rowids_buf_cur;
6853   rowids_buf_cur=  rowids_buf;
6854   DBUG_RETURN(0);
6855 }
6856 
6857 
6858 /*
6859   DS-MRR implementation: multi_range_read_next() function
6860 */
6861 
dsmrr_next(char ** range_info)6862 int DsMrr_impl::dsmrr_next(char **range_info)
6863 {
6864   int res;
6865   uchar *cur_range_info= 0;
6866   uchar *rowid;
6867 
6868   if (use_default_impl)
6869     return h->handler::multi_range_read_next(range_info);
6870 
6871   do
6872   {
6873     if (rowids_buf_cur == rowids_buf_last)
6874     {
6875       if (dsmrr_eof)
6876       {
6877         res= HA_ERR_END_OF_FILE;
6878         goto end;
6879       }
6880 
6881       res= dsmrr_fill_buffer();
6882       if (res)
6883         goto end;
6884     }
6885 
6886     /* return eof if there are no rowids in the buffer after re-fill attempt */
6887     if (rowids_buf_cur == rowids_buf_last)
6888     {
6889       res= HA_ERR_END_OF_FILE;
6890       goto end;
6891     }
6892     rowid= rowids_buf_cur;
6893 
6894     if (is_mrr_assoc)
6895       memcpy(&cur_range_info, rowids_buf_cur + h->ref_length, sizeof(uchar*));
6896 
6897     rowids_buf_cur += h->ref_length + sizeof(void*) * MY_TEST(is_mrr_assoc);
6898     if (h2->mrr_funcs.skip_record &&
6899 	h2->mrr_funcs.skip_record(h2->mrr_iter, (char *) cur_range_info, rowid))
6900       continue;
6901     res= h->ha_rnd_pos(table->record[0], rowid);
6902     break;
6903   } while (true);
6904 
6905   if (is_mrr_assoc)
6906   {
6907     memcpy(range_info, rowid + h->ref_length, sizeof(void*));
6908   }
6909 end:
6910   return res;
6911 }
6912 
6913 
6914 /*
6915   DS-MRR implementation: multi_range_read_info() function
6916 */
dsmrr_info(uint keyno,uint n_ranges,uint rows,uint * bufsz,uint * flags,Cost_estimate * cost)6917 ha_rows DsMrr_impl::dsmrr_info(uint keyno, uint n_ranges, uint rows,
6918                                uint *bufsz, uint *flags, Cost_estimate *cost)
6919 {
6920   ha_rows res MY_ATTRIBUTE((unused));
6921   uint def_flags= *flags;
6922   uint def_bufsz= *bufsz;
6923 
6924   /* Get cost/flags/mem_usage of default MRR implementation */
6925   res=
6926     h->handler::multi_range_read_info(keyno, n_ranges, rows, &def_bufsz,
6927                                       &def_flags, cost);
6928   assert(!res);
6929 
6930   if ((*flags & HA_MRR_USE_DEFAULT_IMPL) ||
6931       choose_mrr_impl(keyno, rows, flags, bufsz, cost))
6932   {
6933     /* Default implementation is choosen */
6934     DBUG_PRINT("info", ("Default MRR implementation choosen"));
6935     *flags= def_flags;
6936     *bufsz= def_bufsz;
6937     assert(*flags & HA_MRR_USE_DEFAULT_IMPL);
6938   }
6939   else
6940   {
6941     /* *flags and *bufsz were set by choose_mrr_impl */
6942     DBUG_PRINT("info", ("DS-MRR implementation choosen"));
6943   }
6944   return 0;
6945 }
6946 
6947 
6948 /*
6949   DS-MRR Implementation: multi_range_read_info_const() function
6950 */
6951 
dsmrr_info_const(uint keyno,RANGE_SEQ_IF * seq,void * seq_init_param,uint n_ranges,uint * bufsz,uint * flags,Cost_estimate * cost)6952 ha_rows DsMrr_impl::dsmrr_info_const(uint keyno, RANGE_SEQ_IF *seq,
6953                                  void *seq_init_param, uint n_ranges,
6954                                  uint *bufsz, uint *flags, Cost_estimate *cost)
6955 {
6956   ha_rows rows;
6957   uint def_flags= *flags;
6958   uint def_bufsz= *bufsz;
6959   /* Get cost/flags/mem_usage of default MRR implementation */
6960   rows= h->handler::multi_range_read_info_const(keyno, seq, seq_init_param,
6961                                                 n_ranges, &def_bufsz,
6962                                                 &def_flags, cost);
6963   if (rows == HA_POS_ERROR)
6964   {
6965     /* Default implementation can't perform MRR scan => we can't either */
6966     return rows;
6967   }
6968 
6969   /*
6970     If HA_MRR_USE_DEFAULT_IMPL has been passed to us, that is an order to
6971     use the default MRR implementation (we need it for UPDATE/DELETE).
6972     Otherwise, make a choice based on cost and mrr* flags of
6973     @@optimizer_switch.
6974   */
6975   if ((*flags & HA_MRR_USE_DEFAULT_IMPL) ||
6976       choose_mrr_impl(keyno, rows, flags, bufsz, cost))
6977   {
6978     DBUG_PRINT("info", ("Default MRR implementation choosen"));
6979     *flags= def_flags;
6980     *bufsz= def_bufsz;
6981     assert(*flags & HA_MRR_USE_DEFAULT_IMPL);
6982   }
6983   else
6984   {
6985     /* *flags and *bufsz were set by choose_mrr_impl */
6986     DBUG_PRINT("info", ("DS-MRR implementation choosen"));
6987   }
6988   return rows;
6989 }
6990 
6991 
6992 /**
6993   DS-MRR Internals: Choose between Default MRR implementation and DS-MRR
6994 
6995   Make the choice between using Default MRR implementation and DS-MRR.
6996   This function contains common functionality factored out of dsmrr_info()
6997   and dsmrr_info_const(). The function assumes that the default MRR
6998   implementation's applicability requirements are satisfied.
6999 
7000   @param keyno       Index number
7001   @param rows        E(full rows to be retrieved)
  @param flags  IN   MRR flags provided by the MRR user
                OUT  If DS-MRR is chosen, flags of DS-MRR implementation
                     else the value is not modified
  @param bufsz  IN   Buffer size offered by the MRR user
                OUT  If DS-MRR is chosen, buffer use of DS-MRR implementation
                     else the value may have been reduced by the cost
                     calculation and should not be relied upon
  @param cost   IN   Cost of default MRR implementation
                OUT  If DS-MRR is chosen, cost of DS-MRR scan
                     else the value is not modified
7010 
7011   @retval TRUE   Default MRR implementation should be used
7012   @retval FALSE  DS-MRR implementation should be used
7013 */
7014 
choose_mrr_impl(uint keyno,ha_rows rows,uint * flags,uint * bufsz,Cost_estimate * cost)7015 bool DsMrr_impl::choose_mrr_impl(uint keyno, ha_rows rows, uint *flags,
7016                                  uint *bufsz, Cost_estimate *cost)
7017 {
7018   bool res;
7019   THD *thd= current_thd;
7020 
7021   const bool mrr_on= hint_key_state(thd, table, keyno, MRR_HINT_ENUM,
7022                                     OPTIMIZER_SWITCH_MRR);
7023   const bool force_dsmrr_by_hints=
7024     hint_key_state(thd, table, keyno, MRR_HINT_ENUM, 0) ||
7025     hint_table_state(thd, table, BKA_HINT_ENUM, 0);
7026 
7027   if (!(mrr_on || force_dsmrr_by_hints) ||
7028       *flags & (HA_MRR_INDEX_ONLY | HA_MRR_SORTED) || // Unsupported by DS-MRR
7029       (keyno == table->s->primary_key && h->primary_key_is_clustered()) ||
7030        key_uses_partial_cols(table, keyno) ||
7031        table->s->tmp_table != NO_TMP_TABLE)
7032   {
7033     /* Use the default implementation, don't modify args: See comments  */
7034     return TRUE;
7035   }
7036 
7037   /*
7038     If @@optimizer_switch has "mrr_cost_based" on, we should avoid
7039     using DS-MRR for queries where it is likely that the records are
7040     stored in memory. Since there is currently no way to determine
7041     this, we use a heuristic:
7042     a) if the storage engine has a memory buffer, DS-MRR is only
7043        considered if the table size is bigger than the buffer.
7044     b) if the storage engine does not have a memory buffer, DS-MRR is
7045        only considered if the table size is bigger than 100MB.
7046     c) Since there is an initial setup cost of DS-MRR, so it is only
7047        considered if at least 50 records will be read.
7048   */
7049   if (thd->optimizer_switch_flag(OPTIMIZER_SWITCH_MRR_COST_BASED) &&
7050       !force_dsmrr_by_hints)
7051   {
7052     /*
7053       If the storage engine has a database buffer we use this as the
7054       minimum size the table should have before considering DS-MRR.
7055     */
7056     longlong min_file_size= table->file->get_memory_buffer_size();
7057     if (min_file_size == -1)
7058     {
7059       // No estimate for database buffer
7060       min_file_size= 100 * 1024 * 1024;    // 100 MB
7061     }
7062 
7063     if (table->file->stats.data_file_length <
7064         static_cast<ulonglong>(min_file_size) ||
7065         rows <= 50)
7066       return true;                 // Use the default implementation
7067   }
7068 
7069   Cost_estimate dsmrr_cost;
7070   if (get_disk_sweep_mrr_cost(keyno, rows, *flags, bufsz, &dsmrr_cost))
7071     return TRUE;
7072 
7073   /*
7074     If @@optimizer_switch has "mrr" on and "mrr_cost_based" off, then set cost
7075     of DS-MRR to be minimum of DS-MRR and Default implementations cost. This
7076     allows one to force use of DS-MRR whenever it is applicable without
7077     affecting other cost-based choices. Note that if MRR or BKA hint is
7078     specified, DS-MRR will be used regardless of cost.
7079   */
7080   const bool force_dsmrr=
7081     (force_dsmrr_by_hints ||
7082      !thd->optimizer_switch_flag(OPTIMIZER_SWITCH_MRR_COST_BASED));
7083 
7084   if (force_dsmrr && dsmrr_cost.total_cost() > cost->total_cost())
7085     dsmrr_cost= *cost;
7086 
7087   if (force_dsmrr || (dsmrr_cost.total_cost() <= cost->total_cost()))
7088   {
7089     *flags &= ~HA_MRR_USE_DEFAULT_IMPL;  /* Use the DS-MRR implementation */
7090     *flags &= ~HA_MRR_SUPPORT_SORTED;    /* We can't provide ordered output */
7091     *cost= dsmrr_cost;
7092     res= FALSE;
7093   }
7094   else
7095   {
7096     /* Use the default MRR implementation */
7097     res= TRUE;
7098   }
7099   return res;
7100 }
7101 
7102 
/*
  Forward declaration of a file-local helper: it is called by
  DsMrr_impl::get_disk_sweep_mrr_cost() before its definition appears.
*/
static void get_sort_and_sweep_cost(TABLE *table, ha_rows nrows,
                                    Cost_estimate *cost);
7105 
7106 
7107 /**
7108   Get cost of DS-MRR scan
7109 
7110   @param keynr              Index to be used
7111   @param rows               E(Number of rows to be scanned)
7112   @param flags              Scan parameters (HA_MRR_* flags)
7113   @param buffer_size INOUT  Buffer size
7114   @param cost        OUT    The cost
7115 
7116   @retval FALSE  OK
7117   @retval TRUE   Error, DS-MRR cannot be used (the buffer is too small
7118                  for even 1 rowid)
7119 */
7120 
get_disk_sweep_mrr_cost(uint keynr,ha_rows rows,uint flags,uint * buffer_size,Cost_estimate * cost)7121 bool DsMrr_impl::get_disk_sweep_mrr_cost(uint keynr, ha_rows rows, uint flags,
7122                                          uint *buffer_size,
7123                                          Cost_estimate *cost)
7124 {
7125   ha_rows rows_in_last_step;
7126   uint n_full_steps;
7127 
7128   const uint elem_size= h->ref_length +
7129                         sizeof(void*) * (!MY_TEST(flags & HA_MRR_NO_ASSOCIATION));
7130   const ha_rows max_buff_entries= *buffer_size / elem_size;
7131 
7132   if (!max_buff_entries)
7133     return TRUE; /* Buffer has not enough space for even 1 rowid */
7134 
7135   /* Number of iterations we'll make with full buffer */
7136   n_full_steps= (uint)floor(rows2double(rows) / max_buff_entries);
7137 
7138   /*
7139     Get numbers of rows we'll be processing in last iteration, with
7140     non-full buffer
7141   */
7142   rows_in_last_step= rows % max_buff_entries;
7143 
7144   assert(cost->is_zero());
7145 
7146   if (n_full_steps)
7147   {
7148     get_sort_and_sweep_cost(table, max_buff_entries, cost);
7149     cost->multiply(n_full_steps);
7150   }
7151   else
7152   {
7153     /*
7154       Adjust buffer size since only parts of the buffer will be used:
7155       1. Adjust record estimate for the last scan to reduce likelyhood
7156          of needing more than one scan by adding 20 percent to the
7157          record estimate and by ensuring this is at least 100 records.
7158       2. If the estimated needed buffer size is lower than suggested by
7159          the caller then set it to the estimated buffer size.
7160     */
7161     const ha_rows keys_in_buffer=
7162       max<ha_rows>(static_cast<ha_rows>(1.2 * rows_in_last_step), 100);
7163     *buffer_size= min<ulong>(*buffer_size,
7164                              static_cast<ulong>(keys_in_buffer) * elem_size);
7165   }
7166 
7167   Cost_estimate last_step_cost;
7168   get_sort_and_sweep_cost(table, rows_in_last_step, &last_step_cost);
7169   (*cost)+= last_step_cost;
7170 
7171   /*
7172     Cost of memory is not included in the total_cost() function and
7173     thus will not be considered when comparing costs. Still, we
7174     record it in the cost estimate object for future use.
7175   */
7176   cost->add_mem(*buffer_size);
7177 
7178   /* Total cost of all index accesses */
7179   (*cost)+= h->index_scan_cost(keynr, 1, static_cast<double>(rows));
7180 
7181   /*
7182     Add CPU cost for processing records (see
7183     @handler::multi_range_read_info_const()).
7184   */
7185   cost->add_cpu(table->cost_model()->row_evaluate_cost(
7186     static_cast<double>(rows)));
7187   return FALSE;
7188 }
7189 
7190 
7191 /*
7192   Get cost of one sort-and-sweep step
7193 
7194   SYNOPSIS
7195     get_sort_and_sweep_cost()
7196       table       Table being accessed
7197       nrows       Number of rows to be sorted and retrieved
7198       cost   OUT  The cost
7199 
7200   DESCRIPTION
7201     Get cost of these operations:
7202      - sort an array of #nrows ROWIDs using qsort
7203      - read #nrows records from table in a sweep.
7204 */
7205 
7206 static
get_sort_and_sweep_cost(TABLE * table,ha_rows nrows,Cost_estimate * cost)7207 void get_sort_and_sweep_cost(TABLE *table, ha_rows nrows, Cost_estimate *cost)
7208 {
7209   assert(cost->is_zero());
7210   if (nrows)
7211   {
7212     get_sweep_read_cost(table, nrows, FALSE, cost);
7213 
7214     /*
7215       @todo CostModel: For the old version of the cost model the
7216       following code should be used. For the new version of the cost
7217       model Cost_model::key_compare_cost() should be used.  When
7218       removing support for the old cost model this code should be
7219       removed. The reason for this is that we should get rid of the
7220       ROWID_COMPARE_SORT_COST and use key_compare_cost() instead. For
7221       the current value returned by key_compare_cost() this would
7222       overestimate the cost for sorting.
7223     */
7224 
7225     /*
7226       Constant for the cost of doing one key compare operation in the
7227       sort operation. We should have used the value returned by
7228       key_compare_cost() here but this would make the cost
7229       estimate of sorting very high for queries accessing many
7230       records. Until this constant is adjusted we introduce a constant
7231       that is more realistic. @todo: Replace this with
7232       key_compare_cost() when this has been given a realistic value.
7233     */
7234     const double ROWID_COMPARE_SORT_COST=
7235       table->cost_model()->key_compare_cost(1.0) / 10;
7236 
7237     /* Add cost of qsort call: n * log2(n) * cost(rowid_comparison) */
7238 
7239     // For the old version of the cost model this cost calculations should
7240     // be used....
7241     const double cpu_sort= nrows * log2(nrows) * ROWID_COMPARE_SORT_COST;
7242     // .... For the new cost model something like this should be used...
7243     // cpu_sort= nrows * log2(nrows) *
7244     //           table->cost_model()->rowid_compare_cost();
7245     cost->add_cpu(cpu_sort);
7246   }
7247 }
7248 
7249 
7250 /**
7251   Get cost of reading nrows table records in a "disk sweep"
7252 
  A disk sweep read is a sequence of handler->rnd_pos(rowid) calls that are
  made for an ordered sequence of rowids.
7255 
7256   We take into account that some of the records might be in a memory
7257   buffer while others need to be read from a secondary storage
7258   device. The model for this assumes hard disk IO. A disk read is
7259   performed as follows:
7260 
7261    1. The disk head is moved to the needed cylinder
7262    2. The controller waits for the plate to rotate
7263    3. The data is transferred
7264 
7265   Time to do #3 is insignificant compared to #2+#1.
7266 
7267   Time to move the disk head is proportional to head travel distance.
7268 
7269   Time to wait for the plate to rotate depends on whether the disk head
7270   was moved or not.
7271 
7272   If disk head wasn't moved, the wait time is proportional to distance
7273   between the previous block and the block we're reading.
7274 
7275   If the head was moved, we don't know how much we'll need to wait for the
7276   plate to rotate. We assume the wait time to be a variate with a mean of
7277   0.5 of full rotation time.
7278 
  Our cost units are "random disk seeks". The cost of random disk seek is
  actually not a constant, it depends on the range of cylinders we're going
  to access. We make it constant by introducing a fuzzy concept of "typical
7282   datafile length" (it's fuzzy as it's hard to tell whether it should
7283   include index file, temp.tables etc). Then random seek cost is:
7284 
7285     1 = half_rotation_cost + move_cost * 1/3 * typical_data_file_length
7286 
7287   We define half_rotation_cost as disk_seek_base_cost() (see
7288   Cost_model_server::disk_seek_base_cost()).
7289 
7290   @param      table        Table to be accessed
7291   @param      nrows        Number of rows to retrieve
7292   @param      interrupted  true <=> Assume that the disk sweep will be
7293                            interrupted by other disk IO. false - otherwise.
7294   @param[out] cost         the cost
7295 */
7296 
get_sweep_read_cost(TABLE * table,ha_rows nrows,bool interrupted,Cost_estimate * cost)7297 void get_sweep_read_cost(TABLE *table, ha_rows nrows, bool interrupted,
7298                          Cost_estimate *cost)
7299 {
7300   DBUG_ENTER("get_sweep_read_cost");
7301 
7302   assert(cost->is_zero());
7303   if(nrows > 0)
7304   {
7305     const Cost_model_table *const cost_model= table->cost_model();
7306 
7307     // The total number of blocks used by this table
7308     double n_blocks=
7309       ceil(ulonglong2double(table->file->stats.data_file_length) / IO_SIZE);
7310     if (n_blocks < 1.0)                         // When data_file_length is 0
7311       n_blocks= 1.0;
7312 
7313     /*
7314       The number of blocks that in average need to be read given that
7315       the records are uniformly distribution over the table.
7316     */
7317     double busy_blocks=
7318       n_blocks * (1.0 - pow(1.0 - 1.0/n_blocks, rows2double(nrows)));
7319     if (busy_blocks < 1.0)
7320       busy_blocks= 1.0;
7321 
7322     DBUG_PRINT("info",("sweep: nblocks=%g, busy_blocks=%g", n_blocks,
7323                        busy_blocks));
7324     /*
7325       The random access cost for reading the data pages will be the upper
7326       limit for the sweep_cost.
7327     */
7328     cost->add_io(cost_model->page_read_cost(busy_blocks));
7329     if (!interrupted)
7330     {
7331       Cost_estimate sweep_cost;
7332       /*
7333         Assume reading pages from disk is done in one 'sweep'.
7334 
7335         The cost model and cost estimate for pages already in a memory
7336         buffer will be different from pages that needed to be read from
7337         disk. Calculate the number of blocks that likely already are
7338         in memory and the number of blocks that need to be read from
7339         disk.
7340       */
7341       const double busy_blocks_mem=
7342         busy_blocks * table->file->table_in_memory_estimate();
7343       const double busy_blocks_disk= busy_blocks - busy_blocks_mem;
7344       assert(busy_blocks_disk >= 0.0);
7345 
7346       // Cost of accessing blocks in main memory buffer
7347       sweep_cost.add_io(cost_model->buffer_block_read_cost(busy_blocks_mem));
7348 
7349       // Cost of reading blocks from disk in a 'sweep'
7350       const double seek_distance= (busy_blocks_disk > 1.0) ?
7351         n_blocks / busy_blocks_disk : n_blocks;
7352 
7353       const double disk_cost=
7354         busy_blocks_disk * cost_model->disk_seek_cost(seek_distance);
7355       sweep_cost.add_io(disk_cost);
7356 
7357       /*
7358         For some cases, ex: when only few blocks need to be read and the
7359         seek distance becomes very large, the sweep cost model can produce
7360         a cost estimate that is larger than the cost of random access.
7361         To handle this case, we use the sweep cost only when it is less
7362         than the random access cost.
7363       */
7364       if (sweep_cost < *cost)
7365         *cost= sweep_cost;
7366     }
7367   }
7368   DBUG_PRINT("info",("returning cost=%g", cost->total_cost()));
7369   DBUG_VOID_RETURN;
7370 }
7371 
7372 
7373 /****************************************************************************
7374  * DS-MRR implementation ends
7375  ***************************************************************************/
7376 
7377 /** @brief
7378   Read first row between two ranges.
7379   Store ranges for future calls to read_range_next.
7380 
7381   @param start_key		Start key. Is 0 if no min range
7382   @param end_key		End key.  Is 0 if no max range
7383   @param eq_range_arg	        Set to 1 if start_key == end_key
7384   @param sorted		Set to 1 if result should be sorted per key
7385 
7386   @note
7387     Record is read into table->record[0]
7388 
7389   @retval
7390     0			Found row
7391   @retval
7392     HA_ERR_END_OF_FILE	No rows in range
7393   @retval
7394     \#			Error code
7395 */
read_range_first(const key_range * start_key,const key_range * end_key,bool eq_range_arg,bool sorted)7396 int handler::read_range_first(const key_range *start_key,
7397 			      const key_range *end_key,
7398 			      bool eq_range_arg,
7399                               bool sorted /* ignored */)
7400 {
7401   int result;
7402   DBUG_ENTER("handler::read_range_first");
7403 
7404   eq_range= eq_range_arg;
7405   set_end_range(end_key, RANGE_SCAN_ASC);
7406 
7407   range_key_part= table->key_info[active_index].key_part;
7408 
7409   if (!start_key)			// Read first record
7410     result= ha_index_first(table->record[0]);
7411   else
7412     result= ha_index_read_map(table->record[0],
7413                               start_key->key,
7414                               start_key->keypart_map,
7415                               start_key->flag);
7416   if (result)
7417     DBUG_RETURN((result == HA_ERR_KEY_NOT_FOUND)
7418 		? HA_ERR_END_OF_FILE
7419 		: result);
7420 
7421   if (compare_key(end_range) <= 0)
7422   {
7423     DBUG_RETURN(0);
7424   }
7425   else
7426   {
7427     /*
7428       The last read row does not fall in the range. So request
7429       storage engine to release row lock if possible.
7430     */
7431     unlock_row();
7432     DBUG_RETURN(HA_ERR_END_OF_FILE);
7433   }
7434 }
7435 
7436 
7437 /** @brief
7438   Read next row between two endpoints.
7439 
7440   @note
7441     Record is read into table->record[0]
7442 
7443   @retval
7444     0			Found row
7445   @retval
7446     HA_ERR_END_OF_FILE	No rows in range
7447   @retval
7448     \#			Error code
7449 */
read_range_next()7450 int handler::read_range_next()
7451 {
7452   int result;
7453   DBUG_ENTER("handler::read_range_next");
7454 
7455   if (eq_range)
7456   {
7457     /* We trust that index_next_same always gives a row in range */
7458     DBUG_RETURN(ha_index_next_same(table->record[0],
7459                                    end_range->key,
7460                                    end_range->length));
7461   }
7462   result= ha_index_next(table->record[0]);
7463   if (result)
7464     DBUG_RETURN(result);
7465 
7466   if (compare_key(end_range) <= 0)
7467   {
7468     DBUG_RETURN(0);
7469   }
7470   else
7471   {
7472     /*
7473       The last read row does not fall in the range. So request
7474       storage engine to release row lock if possible.
7475     */
7476     unlock_row();
7477     DBUG_RETURN(HA_ERR_END_OF_FILE);
7478   }
7479 }
7480 
7481 /**
7482   Check if one of the columns in a key is a virtual generated column.
7483   @param part    the first part of the key to check
7484   @param length  the length of the key
7485   @retval true   if the key contains a virtual generated column
7486   @retval false  if the key does not contain a virtual generated column
7487 */
key_has_vcol(const KEY_PART_INFO * part,uint length)7488 static bool key_has_vcol(const KEY_PART_INFO *part, uint length) {
7489   for (uint len = 0; len < length; len += part->store_length, ++part)
7490     if (part->field->is_virtual_gcol()) return true;
7491   return false;
7492 }
7493 
set_end_range(const key_range * range,enum_range_scan_direction direction)7494 void handler::set_end_range(const key_range* range,
7495                             enum_range_scan_direction direction)
7496 {
7497   if (range)
7498   {
7499     save_end_range= *range;
7500     end_range= &save_end_range;
7501     range_key_part= table->key_info[active_index].key_part;
7502     key_compare_result_on_equal= ((range->flag == HA_READ_BEFORE_KEY) ? 1 :
7503                                   (range->flag == HA_READ_AFTER_KEY) ? -1 : 0);
7504     m_virt_gcol_in_end_range = key_has_vcol(range_key_part, range->length);
7505   }
7506   else
7507     end_range= NULL;
7508 
7509   range_scan_direction= direction;
7510 }
7511 
7512 
7513 /**
7514   Compare if found key (in row) is over max-value.
7515 
7516   @param range		range to compare to row. May be 0 for no range
7517 
7518   @seealso
7519     key.cc::key_cmp()
7520 
7521   @return
7522     The return value is SIGN(key_in_row - range_key):
7523 
7524     - 0   : Key is equal to range or 'range' == 0 (no range)
7525     - -1  : Key is less than range
7526     - 1   : Key is larger than range
7527 */
compare_key(key_range * range)7528 int handler::compare_key(key_range *range)
7529 {
7530   int cmp;
7531   if (!range || in_range_check_pushed_down)
7532     return 0;					// No max range
7533   cmp= key_cmp(range_key_part, range->key, range->length);
7534   if (!cmp)
7535     cmp= key_compare_result_on_equal;
7536   return cmp;
7537 }
7538 
7539 
7540 /*
7541   Compare if a found key (in row) is within the range.
7542 
7543   This function is similar to compare_key() but checks the range scan
7544   direction to determine if this is a descending scan. This function
7545   is used by the index condition pushdown implementation to determine
7546   if the read record is within the range scan.
7547 
7548   @param range Range to compare to row. May be NULL for no range.
7549 
7550   @seealso
7551     handler::compare_key()
7552 
7553   @return Returns whether the key is within the range
7554 
7555     - 0   : Key is equal to range or 'range' == 0 (no range)
7556     - -1  : Key is within the current range
7557     - 1   : Key is outside the current range
7558 */
7559 
compare_key_icp(const key_range * range) const7560 int handler::compare_key_icp(const key_range *range) const
7561 {
7562   int cmp;
7563   if (!range)
7564     return 0;					// no max range
7565   cmp= key_cmp(range_key_part, range->key, range->length);
7566   if (!cmp)
7567     cmp= key_compare_result_on_equal;
7568   if (range_scan_direction == RANGE_SCAN_DESC)
7569     cmp= -cmp;
7570   return cmp;
7571 }
7572 
7573 /**
7574   Change the offsets of all the fields in a key range.
7575 
7576   @param range	  the key range
7577   @param key_part the first key part
7578   @param diff	  how much to change the offsets with
7579 */
7580 static inline void
move_key_field_offsets(const key_range * range,const KEY_PART_INFO * key_part,my_ptrdiff_t diff)7581 move_key_field_offsets(const key_range *range, const KEY_PART_INFO *key_part,
7582 		       my_ptrdiff_t diff)
7583 {
7584   for (size_t len= 0; len < range->length;
7585        len+= key_part->store_length, ++key_part)
7586     key_part->field->move_field_offset(diff);
7587 }
7588 
7589 /**
7590   Check if the key in the given buffer (which is not necessarily
7591   TABLE::record[0]) is within range. Called by the storage engine to
7592   avoid reading too many rows.
7593 
7594   @param buf  the buffer that holds the key
7595   @retval -1 if the key is within the range
7596   @retval  0 if the key is equal to the end_range key, and
7597              key_compare_result_on_equal is 0
7598   @retval  1 if the key is outside the range
7599 */
compare_key_in_buffer(const uchar * buf) const7600 int handler::compare_key_in_buffer(const uchar *buf) const
7601 {
7602   assert(end_range != NULL);
7603 
7604   /*
7605     End range on descending scans is only checked with ICP for now, and then we
7606     check it with compare_key_icp() instead of this function.
7607   */
7608   assert(range_scan_direction == RANGE_SCAN_ASC);
7609 
7610   // Make the fields in the key point into the buffer instead of record[0].
7611   const my_ptrdiff_t diff= buf - table->record[0];
7612   if (diff != 0)
7613     move_key_field_offsets(end_range, range_key_part, diff);
7614 
7615   // Compare the key in buf against end_range.
7616   int cmp= key_cmp(range_key_part, end_range->key, end_range->length);
7617   if (cmp == 0)
7618     cmp= key_compare_result_on_equal;
7619 
7620   // Reset the field offsets.
7621   if (diff != 0)
7622     move_key_field_offsets(end_range, range_key_part, -diff);
7623 
7624   return cmp;
7625 }
7626 
index_read_idx_map(uchar * buf,uint index,const uchar * key,key_part_map keypart_map,enum ha_rkey_function find_flag)7627 int handler::index_read_idx_map(uchar * buf, uint index, const uchar * key,
7628                                 key_part_map keypart_map,
7629                                 enum ha_rkey_function find_flag)
7630 {
7631   int error, error1;
7632   error= index_init(index, 0);
7633   if (!error)
7634   {
7635     error= index_read_map(buf, key, keypart_map, find_flag);
7636     error1= index_end();
7637   }
7638   return error ?  error : error1;
7639 }
7640 
7641 
calculate_key_len(TABLE * table,uint key,key_part_map keypart_map)7642 uint calculate_key_len(TABLE *table, uint key,
7643                        key_part_map keypart_map)
7644 {
7645   /* works only with key prefixes */
7646   assert(((keypart_map + 1) & keypart_map) == 0);
7647 
7648   KEY *key_info= table->key_info + key;
7649   KEY_PART_INFO *key_part= key_info->key_part;
7650   KEY_PART_INFO *end_key_part= key_part + actual_key_parts(key_info);
7651   uint length= 0;
7652 
7653   while (key_part < end_key_part && keypart_map)
7654   {
7655     length+= key_part->store_length;
7656     keypart_map >>= 1;
7657     key_part++;
7658   }
7659   return length;
7660 }
7661 
7662 
7663 /**
7664   Returns a list of all known extensions.
7665 
7666     No mutexes, worst case race is a minor surplus memory allocation
7667     We have to recreate the extension map if mysqld is restarted (for example
7668     within libmysqld)
7669 
7670   @retval
7671     pointer		pointer to TYPELIB structure
7672 */
exts_handlerton(THD * unused,plugin_ref plugin,void * arg)7673 static my_bool exts_handlerton(THD *unused, plugin_ref plugin,
7674                                void *arg)
7675 {
7676   List<char> *found_exts= (List<char> *) arg;
7677   handlerton *hton= plugin_data<handlerton*>(plugin);
7678   handler *file;
7679   if (hton->state == SHOW_OPTION_YES && hton->create &&
7680       (file= hton->create(hton, (TABLE_SHARE*) 0, current_thd->mem_root)))
7681   {
7682     List_iterator_fast<char> it(*found_exts);
7683     const char **ext, *old_ext;
7684 
7685     for (ext= file->bas_ext(); *ext; ext++)
7686     {
7687       while ((old_ext= it++))
7688       {
7689         if (!strcmp(old_ext, *ext))
7690 	  break;
7691       }
7692       if (!old_ext)
7693         found_exts->push_back((char *) *ext);
7694 
7695       it.rewind();
7696     }
7697     delete file;
7698   }
7699   return FALSE;
7700 }
7701 
ha_known_exts()7702 TYPELIB* ha_known_exts()
7703 {
7704   TYPELIB *known_extensions = (TYPELIB*) sql_alloc(sizeof(TYPELIB));
7705   known_extensions->name= "known_exts";
7706   known_extensions->type_lengths= NULL;
7707 
7708   List<char> found_exts;
7709   const char **ext, *old_ext;
7710 
7711   found_exts.push_back((char*) TRG_EXT);
7712   found_exts.push_back((char*) TRN_EXT);
7713 
7714   plugin_foreach(NULL, exts_handlerton,
7715                  MYSQL_STORAGE_ENGINE_PLUGIN, &found_exts);
7716 
7717   size_t arr_length= sizeof(char *)* (found_exts.elements+1);
7718   ext= (const char **) sql_alloc(arr_length);
7719 
7720   assert(NULL != ext);
7721   known_extensions->count= found_exts.elements;
7722   known_extensions->type_names= ext;
7723 
7724   List_iterator_fast<char> it(found_exts);
7725   while ((old_ext= it++))
7726     *ext++= old_ext;
7727   *ext= NULL;
7728   return known_extensions;
7729 }
7730 
7731 
stat_print(THD * thd,const char * type,size_t type_len,const char * file,size_t file_len,const char * status,size_t status_len)7732 static bool stat_print(THD *thd, const char *type, size_t type_len,
7733                        const char *file, size_t file_len,
7734                        const char *status, size_t status_len)
7735 {
7736   Protocol *protocol= thd->get_protocol();
7737   protocol->start_row();
7738   protocol->store(type, type_len, system_charset_info);
7739   protocol->store(file, file_len, system_charset_info);
7740   protocol->store(status, status_len, system_charset_info);
7741   if (protocol->end_row())
7742     return TRUE;
7743   return FALSE;
7744 }
7745 
7746 
showstat_handlerton(THD * thd,plugin_ref plugin,void * arg)7747 static my_bool showstat_handlerton(THD *thd, plugin_ref plugin,
7748                                    void *arg)
7749 {
7750   enum ha_stat_type stat= *(enum ha_stat_type *) arg;
7751   handlerton *hton= plugin_data<handlerton*>(plugin);
7752   if (hton->state == SHOW_OPTION_YES && hton->show_status &&
7753       hton->show_status(hton, thd, stat_print, stat))
7754     return TRUE;
7755   return FALSE;
7756 }
7757 
ha_show_status(THD * thd,handlerton * db_type,enum ha_stat_type stat)7758 bool ha_show_status(THD *thd, handlerton *db_type, enum ha_stat_type stat)
7759 {
7760   List<Item> field_list;
7761   bool result;
7762 
7763   field_list.push_back(new Item_empty_string("Type",10));
7764   field_list.push_back(new Item_empty_string("Name",FN_REFLEN));
7765   field_list.push_back(new Item_empty_string("Status",10));
7766 
7767   if (thd->send_result_metadata(&field_list,
7768                                 Protocol::SEND_NUM_ROWS | Protocol::SEND_EOF))
7769     return TRUE;
7770 
7771   if (db_type == NULL)
7772   {
7773     result= plugin_foreach(thd, showstat_handlerton,
7774                            MYSQL_STORAGE_ENGINE_PLUGIN, &stat);
7775   }
7776   else
7777   {
7778     if (db_type->state != SHOW_OPTION_YES)
7779     {
7780       const LEX_STRING *name=&hton2plugin[db_type->slot]->name;
7781       result= stat_print(thd, name->str, name->length,
7782                          "", 0, "DISABLED", 8) ? 1 : 0;
7783     }
7784     else
7785     {
7786       DBUG_EXECUTE_IF("simulate_show_status_failure",
7787                       DBUG_SET("+d,simulate_net_write_failure"););
7788       result= db_type->show_status &&
7789               db_type->show_status(db_type, thd, stat_print, stat) ? 1 : 0;
7790       DBUG_EXECUTE_IF("simulate_show_status_failure",
7791                       DBUG_SET("-d,simulate_net_write_failure"););
7792     }
7793   }
7794 
7795   if (!result)
7796     my_eof(thd);
7797   return result;
7798 }
7799 
7800 /*
7801   Function to check if the conditions for row-based binlogging is
7802   correct for the table.
7803 
7804   A row in the given table should be replicated if:
7805   - Row-based replication is enabled in the current thread
7806   - The binlog is enabled
7807   - It is not a temporary table
7808   - The binary log is open
7809   - The database the table resides in shall be binlogged (binlog_*_db rules)
7810   - table is not mysql.event
7811 */
7812 
check_table_binlog_row_based(THD * thd,TABLE * table)7813 static bool check_table_binlog_row_based(THD *thd, TABLE *table)
7814 {
7815   if (table->s->cached_row_logging_check == -1)
7816   {
7817     int const check(table->s->tmp_table == NO_TMP_TABLE &&
7818                     ! table->no_replicate &&
7819                     binlog_filter->db_ok(table->s->db.str));
7820     table->s->cached_row_logging_check= check;
7821   }
7822 
7823   assert(table->s->cached_row_logging_check == 0 ||
7824          table->s->cached_row_logging_check == 1);
7825 
7826   return (thd->is_current_stmt_binlog_format_row() &&
7827           table->s->cached_row_logging_check &&
7828           (thd->variables.option_bits & OPTION_BIN_LOG) &&
7829           mysql_bin_log.is_open());
7830 }
7831 
7832 
7833 /** @brief
7834    Write table maps for all (manually or automatically) locked tables
7835    to the binary log.
7836 
7837    SYNOPSIS
7838      write_locked_table_maps()
7839        thd     Pointer to THD structure
7840 
7841    DESCRIPTION
7842        This function will generate and write table maps for all tables
7843        that are locked by the thread 'thd'.
7844 
7845    RETURN VALUE
7846        0   All OK
7847        1   Failed to write all table maps
7848 
7849    SEE ALSO
7850        THD::lock
7851 */
7852 
write_locked_table_maps(THD * thd)7853 static int write_locked_table_maps(THD *thd)
7854 {
7855   DBUG_ENTER("write_locked_table_maps");
7856   DBUG_PRINT("enter", ("thd: 0x%lx  thd->lock: 0x%lx "
7857                        "thd->extra_lock: 0x%lx",
7858                        (long) thd, (long) thd->lock, (long) thd->extra_lock));
7859 
7860   DBUG_PRINT("debug", ("get_binlog_table_maps(): %d", thd->get_binlog_table_maps()));
7861 
7862   if (thd->get_binlog_table_maps() == 0)
7863   {
7864     MYSQL_LOCK *locks[2];
7865     locks[0]= thd->extra_lock;
7866     locks[1]= thd->lock;
7867     for (uint i= 0 ; i < sizeof(locks)/sizeof(*locks) ; ++i )
7868     {
7869       MYSQL_LOCK const *const lock= locks[i];
7870       if (lock == NULL)
7871         continue;
7872 
7873       bool need_binlog_rows_query= thd->variables.binlog_rows_query_log_events;
7874       TABLE **const end_ptr= lock->table + lock->table_count;
7875       for (TABLE **table_ptr= lock->table ;
7876            table_ptr != end_ptr ;
7877            ++table_ptr)
7878       {
7879         TABLE *const table= *table_ptr;
7880         DBUG_PRINT("info", ("Checking table %s", table->s->table_name.str));
7881         if (table->current_lock == F_WRLCK &&
7882             check_table_binlog_row_based(thd, table))
7883         {
7884           /*
7885             We need to have a transactional behavior for SQLCOM_CREATE_TABLE
7886             (e.g. CREATE TABLE... SELECT * FROM TABLE) in order to keep a
7887             compatible behavior with the STMT based replication even when
7888             the table is not transactional. In other words, if the operation
7889             fails while executing the insert phase nothing is written to the
7890             binlog.
7891 
7892             Note that at this point, we check the type of a set of tables to
7893             create the table map events. In the function binlog_log_row(),
7894             which calls the current function, we check the type of the table
7895             of the current row.
7896           */
7897           bool const has_trans= thd->lex->sql_command == SQLCOM_CREATE_TABLE ||
7898                                 table->file->has_transactions();
7899           int const error= thd->binlog_write_table_map(table, has_trans,
7900                                                        need_binlog_rows_query);
7901           /* Binlog Rows_query log event once for one statement which updates
7902              two or more tables.*/
7903           if (need_binlog_rows_query)
7904             need_binlog_rows_query= FALSE;
7905           /*
7906             If an error occurs, it is the responsibility of the caller to
7907             roll back the transaction.
7908           */
7909           if (unlikely(error))
7910             DBUG_RETURN(1);
7911         }
7912       }
7913     }
7914   }
7915   DBUG_RETURN(0);
7916 }
7917 
/*
  Signature of the row-logging callback given to binlog_log_row(). It is
  invoked there as (thd, table, has_trans, before_record, after_record) and
  a true return value is treated as a logging failure.
*/
typedef bool Log_func(THD*, TABLE*, bool,
                      const uchar*, const uchar*);
7920 
7921 /**
7922 
7923   The purpose of an instance of this class is to :
7924 
7925   1) Given a TABLE instance, backup the given TABLE::read_set, TABLE::write_set
7926      and restore those members upon this instance disposal.
7927 
7928   2) Store a reference to a dynamically allocated buffer and dispose of it upon
7929      this instance disposal.
7930  */
7931 
7932 class Binlog_log_row_cleanup
7933 {
7934  public:
7935   /**
7936     This constructor aims to create temporary copies of readset and writeset.
7937     @param table                 A pointer to TABLE object
7938     @param temp_read_bitmap      Temporary BITMAP to store read_set.
7939     @param temp_write_bitmap     Temporary BITMAP to store write_set.
7940   */
Binlog_log_row_cleanup(TABLE & table,MY_BITMAP & temp_read_bitmap,MY_BITMAP & temp_write_bitmap)7941   Binlog_log_row_cleanup(TABLE &table, MY_BITMAP &temp_read_bitmap,
7942                          MY_BITMAP &temp_write_bitmap)
7943       : m_cleanup_table(table),
7944         m_cleanup_read_bitmap(temp_read_bitmap),
7945         m_cleanup_write_bitmap(temp_write_bitmap)
7946   {
7947     bitmap_copy(&this->m_cleanup_read_bitmap, this->m_cleanup_table.read_set);
7948     bitmap_copy(&this->m_cleanup_write_bitmap, this->m_cleanup_table.write_set);
7949   }
7950 
7951   /**
7952     This destructor aims to restore the original readset and writeset and
7953     delete the temporary copies.
7954   */
~Binlog_log_row_cleanup()7955   virtual ~Binlog_log_row_cleanup()
7956   {
7957     bitmap_copy(this->m_cleanup_table.read_set, &this->m_cleanup_read_bitmap);
7958     bitmap_copy(this->m_cleanup_table.write_set, &this->m_cleanup_write_bitmap);
7959     bitmap_free(&this->m_cleanup_read_bitmap);
7960     bitmap_free(&this->m_cleanup_write_bitmap);
7961   }
7962 
7963  private:
7964   TABLE &m_cleanup_table;  // Creating a TABLE to get access to its members.
7965   MY_BITMAP &m_cleanup_read_bitmap;   // Temporary bitmap to store read_set.
7966   MY_BITMAP &m_cleanup_write_bitmap;  // Temporary bitmap to store write_set.
7967 };
7968 
binlog_log_row(TABLE * table,const uchar * before_record,const uchar * after_record,Log_func * log_func)7969 int binlog_log_row(TABLE* table,
7970                           const uchar *before_record,
7971                           const uchar *after_record,
7972                           Log_func *log_func)
7973 {
7974   bool error= 0;
7975   THD *const thd= table->in_use;
7976 
7977   if (check_table_binlog_row_based(thd, table))
7978   {
7979     if (thd->variables.transaction_write_set_extraction != HASH_ALGORITHM_OFF)
7980     {
7981       try
7982       {
7983         MY_BITMAP save_read_set;
7984         MY_BITMAP save_write_set;
7985         if (bitmap_init(&save_read_set, NULL, table->s->fields, false) ||
7986             bitmap_init(&save_write_set, NULL, table->s->fields, false))
7987         {
7988           my_error(ER_OUT_OF_RESOURCES, MYF(0));
7989           return HA_ERR_RBR_LOGGING_FAILED;
7990         }
7991 
7992         Binlog_log_row_cleanup cleanup_sentry(*table, save_read_set,
7993                                               save_write_set);
7994         if (thd->variables.binlog_row_image == 0)
7995         {
7996           for (uint key_number= 0; key_number < table->s->keys; ++key_number)
7997           {
7998             if (((table->key_info[key_number].flags & (HA_NOSAME)) ==
7999                  HA_NOSAME))
8000             {
8001               table->mark_columns_used_by_index_no_reset(key_number,
8002                                                          table->read_set);
8003               table->mark_columns_used_by_index_no_reset(key_number,
8004                                                          table->write_set);
8005             }
8006           }
8007         }
8008         const uchar *records[]= {after_record, before_record};
8009 
8010         for (int record= 0; record < 2; ++record)
8011         {
8012           if (records[record] != NULL)
8013           {
8014             assert(records[record] == table->record[0] ||
8015                    records[record] == table->record[1]);
8016             bool res= add_pke(table, thd, records[record]);
8017             if (res) return HA_ERR_RBR_LOGGING_FAILED;
8018           }
8019         }
8020       }
8021       catch (const std::bad_alloc &)
8022       {
8023         my_error(ER_OUT_OF_RESOURCES, MYF(0));
8024         return HA_ERR_RBR_LOGGING_FAILED;
8025       }
8026     }
8027     DBUG_DUMP("read_set 10", (uchar*) table->read_set->bitmap,
8028               (table->s->fields + 7) / 8);
8029 
8030     /*
8031       If there are no table maps written to the binary log, this is
8032       the first row handled in this statement. In that case, we need
8033       to write table maps for all locked tables to the binary log.
8034     */
8035     if (likely(!(error= write_locked_table_maps(thd))))
8036     {
8037       /*
8038         We need to have a transactional behavior for SQLCOM_CREATE_TABLE
8039         (i.e. CREATE TABLE... SELECT * FROM TABLE) in order to keep a
8040         compatible behavior with the STMT based replication even when
8041         the table is not transactional. In other words, if the operation
8042         fails while executing the insert phase nothing is written to the
8043         binlog.
8044       */
8045       bool const has_trans= thd->lex->sql_command == SQLCOM_CREATE_TABLE ||
8046                            table->file->has_transactions();
8047       error=
8048         (*log_func)(thd, table, has_trans, before_record, after_record);
8049     }
8050   }
8051   return error ? HA_ERR_RBR_LOGGING_FAILED : 0;
8052 }
8053 
/**
  Lock or unlock the table in the storage engine.

  Calls the engine's external_lock() inside the table-lock wait
  instrumentation, fires the matching START/DONE probe for the lock type,
  and on success remembers the lock type and refreshes the cached table
  flags.

  @param thd        Thread handle, passed on to external_lock().
  @param lock_type  F_RDLCK, F_WRLCK or F_UNLCK.

  @return The value returned by external_lock(); 0 on success.
*/
int handler::ha_external_lock(THD *thd, int lock_type)
{
  int error;
  DBUG_ENTER("handler::ha_external_lock");
  /*
    Whether this is lock or unlock, this should be true, and is to verify that
    if get_auto_increment() was called (thus may have reserved intervals or
    taken a table lock), ha_release_auto_increment() was too.
  */
  assert(next_insert_id == 0);
  /* Consecutive calls for lock without unlocking in between is not allowed */
  assert(table_share->tmp_table != NO_TMP_TABLE ||
         ((lock_type != F_UNLCK && m_lock_type == F_UNLCK) ||
          lock_type == F_UNLCK));
  /* SQL HANDLER call locks/unlock while scanning (RND/INDEX). */
  assert(inited == NONE || table->open_by_handler);

  /* Fire the START probe that matches the requested lock type, if enabled. */
  if (MYSQL_HANDLER_RDLOCK_START_ENABLED() && lock_type == F_RDLCK)
  {
    MYSQL_HANDLER_RDLOCK_START(table_share->db.str,
                               table_share->table_name.str);
  }
  else if (MYSQL_HANDLER_WRLOCK_START_ENABLED() && lock_type == F_WRLCK)
  {
    MYSQL_HANDLER_WRLOCK_START(table_share->db.str,
                               table_share->table_name.str);
  }
  else if (MYSQL_HANDLER_UNLOCK_START_ENABLED() && lock_type == F_UNLCK)
  {
    MYSQL_HANDLER_UNLOCK_START(table_share->db.str,
                               table_share->table_name.str);
  }

  ha_statistic_increment(&SSV::ha_external_lock_count);

  /* The engine call itself, wrapped in the table-lock wait instrumentation. */
  MYSQL_TABLE_LOCK_WAIT(PSI_TABLE_EXTERNAL_LOCK, lock_type,
    { error= external_lock(thd, lock_type); })

  /*
    We cache the table flags if the locking succeeded. Otherwise, we
    keep them as they were when they were fetched in ha_open().
  */

  if (error == 0)
  {
    /*
      The lock type is needed by MRR when creating a clone of this handler
      object.
    */
    m_lock_type= lock_type;
    cached_table_flags= table_flags();
  }

  /* Fire the DONE probe that matches the requested lock type, if enabled. */
  if (MYSQL_HANDLER_RDLOCK_DONE_ENABLED() && lock_type == F_RDLCK)
  {
    MYSQL_HANDLER_RDLOCK_DONE(error);
  }
  else if (MYSQL_HANDLER_WRLOCK_DONE_ENABLED() && lock_type == F_WRLCK)
  {
    MYSQL_HANDLER_WRLOCK_DONE(error);
  }
  else if (MYSQL_HANDLER_UNLOCK_DONE_ENABLED() && lock_type == F_UNLCK)
  {
    MYSQL_HANDLER_UNLOCK_DONE(error);
  }
  DBUG_RETURN(error);
}
8121 
8122 
/** @brief
  Check handler usage and reset state of file to after 'open'

  Frees the filesort IO cache, points the table's column bitmaps back at
  their defaults, forgets any pushed engine and index conditions, and then
  calls the engine's reset().

  @note can be called regardless of whether it is locked or not.

  @return The value returned by reset().
*/
int handler::ha_reset()
{
  DBUG_ENTER("handler::ha_reset");
  /* Check that we have called all proper deallocation functions */
  assert((uchar*) table->def_read_set.bitmap +
         table->s->column_bitmap_size ==
         (uchar*) table->def_write_set.bitmap);
  assert(bitmap_is_set_all(&table->s->all_set));
  assert(table->key_read == 0);
  /* ensure that ha_index_end / ha_rnd_end has been called */
  assert(inited == NONE);
  /* Free cache used by filesort */
  free_io_cache(table);
  /* reset the bitmaps to point to defaults */
  table->default_column_bitmaps();
  /* Reset information about pushed engine conditions */
  pushed_cond= NULL;
  /* Reset information about pushed index conditions */
  cancel_pushed_idx_cond();

  /* Evaluate reset() before DBUG_RETURN so the call happens before leaving. */
  const int retval= reset();
  DBUG_RETURN(retval);
}
8151 
8152 
ha_write_row(uchar * buf)8153 int handler::ha_write_row(uchar *buf)
8154 {
8155   int error;
8156   Log_func *log_func= Write_rows_log_event::binlog_row_logging_function;
8157   assert(table_share->tmp_table != NO_TMP_TABLE ||
8158          m_lock_type == F_WRLCK);
8159 
8160   DBUG_ENTER("handler::ha_write_row");
8161   DBUG_EXECUTE_IF("inject_error_ha_write_row",
8162                   DBUG_RETURN(HA_ERR_INTERNAL_ERROR); );
8163   DBUG_EXECUTE_IF("simulate_storage_engine_out_of_memory",
8164                   DBUG_RETURN(HA_ERR_SE_OUT_OF_MEMORY); );
8165   MYSQL_INSERT_ROW_START(table_share->db.str, table_share->table_name.str);
8166   mark_trx_read_write();
8167 
8168   DBUG_EXECUTE_IF("handler_crashed_table_on_usage",
8169                   my_error(HA_ERR_CRASHED, MYF(ME_ERRORLOG), table_share->table_name.str);
8170                   set_my_errno(HA_ERR_CRASHED);
8171                   DBUG_RETURN(HA_ERR_CRASHED););
8172 
8173   MYSQL_TABLE_IO_WAIT(PSI_TABLE_WRITE_ROW, MAX_KEY, error,
8174     { error= write_row(buf); })
8175 
8176   MYSQL_INSERT_ROW_DONE(error);
8177   if (unlikely(error))
8178     DBUG_RETURN(error);
8179 
8180   if (unlikely((error= binlog_log_row(table, 0, buf, log_func))))
8181     DBUG_RETURN(error); /* purecov: inspected */
8182 
8183   DEBUG_SYNC_C("ha_write_row_end");
8184   DBUG_RETURN(0);
8185 }
8186 
8187 
ha_update_row(const uchar * old_data,uchar * new_data)8188 int handler::ha_update_row(const uchar *old_data, uchar *new_data)
8189 {
8190   int error;
8191   assert(table_share->tmp_table != NO_TMP_TABLE ||
8192          m_lock_type == F_WRLCK);
8193   Log_func *log_func= Update_rows_log_event::binlog_row_logging_function;
8194 
8195   /*
8196     Some storage engines require that the new record is in record[0]
8197     (and the old record is in record[1]).
8198    */
8199   assert(new_data == table->record[0]);
8200   assert(old_data == table->record[1]);
8201 
8202   DBUG_ENTER("hanlder::ha_update_row");
8203   DBUG_EXECUTE_IF("inject_error_ha_update_row",
8204                   DBUG_RETURN(HA_ERR_INTERNAL_ERROR); );
8205 
8206   MYSQL_UPDATE_ROW_START(table_share->db.str, table_share->table_name.str);
8207   mark_trx_read_write();
8208 
8209   DBUG_EXECUTE_IF("handler_crashed_table_on_usage",
8210                   my_error(HA_ERR_CRASHED, MYF(ME_ERRORLOG), table_share->table_name.str);
8211                   set_my_errno(HA_ERR_CRASHED);
8212                   return(HA_ERR_CRASHED););
8213 
8214   MYSQL_TABLE_IO_WAIT(PSI_TABLE_UPDATE_ROW, active_index, error,
8215     { error= update_row(old_data, new_data);})
8216 
8217   MYSQL_UPDATE_ROW_DONE(error);
8218   if (unlikely(error))
8219     DBUG_RETURN(error);
8220   if (unlikely((error= binlog_log_row(table, old_data, new_data, log_func))))
8221     DBUG_RETURN(error);
8222   DBUG_RETURN(0);
8223 }
8224 
ha_delete_row(const uchar * buf)8225 int handler::ha_delete_row(const uchar *buf)
8226 {
8227   int error;
8228   assert(table_share->tmp_table != NO_TMP_TABLE ||
8229          m_lock_type == F_WRLCK);
8230   Log_func *log_func= Delete_rows_log_event::binlog_row_logging_function;
8231   /*
8232     Normally table->record[0] is used, but sometimes table->record[1] is used.
8233   */
8234   assert(buf == table->record[0] ||
8235          buf == table->record[1]);
8236   DBUG_EXECUTE_IF("inject_error_ha_delete_row",
8237                   return HA_ERR_INTERNAL_ERROR; );
8238 
8239   DBUG_EXECUTE_IF("handler_crashed_table_on_usage",
8240                   my_error(HA_ERR_CRASHED, MYF(ME_ERRORLOG), table_share->table_name.str);
8241                   set_my_errno(HA_ERR_CRASHED);
8242                   return(HA_ERR_CRASHED););
8243 
8244   MYSQL_DELETE_ROW_START(table_share->db.str, table_share->table_name.str);
8245   mark_trx_read_write();
8246 
8247   MYSQL_TABLE_IO_WAIT(PSI_TABLE_DELETE_ROW, active_index, error,
8248     { error= delete_row(buf);})
8249 
8250   MYSQL_DELETE_ROW_DONE(error);
8251   if (unlikely(error))
8252     return error;
8253   if (unlikely((error= binlog_log_row(table, buf, 0, log_func))))
8254     return error;
8255   return 0;
8256 }
8257 
8258 
8259 
/** @brief
  use_hidden_primary_key() is called in case of an update/delete when
  (table_flags() & HA_PRIMARY_KEY_REQUIRED_FOR_DELETE) is set
  but we don't have a primary key.

  This base implementation marks all columns of the table as used.
*/
void handler::use_hidden_primary_key()
{
  /* fallback to use all columns in the table to identify row */
  table->use_all_columns();
}
8270 
8271 
8272 /**
8273   Get an initialized ha_share.
8274 
8275   @return Initialized ha_share
8276     @retval NULL    ha_share is not yet initialized.
8277     @retval != NULL previous initialized ha_share.
8278 
8279   @note
8280   If not a temp table, then LOCK_ha_data must be held.
8281 */
8282 
get_ha_share_ptr()8283 Handler_share *handler::get_ha_share_ptr()
8284 {
8285   DBUG_ENTER("handler::get_ha_share_ptr");
8286   assert(ha_share && table_share);
8287 
8288 #ifndef NDEBUG
8289   if (table_share->tmp_table == NO_TMP_TABLE)
8290     mysql_mutex_assert_owner(&table_share->LOCK_ha_data);
8291 #endif
8292 
8293   DBUG_RETURN(*ha_share);
8294 }
8295 
8296 
/**
  Set ha_share to be used by all instances of the same table/partition.

  Stores the given pointer through this handler's ha_share slot.

  @param arg_ha_share    Handler_share to be shared.

  @note
  If not a temp table, then LOCK_ha_data must be held.
*/

void handler::set_ha_share_ptr(Handler_share *arg_ha_share)
{
  DBUG_ENTER("handler::set_ha_share_ptr");
  assert(ha_share);
#ifndef NDEBUG
  /* Debug builds verify that the caller holds the protecting mutex. */
  if (table_share->tmp_table == NO_TMP_TABLE)
    mysql_mutex_assert_owner(&table_share->LOCK_ha_data);
#endif

  *ha_share= arg_ha_share;
  DBUG_VOID_RETURN;
}
8318 
8319 
8320 /**
8321   Take a lock for protecting shared handler data.
8322 */
8323 
lock_shared_ha_data()8324 void handler::lock_shared_ha_data()
8325 {
8326   assert(table_share);
8327   if (table_share->tmp_table == NO_TMP_TABLE)
8328     mysql_mutex_lock(&table_share->LOCK_ha_data);
8329 }
8330 
8331 
8332 /**
8333   Release lock for protecting ha_share.
8334 */
8335 
unlock_shared_ha_data()8336 void handler::unlock_shared_ha_data()
8337 {
8338   assert(table_share);
8339   if (table_share->tmp_table == NO_TMP_TABLE)
8340     mysql_mutex_unlock(&table_share->LOCK_ha_data);
8341 }
8342 
8343 
/**
  This structure is a helper structure for passing the length and pointer of
  blob space allocated by storage engine.

  One entry is filled per BLOB virtual generated column by
  extract_blob_space_and_length_from_record_buff() and consumed in the same
  order by copy_blob_data().
*/
struct blob_len_ptr{
  uint length;  // length of the space allocated for the blob
  uchar *ptr;   // pointer to that space
};
8352 
8353 
8354 /**
8355   Get the blob length and pointer of allocated space from the record buffer.
8356 
8357   During evaluating the blob virtual generated columns, the blob space will
8358   be allocated by server. In order to keep the blob data after the table is
8359   closed, we need write the data into a specified space allocated by storage
8360   engine. Here, we have to extract the space pointer and length from the
8361   record buffer.
8362   After we get the value of virtual generated columns, copy the data into
8363   the specified space and store it in the record buffer (@see copy_blob_data()).
8364 
8365   @param table                    the pointer of table
8366   @param fields                   bitmap of field index of evaluated
8367                                   generated column
8368   @param[out] blob_len_ptr_array  an array to record the length and pointer
8369                                   of allocated space by storage engine.
8370   @note The caller should provide the blob_len_ptr_array with a size of
8371         MAX_FIELDS.
8372 */
8373 
extract_blob_space_and_length_from_record_buff(const TABLE * table,const MY_BITMAP * const fields,blob_len_ptr * blob_len_ptr_array)8374 static void extract_blob_space_and_length_from_record_buff(const TABLE *table,
8375                                            const MY_BITMAP *const fields,
8376                                            blob_len_ptr *blob_len_ptr_array)
8377 {
8378   int num= 0;
8379   for (Field **vfield= table->vfield; *vfield; vfield++)
8380   {
8381     // Check if this field should be included
8382     if (bitmap_is_set(fields, (*vfield)->field_index) &&
8383         (*vfield)->is_virtual_gcol() && (*vfield)->type() == MYSQL_TYPE_BLOB)
8384     {
8385       blob_len_ptr_array[num].length= (*vfield)->data_length();
8386       // TODO: The following check is only for Innodb.
8387       assert(blob_len_ptr_array[num].length == 255 ||
8388              blob_len_ptr_array[num].length == 768 ||
8389              blob_len_ptr_array[num].length == 3073);
8390 
8391       uchar *ptr;
8392       (*vfield)->get_ptr(&ptr);
8393       blob_len_ptr_array[num].ptr= ptr;
8394 
8395       // Let server allocate the space for BLOB virtual generated columns
8396       (*vfield)->reset();
8397 
8398       num++;
8399       assert(num <= MAX_FIELDS);
8400     }
8401   }
8402 }
8403 
8404 
8405 /**
8406   Copy the value of BLOB virtual generated columns into the space allocated
8407   by storage engine.
8408 
8409   This is because the table is closed after evaluating the value. In order to
8410   keep the BLOB value after the table is closed, we have to copy the value into
8411   the place where storage engine prepares for.
8412 
8413   @param table              pointer of the table to be operated on
8414   @param fields             bitmap of field index of evaluated generated column
8415   @param blob_len_ptr_array array of length and pointer of allocated space by
8416                             storage engine.
8417 */
8418 
copy_blob_data(const TABLE * table,const MY_BITMAP * const fields,blob_len_ptr * blob_len_ptr_array)8419 static void copy_blob_data(const TABLE *table,
8420                            const MY_BITMAP *const fields,
8421                            blob_len_ptr *blob_len_ptr_array)
8422 {
8423   uint  num= 0;
8424   for (Field **vfield= table->vfield; *vfield; vfield++)
8425   {
8426     // Check if this field should be included
8427     if (bitmap_is_set(fields, (*vfield)->field_index) &&
8428         (*vfield)->is_virtual_gcol() && (*vfield)->type() == MYSQL_TYPE_BLOB)
8429     {
8430       assert(blob_len_ptr_array[num].length > 0);
8431       assert(blob_len_ptr_array[num].ptr != NULL);
8432 
8433       /*
8434         Only copy as much of the blob as the storage engine has
8435         allocated space for. This is sufficient since the only use of the
8436         blob in the storage engine is for using a prefix of it in a
8437         secondary index.
8438       */
8439       uint length= (*vfield)->data_length();
8440       const uint alloc_len= blob_len_ptr_array[num].length;
8441       length= length > alloc_len ? alloc_len : length;
8442 
8443       uchar *ptr;
8444       (*vfield)->get_ptr(&ptr);
8445       memcpy(blob_len_ptr_array[num].ptr, ptr, length);
8446       (down_cast<Field_blob *>(*vfield))->store_in_allocated_space(
8447                             pointer_cast<char *>(blob_len_ptr_array[num].ptr),
8448                             length);
8449       num++;
8450       assert(num <= MAX_FIELDS);
8451     }
8452   }
8453 }
8454 
8455 
/*
  Evaluate generated column's value. This is an internal helper reserved for
  handler::my_eval_gcolumn_expr().

  The table's fields are temporarily repointed from table->record[0] to
  the given record buffer for the duration of the evaluation and pointed
  back before returning.

  @param thd        pointer of THD
  @param table      The pointer of table where evaluated generated
                    columns are in
  @param fields     bitmap of field index of evaluated generated column
  @param[in,out] record record buff of base columns generated column depends.
                        After calling this function, it will be used to return
                        the value of generated column.
  @param in_purge   whether the function is called by purge thread

  @return true in case of error, false otherwise.
*/

static bool my_eval_gcolumn_expr_helper(THD *thd, TABLE *table,
                                        const MY_BITMAP *const fields,
                                        uchar *record,
                                        bool in_purge)
{
  DBUG_ENTER("my_eval_gcolumn_expr_helper");
  assert(table && table->vfield);
  assert(!thd->is_error());

  /* Make the fields read from and write to 'record' instead of record[0]. */
  uchar *old_buf= table->record[0];
  repoint_field_to_record(table, old_buf, record);

  blob_len_ptr blob_len_ptr_array[MAX_FIELDS];

  /*
    If it's purge thread, we need get the space allocated by storage engine
    for blob.
  */
  if (in_purge)
    extract_blob_space_and_length_from_record_buff(table, fields,
                                                   blob_len_ptr_array);

  bool res= false;
  /*
    Start from the requested columns, limited to this table's field count:
    the bitmap lives in a stack buffer sized for MAX_FIELDS.
  */
  MY_BITMAP fields_to_evaluate;
  my_bitmap_map bitbuf[bitmap_buffer_size(MAX_FIELDS) / sizeof(my_bitmap_map)];
  bitmap_init(&fields_to_evaluate, bitbuf, table->s->fields, 0);
  bitmap_set_all(&fields_to_evaluate);
  bitmap_intersect(&fields_to_evaluate, fields);
  /*
    In addition to evaluating the value for the columns requested by
    the caller we also need to evaluate any virtual columns that these
    depend on.
    This loop goes through the columns that should be evaluated and
    adds all the base columns. If the base column is virtual, it has
    to be evaluated.
  */
  for (Field **vfield_ptr= table->vfield; *vfield_ptr; vfield_ptr++)
  {
    Field *field= *vfield_ptr;
    // Validate that the field number is less than the bit map size
    assert(field->field_index < fields->n_bits);

    if (bitmap_is_set(fields, field->field_index))
      bitmap_union(&fields_to_evaluate, &field->gcol_info->base_columns_map);
  }

  /*
    Evaluate all requested columns and all base columns these depend
    on that are virtual.

    This function is called by the storage engine, which may request to
    evaluate more generated columns than read_set/write_set says.
    For example, InnoDB's row_sel_sec_rec_is_for_clust_rec() reads the full
    record from the clustered index and asks us to compute generated columns
    that match key fields in the used secondary index. So we trust that the
    engine has filled all base columns necessary to requested computations,
    and we ignore read_set/write_set.
  */

  my_bitmap_map *old_maps[2];
  dbug_tmp_use_all_columns(table, old_maps,
                           table->read_set, table->write_set);

  for (Field **vfield_ptr= table->vfield; *vfield_ptr; vfield_ptr++)
  {
    Field *field= *vfield_ptr;

    // Check if we should evaluate this field
    if (bitmap_is_set(&fields_to_evaluate, field->field_index) &&
        field->is_virtual_gcol())
    {
      assert(field->gcol_info && field->gcol_info->expr_item->fixed);

      const type_conversion_status save_in_field_status=
        field->gcol_info->expr_item->save_in_field(field, 0);
      assert(!thd->is_error() || save_in_field_status != TYPE_OK);

      /*
        save_in_field() may return non-zero even if there was no
        error. This happens if a warning is raised, such as an
        out-of-range warning when converting the result to the target
        type of the virtual column. We should stop only if the
        non-zero return value was caused by an actual error.
      */
      if (save_in_field_status != TYPE_OK && thd->is_error())
      {
        res= true;
        break;
      }
    }
  }

  dbug_tmp_restore_column_maps(table->read_set, table->write_set, old_maps);

  /*
    If it's a purge thread, we need copy the blob data into specified place
    allocated by storage engine so that the blob data still can be accessed
    after table is closed.
  */
  if (in_purge)
    copy_blob_data(table, fields, blob_len_ptr_array);

  /* Point the fields back at the original record buffer. */
  repoint_field_to_record(table, record, old_buf);
  DBUG_RETURN(res);
}
8577 
8578 
8579 /**
8580    Callback to allow InnoDB to prepare a template for generated
8581    column processing. This function will open the table without
8582    opening in the engine and call the provided function with
8583    the TABLE object made. The function will then close the TABLE.
8584 
8585    @param thd            Thread handle
8586    @param db_name        Name of database containing the table
8587    @param table_name     Name of table to open
8588    @param myc            InnoDB function to call for processing TABLE
8589    @param ib_table       Argument for InnoDB function
8590 
8591    @return true in case of error, false otherwise.
8592 */
8593 
my_prepare_gcolumn_template(THD * thd,const char * db_name,const char * table_name,my_gcolumn_template_callback_t myc,void * ib_table)8594 bool handler::my_prepare_gcolumn_template(THD *thd,
8595                                           const char *db_name,
8596                                           const char *table_name,
8597                                           my_gcolumn_template_callback_t myc,
8598                                           void* ib_table)
8599 {
8600   char path[FN_REFLEN + 1];
8601   bool was_truncated;
8602   build_table_filename(path, sizeof(path) - 1 - reg_ext_length,
8603                        db_name, table_name, "", 0, &was_truncated);
8604   assert(!was_truncated);
8605   lex_start(thd);
8606   bool rc= true;
8607 
8608   // Note! The last argument to open_table_uncached() must be false,
8609   // since the table already exists in the TDC. Allowing the table to
8610   // be opened in the SE in this case is dangerous as the two shares
8611   // could get conflicting SE private data.
8612   TABLE *table= open_table_uncached(thd, path, db_name, table_name,
8613                                     false, false);
8614   if (table)
8615   {
8616     myc(table, ib_table);
8617     intern_close_table(table);
8618     rc= false;
8619   }
8620   lex_end(thd->lex);
8621   return rc;
8622 }
8623 
8624 
8625 /**
8626    Callback for generated columns processing. Will open the table, in the
8627    server *only*, and call my_eval_gcolumn_expr_helper() to do the actual
8628    processing. This function is a variant of the other
8629    handler::my_eval_gcolumn_expr() but is intended for use when no TABLE
8630    object already exists - e.g. from purge threads.
8631 
8632    Note! The call to open_table_uncached() must be made with the last
8633    argument (open_in_engine) set to false. Failing to do so will cause
8634    deadlocks and incorrect behavior.
8635 
8636    @param thd             Thread handle
8637    @param db_name         Database containing the table to open
8638    @param table_name      Name of table to open
8639    @param fields          Bitmap of field index of evaluated generated column
8640    @param record          Record buffer
8641 
8642    @return true in case of error, false otherwise.
8643 */
8644 
my_eval_gcolumn_expr_with_open(THD * thd,const char * db_name,const char * table_name,const MY_BITMAP * const fields,uchar * record)8645 bool handler::my_eval_gcolumn_expr_with_open(THD *thd,
8646                                              const char *db_name,
8647                                              const char *table_name,
8648                                              const MY_BITMAP *const fields,
8649                                              uchar *record)
8650 {
8651   bool retval= true;
8652   lex_start(thd);
8653 
8654   char path[FN_REFLEN + 1];
8655   bool was_truncated;
8656   build_table_filename(path, sizeof(path) - 1 - reg_ext_length,
8657                        db_name, table_name, "", 0, &was_truncated);
8658   assert(!was_truncated);
8659 
8660   TABLE *table= open_table_uncached(thd, path, db_name, table_name,
8661                                     false, false);
8662   if (table)
8663   {
8664     retval= my_eval_gcolumn_expr_helper(thd, table, fields, record, true);
8665     intern_close_table(table);
8666   }
8667 
8668   lex_end(thd->lex);
8669   return retval;
8670 }
8671 
8672 
8673 /**
8674   Evaluate generated Column's value. If the engine has to write an index entry
8675   to its UNDO log (in a DELETE or UPDATE), and the index is on a virtual
8676   generated column, engine needs to calculate the column's value. This variant
8677   of handler::my_eval_gcolumn_expr() is used by client threads which have a
8678   TABLE.
8679 
8680   @param thd        Thread handle
8681   @param table      mysql table object
8682   @param fields     bitmap of field index of evaluated
8683 	            generated column
8684   @param record     buff of base columns generated column depends.
8685                     After calling this function, it will be used to
8686                     return the value of generated column.
8687 
8688   @retval true in case of error
8689   @retval false on success.
8690 */
8691 
my_eval_gcolumn_expr(THD * thd,TABLE * table,const MY_BITMAP * const fields,uchar * record)8692 bool handler::my_eval_gcolumn_expr(THD *thd, TABLE *table,
8693 				   const MY_BITMAP *const fields,
8694                                    uchar *record)
8695 {
8696   DBUG_ENTER("my_eval_gcolumn_expr");
8697 
8698   const bool res=
8699      my_eval_gcolumn_expr_helper(thd, table, fields, record, false);
8700   DBUG_RETURN(res);
8701 }
8702 
8703 
8704 /**
8705   Auxiliary structure for passing information to notify_*_helper()
8706   functions.
8707 */
8708 
8709 struct HTON_NOTIFY_PARAMS
8710 {
HTON_NOTIFY_PARAMSHTON_NOTIFY_PARAMS8711   HTON_NOTIFY_PARAMS(const MDL_key *mdl_key,
8712                      ha_notification_type mdl_type)
8713     : key(mdl_key), notification_type(mdl_type),
8714       some_htons_were_notified(false),
8715       victimized(false)
8716   {}
8717 
8718   const MDL_key *key;
8719   const ha_notification_type notification_type;
8720   bool some_htons_were_notified;
8721   bool victimized;
8722 };
8723 
8724 
8725 static my_bool
notify_exclusive_mdl_helper(THD * thd,plugin_ref plugin,void * arg)8726 notify_exclusive_mdl_helper(THD *thd, plugin_ref plugin, void *arg)
8727 {
8728   handlerton *hton= plugin_data<handlerton*>(plugin);
8729   if (hton->state == SHOW_OPTION_YES && hton->notify_exclusive_mdl)
8730   {
8731     HTON_NOTIFY_PARAMS *params= reinterpret_cast<HTON_NOTIFY_PARAMS*>(arg);
8732 
8733     if (hton->notify_exclusive_mdl(thd, params->key,
8734                                    params->notification_type,
8735                                    &params->victimized))
8736     {
8737       // Ignore failures from post event notification.
8738       if (params->notification_type == HA_NOTIFY_PRE_EVENT)
8739         return TRUE;
8740     }
8741     else
8742       params->some_htons_were_notified= true;
8743   }
8744   return FALSE;
8745 }
8746 
8747 
8748 /**
8749   Notify/get permission from all interested storage engines before
8750   acquisition or after release of exclusive metadata lock on object
8751   represented by key.
8752 
8753   @param thd                Thread context.
8754   @param mdl_key            MDL key identifying object on which exclusive
8755                             lock is to be acquired/was released.
8756   @param notification_type  Indicates whether this is pre-acquire or
8757                             post-release notification.
8758   @param victimized        'true' if locking failed as we were selected
8759                             as a victim in order to avoid possible deadlocks.
8760 
8761   @note @see handlerton::notify_exclusive_mdl for details about
8762         calling convention and error reporting.
8763 
8764   @return False - if notification was successful/lock can be acquired,
8765           True - if it has failed/lock should not be acquired.
8766 */
8767 
ha_notify_exclusive_mdl(THD * thd,const MDL_key * mdl_key,ha_notification_type notification_type,bool * victimized)8768 bool ha_notify_exclusive_mdl(THD *thd, const MDL_key *mdl_key,
8769                              ha_notification_type notification_type,
8770                              bool *victimized)
8771 {
8772   HTON_NOTIFY_PARAMS params(mdl_key, notification_type);
8773   *victimized = false;
8774   if (plugin_foreach(thd, notify_exclusive_mdl_helper,
8775                      MYSQL_STORAGE_ENGINE_PLUGIN, &params))
8776   {
8777     *victimized = params.victimized;
8778     /*
8779       If some SE hasn't given its permission to acquire lock and some SEs
8780       has given their permissions, we need to notify the latter group about
8781       failed lock acquisition. We do this by calling post-release notification
8782       for all interested SEs unconditionally.
8783     */
8784     if (notification_type == HA_NOTIFY_PRE_EVENT &&
8785         params.some_htons_were_notified)
8786     {
8787       HTON_NOTIFY_PARAMS rollback_params(mdl_key, HA_NOTIFY_POST_EVENT);
8788       (void) plugin_foreach(thd, notify_exclusive_mdl_helper,
8789                             MYSQL_STORAGE_ENGINE_PLUGIN, &rollback_params);
8790     }
8791     return true;
8792   }
8793   return false;
8794 }
8795 
8796 
8797 static my_bool
notify_alter_table_helper(THD * thd,plugin_ref plugin,void * arg)8798 notify_alter_table_helper(THD *thd, plugin_ref plugin, void *arg)
8799 {
8800   handlerton *hton= plugin_data<handlerton*>(plugin);
8801   if (hton->state == SHOW_OPTION_YES && hton->notify_alter_table)
8802   {
8803     HTON_NOTIFY_PARAMS *params= reinterpret_cast<HTON_NOTIFY_PARAMS*>(arg);
8804 
8805     if (hton->notify_alter_table(thd, params->key, params->notification_type))
8806     {
8807       // Ignore failures from post event notification.
8808       if (params->notification_type == HA_NOTIFY_PRE_EVENT)
8809         return TRUE;
8810     }
8811     else
8812       params->some_htons_were_notified= true;
8813   }
8814   return FALSE;
8815 }
8816 
8817 
8818 /**
8819   Notify/get permission from all interested storage engines before
8820   or after executed ALTER TABLE on the table identified by key.
8821 
8822   @param thd                Thread context.
8823   @param mdl_key            MDL key identifying table.
8824   @param notification_type  Indicates whether this is pre-ALTER or
8825                             post-ALTER notification.
8826 
8827   @note @see handlerton::notify_alter_table for rationale,
8828         details about calling convention and error reporting.
8829 
8830   @return False - if notification was successful/ALTER TABLE can
8831                   proceed.
8832           True -  if it has failed/ALTER TABLE should fail.
8833 */
8834 
ha_notify_alter_table(THD * thd,const MDL_key * mdl_key,ha_notification_type notification_type)8835 bool ha_notify_alter_table(THD *thd, const MDL_key *mdl_key,
8836                            ha_notification_type notification_type)
8837 {
8838   HTON_NOTIFY_PARAMS params(mdl_key, notification_type);
8839 
8840   if (plugin_foreach(thd, notify_alter_table_helper,
8841                      MYSQL_STORAGE_ENGINE_PLUGIN, &params))
8842   {
8843     /*
8844       If some SE hasn't given its permission to do ALTER TABLE and some SEs
8845       has given their permissions, we need to notify the latter group about
8846       failed attemopt. We do this by calling post-ALTER TABLE notification
8847       for all interested SEs unconditionally.
8848     */
8849     if (notification_type == HA_NOTIFY_PRE_EVENT &&
8850         params.some_htons_were_notified)
8851     {
8852       HTON_NOTIFY_PARAMS rollback_params(mdl_key, HA_NOTIFY_POST_EVENT);
8853       (void) plugin_foreach(thd, notify_alter_table_helper,
8854                             MYSQL_STORAGE_ENGINE_PLUGIN, &rollback_params);
8855     }
8856     return true;
8857   }
8858   return false;
8859 }
8860 
8861 /**
8862   Set the transaction isolation level for the next transaction and update
8863   session tracker information about the transaction isolation level.
8864 
8865   @param thd           THD session setting the tx_isolation.
8866   @param tx_isolation  The isolation level to be set.
8867   @param one_shot      True if the isolation level should be restored to
8868                        session default after finishing the transaction.
8869 */
set_tx_isolation(THD * thd,enum_tx_isolation tx_isolation,bool one_shot)8870 bool set_tx_isolation(THD *thd,
8871                       enum_tx_isolation tx_isolation,
8872                       bool one_shot)
8873 {
8874   Transaction_state_tracker *tst= NULL;
8875 
8876   if (thd->variables.session_track_transaction_info > TX_TRACK_NONE)
8877     tst= (Transaction_state_tracker *)
8878            thd->session_tracker.get_tracker(TRANSACTION_INFO_TRACKER);
8879 
8880   thd->tx_isolation= tx_isolation;
8881 
8882   if (one_shot)
8883   {
8884     assert(!thd->in_active_multi_stmt_transaction());
8885     assert(!thd->in_sub_stmt);
8886     enum enum_tx_isol_level l;
8887     switch (thd->tx_isolation) {
8888     case ISO_READ_UNCOMMITTED:
8889       l=  TX_ISOL_UNCOMMITTED;
8890       break;
8891     case ISO_READ_COMMITTED:
8892       l=  TX_ISOL_COMMITTED;
8893       break;
8894     case ISO_REPEATABLE_READ:
8895       l= TX_ISOL_REPEATABLE;
8896       break;
8897     case ISO_SERIALIZABLE:
8898       l= TX_ISOL_SERIALIZABLE;
8899       break;
8900     default:
8901       assert(0);
8902       return true;
8903     }
8904     if (tst)
8905       tst->set_isol_level(thd, l);
8906   }
8907   else if (tst)
8908   {
8909     tst->set_isol_level(thd, TX_ISOL_INHERIT);
8910   }
8911   return false;
8912 }
8913 
8914 
8915 /**
8916   Checks if the file name is reserved word used by SE by invoking
8917   the handlerton method.
8918 
8919   @param  unused1       thread handler which is unused.
8920   @param  plugin        SE plugin.
8921   @param  name          Database name.
8922 
8923   @retval true          If the name is reserved word.
8924   @retval false         If the name is not reserved word.
8925 */
is_reserved_db_name_handlerton(THD * unused1,plugin_ref plugin,void * name)8926 static my_bool is_reserved_db_name_handlerton(THD *unused1, plugin_ref plugin,
8927                                               void *name)
8928 {
8929   handlerton *hton= plugin_data<handlerton*>(plugin);
8930   if (hton->state == SHOW_OPTION_YES && hton->is_reserved_db_name)
8931     return (hton->is_reserved_db_name(hton, (const char *)name));
8932   return false;
8933 }
8934 
8935 
8936 /**
8937    Check if the file name is reserved word used by SE.
8938 
8939    @param  name    Database name.
8940 
8941    @retval true    If the name is a reserved word.
8942    @retval false   If the name is not a reserved word.
8943 */
ha_check_reserved_db_name(const char * name)8944 bool ha_check_reserved_db_name(const char* name)
8945 {
8946   return (plugin_foreach(NULL, is_reserved_db_name_handlerton,
8947                          MYSQL_STORAGE_ENGINE_PLUGIN, (char *)name));
8948 }
8949