1 /* Copyright (c) 2000, 2020, Oracle and/or its affiliates. All rights reserved.
2 
3    This program is free software; you can redistribute it and/or modify
4    it under the terms of the GNU General Public License, version 2.0,
5    as published by the Free Software Foundation.
6 
7    This program is also distributed with certain software (including
8    but not limited to OpenSSL) that is licensed under separate terms,
9    as designated in a particular file or component or in included license
10    documentation.  The authors of MySQL hereby grant you an additional
11    permission to link the program and your derivative works with the
12    separately licensed software that they have included with MySQL.
13 
14    This program is distributed in the hope that it will be useful,
15    but WITHOUT ANY WARRANTY; without even the implied warranty of
16    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
17    GNU General Public License, version 2.0, for more details.
18 
19    You should have received a copy of the GNU General Public License
20    along with this program; if not, write to the Free Software
21    Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301  USA */
22 
23 /** @file sql/handler.cc
24 
25     @brief
26     Implements functions in the handler interface that are shared between all
27     storage engines.
28 */
29 
30 #include "sql/handler.h"
31 
32 #include <ctype.h>
33 #include <errno.h>
34 #include <limits.h>
35 #include <stdio.h>
36 #include <stdlib.h>
37 #include <algorithm>
38 #include <atomic>
39 #include <cmath>
40 #include <list>
41 #include <random>  // std::uniform_real_distribution
42 #include <string>
43 #include <vector>
44 
45 #include "keycache.h"
46 #include "libbinlogevents/include/binlog_event.h"
47 #include "m_ctype.h"
48 #include "m_string.h"
49 #include "my_bit.h"     // my_count_bits
50 #include "my_bitmap.h"  // MY_BITMAP
51 #include "my_check_opt.h"
52 #include "my_dbug.h"
53 #include "my_loglevel.h"
54 #include "my_macros.h"
55 #include "my_pointer_arithmetic.h"
56 #include "my_psi_config.h"
57 #include "my_sqlcommand.h"
58 #include "my_sys.h"  // MEM_DEFINED_IF_ADDRESSABLE()
59 #include "myisam.h"  // TT_FOR_UPGRADE
60 #include "mysql/components/services/log_builtins.h"
61 #include "mysql/components/services/log_shared.h"
62 #include "mysql/plugin.h"
63 #include "mysql/psi/mysql_file.h"
64 #include "mysql/psi/mysql_mutex.h"
65 #include "mysql/psi/mysql_table.h"
66 #include "mysql/psi/mysql_transaction.h"
67 #include "mysql/psi/psi_base.h"
68 #include "mysql/psi/psi_table.h"
69 #include "mysql/service_mysql_alloc.h"
70 #include "mysql_com.h"
71 #include "mysql_version.h"  // MYSQL_VERSION_ID
72 #include "mysqld_error.h"
73 #include "prealloced_array.h"
74 #include "sql/auth/auth_common.h"  // check_readonly() and SUPER_ACL
75 #include "sql/binlog.h"            // mysql_bin_log
76 #include "sql/check_stack.h"
77 #include "sql/clone_handler.h"
78 #include "sql/current_thd.h"
79 #include "sql/dd/cache/dictionary_client.h"  // dd::cache::Dictionary_client
80 #include "sql/dd/dd.h"                       // dd::get_dictionary
81 #include "sql/dd/dictionary.h"               // dd:acquire_shared_table_mdl
82 #include "sql/dd/types/table.h"              // dd::Table
83 #include "sql/dd_table_share.h"              // open_table_def
84 #include "sql/debug_sync.h"                  // DEBUG_SYNC
85 #include "sql/derror.h"                      // ER_DEFAULT
86 #include "sql/error_handler.h"               // Internal_error_handler
87 #include "sql/field.h"
88 #include "sql/item.h"
89 #include "sql/lock.h"  // MYSQL_LOCK
90 #include "sql/log.h"
91 #include "sql/log_event.h"  // Write_rows_log_event
92 #include "sql/mdl.h"
93 #include "sql/mysqld.h"                 // global_system_variables heap_hton ..
94 #include "sql/opt_costconstantcache.h"  // reload_optimizer_cost_constants
95 #include "sql/opt_costmodel.h"
96 #include "sql/opt_hints.h"
97 #include "sql/protocol.h"
98 #include "sql/psi_memory_key.h"
99 #include "sql/query_options.h"
100 #include "sql/record_buffer.h"  // Record_buffer
101 #include "sql/rpl_filter.h"
102 #include "sql/rpl_gtid.h"
103 #include "sql/rpl_handler.h"  // RUN_HOOK
104 #include "sql/rpl_rli.h"      // is_atomic_ddl_commit_on_slave
105 #include "sql/rpl_slave_commit_order_manager.h"  // Commit_order_manager
106 #include "sql/rpl_write_set_handler.h"           // add_pke
107 #include "sql/sdi_utils.h"                       // import_serialized_meta_data
108 #include "sql/session_tracker.h"
109 #include "sql/sql_base.h"  // free_io_cache
110 #include "sql/sql_bitmap.h"
111 #include "sql/sql_class.h"
112 #include "sql/sql_error.h"
113 #include "sql/sql_lex.h"
114 #include "sql/sql_parse.h"   // check_stack_overrun
115 #include "sql/sql_plugin.h"  // plugin_foreach
116 #include "sql/sql_select.h"  // actual_key_parts
117 #include "sql/sql_table.h"   // build_table_filename
118 #include "sql/strfunc.h"     // strnncmp_nopads
119 #include "sql/system_variables.h"
120 #include "sql/table.h"
121 #include "sql/tc_log.h"
122 #include "sql/thr_malloc.h"
123 #include "sql/transaction.h"  // trans_commit_implicit
124 #include "sql/transaction_info.h"
125 #include "sql/xa.h"
126 #include "sql_string.h"
127 #include "sql_tmp_table.h"  // free_tmp_table
128 #include "template_utils.h"
129 #include "uniques.h"  // Unique_on_insert
130 #include "varlen_sort.h"
131 
132 /**
133   @def MYSQL_TABLE_IO_WAIT
134   Instrumentation helper for table io_waits.
135   Note that this helper is intended to be used from
136   within the handler class only, as it uses members
137   from @c handler.
138   Performance schema events are instrumented as follows:
139   - in non-batch mode, one event is generated per call
140   - in batch mode, the number of rows affected is saved
141   in @c m_psi_numrows, so that @c end_psi_batch_mode()
142   generates a single event for the batch.
143   @param OP the table operation to be performed
144   @param INDEX the table index used if any, or MAX_KEY.
145   @param RESULT the result of the table operation performed
146   @param PAYLOAD instrumented code to execute
147   @sa handler::end_psi_batch_mode.
148 */
149 #ifdef HAVE_PSI_TABLE_INTERFACE
150 #define MYSQL_TABLE_IO_WAIT(OP, INDEX, RESULT, PAYLOAD)                     \
151   {                                                                         \
152     if (m_psi != NULL) {                                                    \
153       switch (m_psi_batch_mode) {                                           \
154         case PSI_BATCH_MODE_NONE: {                                         \
155           PSI_table_locker *sub_locker = NULL;                              \
156           PSI_table_locker_state reentrant_safe_state;                      \
157           sub_locker = PSI_TABLE_CALL(start_table_io_wait)(                 \
158               &reentrant_safe_state, m_psi, OP, INDEX, __FILE__, __LINE__); \
159           PAYLOAD                                                           \
160           if (sub_locker != NULL) PSI_TABLE_CALL(end_table_io_wait)         \
161           (sub_locker, 1);                                                  \
162           break;                                                            \
163         }                                                                   \
164         case PSI_BATCH_MODE_STARTING: {                                     \
165           m_psi_locker = PSI_TABLE_CALL(start_table_io_wait)(               \
166               &m_psi_locker_state, m_psi, OP, INDEX, __FILE__, __LINE__);   \
167           PAYLOAD                                                           \
168           if (!RESULT) m_psi_numrows++;                                     \
169           m_psi_batch_mode = PSI_BATCH_MODE_STARTED;                        \
170           break;                                                            \
171         }                                                                   \
172         case PSI_BATCH_MODE_STARTED:                                        \
173         default: {                                                          \
174           DBUG_ASSERT(m_psi_batch_mode == PSI_BATCH_MODE_STARTED);          \
175           PAYLOAD                                                           \
176           if (!RESULT) m_psi_numrows++;                                     \
177           break;                                                            \
178         }                                                                   \
179       }                                                                     \
180     } else {                                                                \
181       PAYLOAD                                                               \
182     }                                                                       \
183   }
184 #else
185 #define MYSQL_TABLE_IO_WAIT(OP, INDEX, RESULT, PAYLOAD) PAYLOAD
186 #endif
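/*
  Illustrative use only (a sketch, not part of the macro definition):
  handler wrappers such as handler::ha_index_next() typically expand
  roughly as

    int result;
    MYSQL_TABLE_IO_WAIT(PSI_TABLE_FETCH_ROW, active_index, result,
                        { result = index_next(buf); })

  so that one row-fetch event is reported per call in non-batch mode,
  while in batch mode only the m_psi_numrows counter is incremented.
*/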
187 
188 /**
189   @def MYSQL_TABLE_LOCK_WAIT
190   Instrumentation helper for table lock waits.
191   @param OP the table operation to be performed
192   @param FLAGS per table operation flags.
193   @param PAYLOAD the code to instrument.
194   @sa MYSQL_END_TABLE_WAIT.
195 */
196 #ifdef HAVE_PSI_TABLE_INTERFACE
197 #define MYSQL_TABLE_LOCK_WAIT(OP, FLAGS, PAYLOAD)                              \
198   {                                                                            \
199     if (m_psi != NULL) {                                                       \
200       PSI_table_locker *locker;                                                \
201       PSI_table_locker_state state;                                            \
202       locker = PSI_TABLE_CALL(start_table_lock_wait)(&state, m_psi, OP, FLAGS, \
203                                                      __FILE__, __LINE__);      \
204       PAYLOAD                                                                  \
205       if (locker != NULL) PSI_TABLE_CALL(end_table_lock_wait)(locker);         \
206     } else {                                                                   \
207       PAYLOAD                                                                  \
208     }                                                                          \
209   }
210 #else
211 #define MYSQL_TABLE_LOCK_WAIT(OP, FLAGS, PAYLOAD) PAYLOAD
212 #endif
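/*
  Illustrative use only (a sketch): lock acquisition paths wrap the engine
  call in the same way, e.g.

    int error;
    MYSQL_TABLE_LOCK_WAIT(PSI_TABLE_EXTERNAL_LOCK, lock_type,
                          { error = external_lock(thd, lock_type); })

  which reports one table lock wait event per call when the PSI table
  interface is compiled in, and degenerates to the bare PAYLOAD otherwise.
*/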
213 
214 using std::list;
215 using std::log2;
216 using std::max;
217 using std::min;
218 
219 /**
220   While we have legacy_db_type, we have this array to
221   check for dups and to find handlerton from legacy_db_type.
222   Remove when legacy_db_type is finally gone
223 */
224 static Prealloced_array<st_plugin_int *, PREALLOC_NUM_HA> se_plugin_array(
225     PSI_NOT_INSTRUMENTED);
226 
227 /**
228   Array allowing to check if handlerton is builtin without
229   acquiring LOCK_plugin.
230 */
231 static Prealloced_array<bool, PREALLOC_NUM_HA> builtin_htons(
232     PSI_NOT_INSTRUMENTED);
233 
hton2plugin(uint slot)234 st_plugin_int *hton2plugin(uint slot) { return se_plugin_array[slot]; }
235 
num_hton2plugins()236 size_t num_hton2plugins() { return se_plugin_array.size(); }
237 
insert_hton2plugin(uint slot,st_plugin_int * plugin)238 st_plugin_int *insert_hton2plugin(uint slot, st_plugin_int *plugin) {
239   if (se_plugin_array.assign_at(slot, plugin)) return nullptr;
240   builtin_htons.assign_at(slot, true);
241   return se_plugin_array[slot];
242 }
243 
remove_hton2plugin(uint slot)244 st_plugin_int *remove_hton2plugin(uint slot) {
245   st_plugin_int *retval = se_plugin_array[slot];
246   se_plugin_array[slot] = NULL;
247   builtin_htons.assign_at(slot, false);
248   return retval;
249 }
250 
ha_resolve_storage_engine_name(const handlerton * db_type)251 const char *ha_resolve_storage_engine_name(const handlerton *db_type) {
252   return db_type == nullptr ? "UNKNOWN" : hton2plugin(db_type->slot)->name.str;
253 }
254 
255 static handlerton *installed_htons[128];
256 
257 /* number of storage engines (from installed_htons[]) that support 2pc */
258 ulong total_ha_2pc = 0;
259 /* size of savepoint storage area (see ha_init) */
260 ulong savepoint_alloc_size = 0;
261 
262 namespace {
263 struct Storage_engine_identifier {
264   const LEX_CSTRING canonical;
265   const LEX_CSTRING legacy;
266 };
267 const Storage_engine_identifier se_names[] = {
268     {{STRING_WITH_LEN("INNODB")}, {STRING_WITH_LEN("INNOBASE")}},
269     {{STRING_WITH_LEN("NDBCLUSTER")}, {STRING_WITH_LEN("NDB")}},
270     {{STRING_WITH_LEN("MEMORY")}, {STRING_WITH_LEN("HEAP")}},
271     {{STRING_WITH_LEN("MRG_MYISAM")}, {STRING_WITH_LEN("MERGE")}}};
272 const auto se_names_end = std::end(se_names);
273 std::vector<std::string> disabled_se_names;
274 }  // namespace
275 
276 const char *ha_row_type[] = {"",
277                              "FIXED",
278                              "DYNAMIC",
279                              "COMPRESSED",
280                              "REDUNDANT",
281                              "COMPACT",
282                              /* Reserved to be "PAGE" in future versions */ "?",
283                              "?",
284                              "?",
285                              "?"};
286 
287 const char *tx_isolation_names[] = {"READ-UNCOMMITTED", "READ-COMMITTED",
288                                     "REPEATABLE-READ", "SERIALIZABLE", NullS};
289 TYPELIB tx_isolation_typelib = {array_elements(tx_isolation_names) - 1, "",
290                                 tx_isolation_names, nullptr};
291 
292 // Called for each SE to check if given db.table_name is a system table.
293 static bool check_engine_system_table_handlerton(THD *unused, plugin_ref plugin,
294                                                  void *arg);
295 
296 static int ha_discover(THD *thd, const char *db, const char *name,
297                        uchar **frmblob, size_t *frmlen);
298 
299 /**
300   Structure used by SE during check for system table.
301   This structure is passed to each SE handlerton and the status (OUT param)
302   is collected.
303 */
304 struct st_sys_tbl_chk_params {
305   const char *db;                  // IN param
306   const char *table_name;          // IN param
307   bool is_sql_layer_system_table;  // IN param
308   legacy_db_type db_type;          // IN param
309 
310   enum enum_sys_tbl_chk_status {
311     // db.table_name is not a supported system table.
312     NOT_KNOWN_SYSTEM_TABLE,
313     /*
314       db.table_name is a system table,
315       but may not be supported by SE.
316     */
317     KNOWN_SYSTEM_TABLE,
318     /*
319       db.table_name is a system table,
320       and is supported by SE.
321     */
322     SUPPORTED_SYSTEM_TABLE
323   } status;  // OUT param
324 };
325 
ha_default_plugin(THD * thd)326 static plugin_ref ha_default_plugin(THD *thd) {
327   if (thd->variables.table_plugin) return thd->variables.table_plugin;
328   return my_plugin_lock(thd, &global_system_variables.table_plugin);
329 }
330 
331 /** @brief
332   Return the default storage engine handlerton used for non-temp tables
333   for thread
334 
335   SYNOPSIS
336     ha_default_handlerton(thd)
337     thd         current thread
338 
339   RETURN
340     pointer to handlerton
341 */
ha_default_handlerton(THD * thd)342 handlerton *ha_default_handlerton(THD *thd) {
343   plugin_ref plugin = ha_default_plugin(thd);
344   DBUG_ASSERT(plugin);
345   handlerton *hton = plugin_data<handlerton *>(plugin);
346   DBUG_ASSERT(hton);
347   return hton;
348 }
349 
ha_default_temp_plugin(THD * thd)350 static plugin_ref ha_default_temp_plugin(THD *thd) {
351   if (thd->variables.temp_table_plugin) return thd->variables.temp_table_plugin;
352   return my_plugin_lock(thd, &global_system_variables.temp_table_plugin);
353 }
354 
355 /** @brief
356   Return the default storage engine handlerton used for explicitly
357   created temp tables for a thread
358 
359   SYNOPSIS
360     ha_default_temp_handlerton(thd)
361     thd         current thread
362 
363   RETURN
364     pointer to handlerton
365 */
ha_default_temp_handlerton(THD * thd)366 handlerton *ha_default_temp_handlerton(THD *thd) {
367   plugin_ref plugin = ha_default_temp_plugin(thd);
368   DBUG_ASSERT(plugin);
369   handlerton *hton = plugin_data<handlerton *>(plugin);
370   DBUG_ASSERT(hton);
371   return hton;
372 }
373 
374 /**
375   Resolve handlerton plugin by name, without checking for "DEFAULT" or
376   HTON_NOT_USER_SELECTABLE.
377 
378   @param thd  Thread context.
379   @param name Plugin name.
380 
381   @return plugin or NULL if not found.
382 */
ha_resolve_by_name_raw(THD * thd,const LEX_CSTRING & name)383 plugin_ref ha_resolve_by_name_raw(THD *thd, const LEX_CSTRING &name) {
384   return plugin_lock_by_name(thd, name, MYSQL_STORAGE_ENGINE_PLUGIN);
385 }
386 
hton_charset()387 static const CHARSET_INFO &hton_charset() { return *system_charset_info; }
388 
389 /**
390   Return the storage engine handlerton for the supplied name.
391 
392   @param thd           Current thread. May be nullptr (e.g. during initialize).
393   @param name          Name of storage engine.
394   @param is_temp_table true if table is a temporary table.
395 
396   @return Pointer to storage engine plugin handle.
397 */
ha_resolve_by_name(THD * thd,const LEX_CSTRING * name,bool is_temp_table)398 plugin_ref ha_resolve_by_name(THD *thd, const LEX_CSTRING *name,
399                               bool is_temp_table) {
400   if (thd && 0 == strnncmp_nopads(hton_charset(), *name,
401                                   {STRING_WITH_LEN("DEFAULT")})) {
402     return is_temp_table ? ha_default_temp_plugin(thd) : ha_default_plugin(thd);
403   }
404 
405   // Note that thd CAN be nullptr here - it is not actually needed by
406   // ha_resolve_by_name_raw().
407   plugin_ref plugin = ha_resolve_by_name_raw(thd, *name);
408   if (plugin == nullptr) {
409     // If we fail to resolve the name passed in, we try to see if it is a
410     // historical alias.
411     auto match = std::find_if(
412         std::begin(se_names), se_names_end,
413         [&](const Storage_engine_identifier &sei) {
414           return (0 == strnncmp_nopads(hton_charset(), *name, sei.legacy));
415         });
416     if (match != se_names_end) {
417       // if it is, we resolve using the new name
418       plugin = ha_resolve_by_name_raw(thd, match->canonical);
419     }
420   }
421   if (plugin != nullptr) {
422     handlerton *hton = plugin_data<handlerton *>(plugin);
423     if (hton && !(hton->flags & HTON_NOT_USER_SELECTABLE)) return plugin;
424 
425     /*
426       unlocking plugin immediately after locking is relatively low cost.
427     */
428     plugin_unlock(thd, plugin);
429   }
430   return nullptr;
431 }
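/*
  For illustration (a sketch, assuming the engine is installed): resolving
  the historical alias "INNOBASE" falls through to the alias table above
  and locks the plugin registered under the canonical name "INNODB":

    const LEX_CSTRING name = {STRING_WITH_LEN("INNOBASE")};
    plugin_ref plugin = ha_resolve_by_name(thd, &name, false);
    // plugin now refers to the InnoDB storage engine plugin, or is nullptr
    // if it is not installed or is marked HTON_NOT_USER_SELECTABLE.
*/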
432 
433 /**
434   Read a comma-separated list of storage engine names. Look up each in the
435   known list of canonical and legacy names. In case of a match, add both the
436   canonical and the legacy name to disabled_se_names, which is a static vector
437   of disabled storage engine names.
438   If there is no match, the unmodified name is added to the vector.
439 */
set_externally_disabled_storage_engine_names(const char * disabled_list)440 void set_externally_disabled_storage_engine_names(const char *disabled_list) {
441   DBUG_ASSERT(disabled_list != nullptr);
442 
443   myu::Split(
444       disabled_list, disabled_list + strlen(disabled_list), myu::IsComma,
445       [](const char *f, const char *l) {
446         auto tr = myu::FindTrimmedRange(f, l, myu::IsSpace);
447         if (tr.first == tr.second) return;
448 
449         const LEX_CSTRING dse{tr.first,
450                               static_cast<size_t>(tr.second - tr.first)};
451         auto match = std::find_if(
452             std::begin(se_names), se_names_end,
453             [&](const Storage_engine_identifier &seid) {
454               return (
455                   (0 == strnncmp_nopads(hton_charset(), dse, seid.canonical)) ||
456                   (0 == strnncmp_nopads(hton_charset(), dse, seid.legacy)));
457             });
458         if (match == se_names_end) {
459           disabled_se_names.emplace_back(dse.str, dse.length);
460           return;
461         }
462         disabled_se_names.emplace_back(match->canonical.str,
463                                        match->canonical.length);
464         disabled_se_names.emplace_back(match->legacy.str, match->legacy.length);
465       });
466 }
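/*
  For illustration (a sketch): given a disabled_storage_engines value of
  "MyISAM, ndb", the trimmed token "MyISAM" has no entry in se_names and is
  stored as-is, while "ndb" matches the legacy name of NDBCLUSTER, so both
  "NDBCLUSTER" and "NDB" are added to disabled_se_names.
*/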
467 
is_storage_engine_name_externally_disabled(const char * name)468 static bool is_storage_engine_name_externally_disabled(const char *name) {
469   const LEX_CSTRING n{name, strlen(name)};
470   return std::any_of(
471       disabled_se_names.begin(), disabled_se_names.end(),
472       [&](const std::string &dse) {
473         return (0 == strnncmp_nopads(hton_charset(), n,
474                                      {dse.c_str(), dse.length()}));
475       });
476 }
477 
478 /**
479   Returns true if the storage engine of the handlerton argument has
480   been listed in the disabled_storage_engines system variable. @note
481   that the SE may still be internally enabled, that is
482   HaIsInternallyEnabled may return true.
483  */
ha_is_externally_disabled(const handlerton & htnr)484 bool ha_is_externally_disabled(const handlerton &htnr) {
485   const char *se_name = ha_resolve_storage_engine_name(&htnr);
486   DBUG_ASSERT(se_name != nullptr);
487   return is_storage_engine_name_externally_disabled(se_name);
488 }
489 
490 // Check if storage engine is disabled for table/tablespace creation.
ha_is_storage_engine_disabled(handlerton * se_handle)491 bool ha_is_storage_engine_disabled(handlerton *se_handle) {
492   DBUG_ASSERT(se_handle != nullptr);
493   return ha_is_externally_disabled(*se_handle);
494 }
495 
ha_lock_engine(THD * thd,const handlerton * hton)496 plugin_ref ha_lock_engine(THD *thd, const handlerton *hton) {
497   if (hton) {
498     st_plugin_int **plugin = &se_plugin_array[hton->slot];
499 
500 #ifdef DBUG_OFF
501     /*
502       Take a shortcut for builtin engines -- return pointer to plugin
503       without acquiring LOCK_plugin mutex. This is safe since such
504       plugins are not deleted until shutdown and we don't do reference
505       counting in non-debug builds for them.
506 
507       Since we have reference to handlerton on our hands, this method
508       can't be called concurrently to non-builtin handlerton initialization/
509       deinitialization. So it is safe to access builtin_htons[] without
510       additional locking.
511      */
512     if (builtin_htons[hton->slot]) return *plugin;
513 
514     return my_plugin_lock(thd, plugin);
515 #else
516     /*
517       We can't take shortcut in debug builds.
518       At least assert that builtin_htons[slot] is set correctly.
519     */
520     DBUG_ASSERT(builtin_htons[hton->slot] == (plugin[0]->plugin_dl == nullptr));
521     return my_plugin_lock(thd, &plugin);
522 #endif
523   }
524   return nullptr;
525 }
526 
ha_resolve_by_legacy_type(THD * thd,enum legacy_db_type db_type)527 handlerton *ha_resolve_by_legacy_type(THD *thd, enum legacy_db_type db_type) {
528   plugin_ref plugin;
529   switch (db_type) {
530     case DB_TYPE_DEFAULT:
531       return ha_default_handlerton(thd);
532     default:
533       if (db_type > DB_TYPE_UNKNOWN && db_type < DB_TYPE_DEFAULT &&
534           (plugin = ha_lock_engine(thd, installed_htons[db_type])))
535         return plugin_data<handlerton *>(plugin);
536       /* fall through */
537     case DB_TYPE_UNKNOWN:
538       return nullptr;
539   }
540 }
541 
542 /**
543   Use another database handler if the requested handler is not compiled in.
544 */
ha_checktype(THD * thd,enum legacy_db_type database_type,bool no_substitute,bool report_error)545 handlerton *ha_checktype(THD *thd, enum legacy_db_type database_type,
546                          bool no_substitute, bool report_error) {
547   DBUG_TRACE;
548   handlerton *hton = ha_resolve_by_legacy_type(thd, database_type);
549   if (ha_storage_engine_is_enabled(hton)) return hton;
550 
551   if (no_substitute) {
552     if (report_error) {
553       const char *engine_name = ha_resolve_storage_engine_name(hton);
554       my_error(ER_FEATURE_DISABLED, MYF(0), engine_name, engine_name);
555     }
556     return nullptr;
557   }
558 
559   (void)RUN_HOOK(transaction, after_rollback, (thd, false));
560 
561   switch (database_type) {
562     case DB_TYPE_MRG_ISAM:
563       return ha_resolve_by_legacy_type(thd, DB_TYPE_MRG_MYISAM);
564     default:
565       break;
566   }
567 
568   return ha_default_handlerton(thd);
569 } /* ha_checktype */
570 
571 /**
572   Create handler object for the table in the storage engine.
573 
574   @param share        TABLE_SHARE for the table, can be NULL if caller
575                       didn't perform full-blown open of table definition.
576   @param partitioned  Indicates whether table is partitioned.
577   @param alloc        Memory root to be used for allocating handler object.
578   @param db_type      Table's storage engine.
579 
580   @note This function will try to use default storage engine if one which
581         was specified through db_type parameter is not available.
582 */
get_new_handler(TABLE_SHARE * share,bool partitioned,MEM_ROOT * alloc,handlerton * db_type)583 handler *get_new_handler(TABLE_SHARE *share, bool partitioned, MEM_ROOT *alloc,
584                          handlerton *db_type) {
585   handler *file;
586   DBUG_TRACE;
587   DBUG_PRINT("enter", ("alloc: %p", alloc));
588 
589   if (db_type && db_type->state == SHOW_OPTION_YES && db_type->create) {
590     if ((file = db_type->create(db_type, share, partitioned, alloc)))
591       file->init();
592     return file;
593   }
594   /*
595     Try the default table type
596     Here the call to current_thd() is ok as we call this function a lot of
597     times but we enter this branch very seldom.
598   */
599   return get_new_handler(share, partitioned, alloc,
600                          ha_default_handlerton(current_thd));
601 }
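/*
  Typical call pattern (an illustrative sketch only): callers holding an
  opened TABLE_SHARE create the handler on a chosen MEM_ROOT, e.g.

    handler *file =
        get_new_handler(share, share->m_part_info != nullptr, &mem_root,
                        share->db_type());
    if (file == nullptr) {
      // Allocation failed even for the default engine.
    }
*/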
602 
603 static const char **handler_errmsgs;
604 
get_handler_errmsg(int nr)605 static const char *get_handler_errmsg(int nr) {
606   return handler_errmsgs[nr - HA_ERR_FIRST];
607 }
608 
609 /**
610   Register handler error messages for use with my_error().
611 
612   @retval
613     0           OK
614   @retval
615     !=0         Error
616 */
617 
ha_init_errors(void)618 int ha_init_errors(void) {
619 #define SETMSG(nr, msg) handler_errmsgs[(nr)-HA_ERR_FIRST] = (msg)
620 
621   /* Allocate a pointer array for the error message strings. */
622   /* Zerofill it to avoid uninitialized gaps. */
623   if (!(handler_errmsgs = (const char **)my_malloc(
624             key_memory_handler_errmsgs, HA_ERR_ERRORS * sizeof(char *),
625             MYF(MY_WME | MY_ZEROFILL))))
626     return 1;
627 
628   /* Set the dedicated error messages. */
629   SETMSG(HA_ERR_KEY_NOT_FOUND, ER_DEFAULT(ER_KEY_NOT_FOUND));
630   SETMSG(HA_ERR_FOUND_DUPP_KEY, ER_DEFAULT(ER_DUP_KEY));
631   SETMSG(HA_ERR_RECORD_CHANGED, "Update which is recoverable");
632   SETMSG(HA_ERR_WRONG_INDEX, "Wrong index given to function");
633   SETMSG(HA_ERR_CRASHED, ER_DEFAULT(ER_NOT_KEYFILE));
634   SETMSG(HA_ERR_WRONG_IN_RECORD, ER_DEFAULT(ER_CRASHED_ON_USAGE));
635   SETMSG(HA_ERR_OUT_OF_MEM, "Table handler out of memory");
636   SETMSG(HA_ERR_NOT_A_TABLE, "Incorrect file format '%.64s'");
637   SETMSG(HA_ERR_WRONG_COMMAND, "Command not supported");
638   SETMSG(HA_ERR_OLD_FILE, ER_DEFAULT(ER_OLD_KEYFILE));
639   SETMSG(HA_ERR_NO_ACTIVE_RECORD, "No record read in update");
640   SETMSG(HA_ERR_RECORD_DELETED, "Internal record deleted");
641   SETMSG(HA_ERR_RECORD_FILE_FULL, ER_DEFAULT(ER_RECORD_FILE_FULL));
642   SETMSG(HA_ERR_INDEX_FILE_FULL, "No more room in index file '%.64s'");
643   SETMSG(HA_ERR_END_OF_FILE, "End in next/prev/first/last");
644   SETMSG(HA_ERR_UNSUPPORTED, ER_DEFAULT(ER_ILLEGAL_HA));
645   SETMSG(HA_ERR_TOO_BIG_ROW, "Too big row");
646   SETMSG(HA_WRONG_CREATE_OPTION, "Wrong create option");
647   SETMSG(HA_ERR_FOUND_DUPP_UNIQUE, ER_DEFAULT(ER_DUP_UNIQUE));
648   SETMSG(HA_ERR_UNKNOWN_CHARSET, "Can't open charset");
649   SETMSG(HA_ERR_WRONG_MRG_TABLE_DEF, ER_DEFAULT(ER_WRONG_MRG_TABLE));
650   SETMSG(HA_ERR_CRASHED_ON_REPAIR, ER_DEFAULT(ER_CRASHED_ON_REPAIR));
651   SETMSG(HA_ERR_CRASHED_ON_USAGE, ER_DEFAULT(ER_CRASHED_ON_USAGE));
652   SETMSG(HA_ERR_LOCK_WAIT_TIMEOUT, ER_DEFAULT(ER_LOCK_WAIT_TIMEOUT));
653   SETMSG(HA_ERR_LOCK_TABLE_FULL, ER_DEFAULT(ER_LOCK_TABLE_FULL));
654   SETMSG(HA_ERR_READ_ONLY_TRANSACTION, ER_DEFAULT(ER_READ_ONLY_TRANSACTION));
655   SETMSG(HA_ERR_LOCK_DEADLOCK, ER_DEFAULT(ER_LOCK_DEADLOCK));
656   SETMSG(HA_ERR_CANNOT_ADD_FOREIGN, ER_DEFAULT(ER_CANNOT_ADD_FOREIGN));
657   SETMSG(HA_ERR_NO_REFERENCED_ROW, ER_DEFAULT(ER_NO_REFERENCED_ROW_2));
658   SETMSG(HA_ERR_ROW_IS_REFERENCED, ER_DEFAULT(ER_ROW_IS_REFERENCED_2));
659   SETMSG(HA_ERR_NO_SAVEPOINT, "No savepoint with that name");
660   SETMSG(HA_ERR_NON_UNIQUE_BLOCK_SIZE, "Non unique key block size");
661   SETMSG(HA_ERR_NO_SUCH_TABLE, "No such table: '%.64s'");
662   SETMSG(HA_ERR_TABLE_EXIST, ER_DEFAULT(ER_TABLE_EXISTS_ERROR));
663   SETMSG(HA_ERR_NO_CONNECTION, "Could not connect to storage engine");
664   SETMSG(HA_ERR_TABLE_DEF_CHANGED, ER_DEFAULT(ER_TABLE_DEF_CHANGED));
665   SETMSG(HA_ERR_FOREIGN_DUPLICATE_KEY,
666          "FK constraint would lead to duplicate key");
667   SETMSG(HA_ERR_TABLE_NEEDS_UPGRADE, ER_DEFAULT(ER_TABLE_NEEDS_UPGRADE));
668   SETMSG(HA_ERR_TABLE_READONLY, ER_DEFAULT(ER_OPEN_AS_READONLY));
669   SETMSG(HA_ERR_AUTOINC_READ_FAILED, ER_DEFAULT(ER_AUTOINC_READ_FAILED));
670   SETMSG(HA_ERR_AUTOINC_ERANGE, ER_DEFAULT(ER_WARN_DATA_OUT_OF_RANGE));
671   SETMSG(HA_ERR_TOO_MANY_CONCURRENT_TRXS,
672          ER_DEFAULT(ER_TOO_MANY_CONCURRENT_TRXS));
673   SETMSG(HA_ERR_INDEX_COL_TOO_LONG, ER_DEFAULT(ER_INDEX_COLUMN_TOO_LONG));
674   SETMSG(HA_ERR_INDEX_CORRUPT, ER_DEFAULT(ER_INDEX_CORRUPT));
675   SETMSG(HA_FTS_INVALID_DOCID, "Invalid InnoDB FTS Doc ID");
676   SETMSG(HA_ERR_TABLE_IN_FK_CHECK, ER_DEFAULT(ER_TABLE_IN_FK_CHECK));
677   SETMSG(HA_ERR_TABLESPACE_EXISTS, "Tablespace already exists");
678   SETMSG(HA_ERR_TABLESPACE_MISSING, ER_DEFAULT(ER_TABLESPACE_MISSING));
679   SETMSG(HA_ERR_FTS_EXCEED_RESULT_CACHE_LIMIT,
680          "FTS query exceeds result cache limit");
681   SETMSG(HA_ERR_TEMP_FILE_WRITE_FAILURE,
682          ER_DEFAULT(ER_TEMP_FILE_WRITE_FAILURE));
683   SETMSG(HA_ERR_INNODB_FORCED_RECOVERY, ER_DEFAULT(ER_INNODB_FORCED_RECOVERY));
684   SETMSG(HA_ERR_FTS_TOO_MANY_WORDS_IN_PHRASE,
685          "Too many words in a FTS phrase or proximity search");
686   SETMSG(HA_ERR_TABLE_CORRUPT, ER_DEFAULT(ER_TABLE_CORRUPT));
687   SETMSG(HA_ERR_TABLESPACE_MISSING, ER_DEFAULT(ER_TABLESPACE_MISSING));
688   SETMSG(HA_ERR_TABLESPACE_IS_NOT_EMPTY,
689          ER_DEFAULT(ER_TABLESPACE_IS_NOT_EMPTY));
690   SETMSG(HA_ERR_WRONG_FILE_NAME, ER_DEFAULT(ER_WRONG_FILE_NAME));
691   SETMSG(HA_ERR_NOT_ALLOWED_COMMAND, ER_DEFAULT(ER_NOT_ALLOWED_COMMAND));
692   SETMSG(HA_ERR_COMPUTE_FAILED, "Compute virtual column value failed");
693   SETMSG(HA_ERR_DISK_FULL_NOWAIT, ER_DEFAULT(ER_DISK_FULL_NOWAIT));
694   SETMSG(HA_ERR_NO_SESSION_TEMP, ER_DEFAULT(ER_NO_SESSION_TEMP));
695   SETMSG(HA_ERR_WRONG_TABLE_NAME, ER_DEFAULT(ER_WRONG_TABLE_NAME));
696   SETMSG(HA_ERR_TOO_LONG_PATH, ER_DEFAULT(ER_TABLE_NAME_CAUSES_TOO_LONG_PATH));
697   /* Register the error messages for use with my_error(). */
698   return my_error_register(get_handler_errmsg, HA_ERR_FIRST, HA_ERR_LAST);
699 }
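/*
  Illustrative note (a sketch): once the range is registered, handler error
  codes can be reported through the regular error facility, e.g.

    my_error(HA_ERR_NO_SUCH_TABLE, MYF(0), "db_name.table_name");

  which picks up the "No such table: '%.64s'" template installed above.
*/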
700 
ha_finalize_handlerton(st_plugin_int * plugin)701 int ha_finalize_handlerton(st_plugin_int *plugin) {
702   handlerton *hton = (handlerton *)plugin->data;
703   DBUG_TRACE;
704 
705   /* hton can be NULL here, if ha_initialize_handlerton() failed. */
706   if (!hton) goto end;
707 
708   switch (hton->state) {
709     case SHOW_OPTION_NO:
710     case SHOW_OPTION_DISABLED:
711       break;
712     case SHOW_OPTION_YES:
713       if (installed_htons[hton->db_type] == hton)
714         installed_htons[hton->db_type] = nullptr;
715       break;
716   };
717 
718   if (hton->panic) hton->panic(hton, HA_PANIC_CLOSE);
719 
720   if (plugin->plugin->deinit) {
721     /*
722       Today we have no defined/special behavior for uninstalling
723       engine plugins.
724     */
725     DBUG_PRINT("info", ("Deinitializing plugin: '%s'", plugin->name.str));
726     if (plugin->plugin->deinit(nullptr)) {
727       DBUG_PRINT("warning", ("Plugin '%s' deinit function returned error.",
728                              plugin->name.str));
729     }
730   }
731 
732   /*
733     In case a plugin is uninstalled and re-installed later, it should
734     reuse an array slot. Otherwise the number of uninstall/install
735     cycles would be limited.
736   */
737   if (hton->slot != HA_SLOT_UNDEF) {
738     /* Make sure we are not unplugging another plugin */
739     DBUG_ASSERT(se_plugin_array[hton->slot] == plugin);
740     DBUG_ASSERT(hton->slot < se_plugin_array.size());
741     se_plugin_array[hton->slot] = NULL;
742     builtin_htons[hton->slot] = false; /* Extra correctness. */
743   }
744 
745   my_free(hton);
746   plugin->data = nullptr;
747 end:
748   return 0;
749 }
750 
ha_initialize_handlerton(st_plugin_int * plugin)751 int ha_initialize_handlerton(st_plugin_int *plugin) {
752   handlerton *hton;
753   DBUG_TRACE;
754   DBUG_PRINT("plugin", ("initialize plugin: '%s'", plugin->name.str));
755 
756   hton = (handlerton *)my_malloc(key_memory_handlerton, sizeof(handlerton),
757                                  MYF(MY_WME | MY_ZEROFILL));
758 
759   if (hton == nullptr) {
760     LogErr(ERROR_LEVEL, ER_HANDLERTON_OOM, plugin->name.str);
761     goto err_no_hton_memory;
762   }
763 
764   hton->slot = HA_SLOT_UNDEF;
765   /* Historical Requirement */
766   plugin->data = hton;  // shortcut for the future
767   if (plugin->plugin->init && plugin->plugin->init(hton)) {
768     LogErr(ERROR_LEVEL, ER_PLUGIN_INIT_FAILED, plugin->name.str);
769     goto err;
770   }
771 
772   /*
773     the switch below and hton->state should be removed when
774     command-line options for plugins will be implemented
775   */
776   DBUG_PRINT("info", ("hton->state=%d", hton->state));
777   switch (hton->state) {
778     case SHOW_OPTION_NO:
779       break;
780     case SHOW_OPTION_YES: {
781       uint tmp;
782       ulong fslot;
783       /* now check the db_type for conflict */
784       if (hton->db_type <= DB_TYPE_UNKNOWN ||
785           hton->db_type >= DB_TYPE_DEFAULT || installed_htons[hton->db_type]) {
786         int idx = (int)DB_TYPE_FIRST_DYNAMIC;
787 
788         while (idx < (int)DB_TYPE_DEFAULT && installed_htons[idx]) idx++;
789 
790         if (idx == (int)DB_TYPE_DEFAULT) {
791           LogErr(WARNING_LEVEL, ER_TOO_MANY_STORAGE_ENGINES);
792           goto err_deinit;
793         }
794         if (hton->db_type != DB_TYPE_UNKNOWN)
795           LogErr(WARNING_LEVEL, ER_SE_TYPECODE_CONFLICT, plugin->plugin->name,
796                  idx);
797         hton->db_type = (enum legacy_db_type)idx;
798       }
799 
800       /*
801         In case a plugin is uninstalled and re-installed later, it should
802         reuse an array slot. Otherwise the number of uninstall/install
803         cycles would be limited. So look for a free slot.
804       */
805       DBUG_PRINT("plugin",
806                  ("total_ha: %lu", static_cast<ulong>(se_plugin_array.size())));
807       for (fslot = 0; fslot < se_plugin_array.size(); fslot++) {
808         if (!se_plugin_array[fslot]) break;
809       }
810       if (fslot < se_plugin_array.size())
811         hton->slot = fslot;
812       else {
813         hton->slot = se_plugin_array.size();
814       }
815       if (se_plugin_array.assign_at(hton->slot, plugin) ||
816           builtin_htons.assign_at(hton->slot, (plugin->plugin_dl == nullptr)))
817         goto err_deinit;
818 
819       installed_htons[hton->db_type] = hton;
820       tmp = hton->savepoint_offset;
821       hton->savepoint_offset = savepoint_alloc_size;
822       savepoint_alloc_size += tmp;
823       if (hton->prepare) total_ha_2pc++;
824       break;
825     }
826       /* fall through */
827     default:
828       hton->state = SHOW_OPTION_DISABLED;
829       break;
830   }
831 
832   /*
833     This is entirely for legacy. We will create a new "disk based" hton and a
834     "memory" hton which will be configurable longterm. We should be able to
835     remove partition and myisammrg.
836   */
837   switch (hton->db_type) {
838     case DB_TYPE_HEAP:
839       heap_hton = hton;
840       break;
841     case DB_TYPE_TEMPTABLE:
842       temptable_hton = hton;
843       break;
844     case DB_TYPE_MYISAM:
845       myisam_hton = hton;
846       break;
847     case DB_TYPE_INNODB:
848       innodb_hton = hton;
849       break;
850     default:
851       break;
852   };
853 
854   /*
855     Re-load the optimizer cost constants since this storage engine can
856     have non-default cost constants.
857   */
858   reload_optimizer_cost_constants();
859 
860   return 0;
861 
862 err_deinit:
863   /*
864     Let plugin do its inner deinitialization as plugin->init()
865     was successfully called before.
866   */
867   if (plugin->plugin->deinit) (void)plugin->plugin->deinit(nullptr);
868 
869 err:
870   my_free(hton);
871 err_no_hton_memory:
872   plugin->data = nullptr;
873   return 1;
874 }
875 
ha_init()876 int ha_init() {
877   int error = 0;
878   DBUG_TRACE;
879 
880   /*
881     Check if there is a transaction-capable storage engine besides the
882     binary log.
883   */
884   opt_using_transactions =
885       se_plugin_array.size() > static_cast<ulong>(opt_bin_log);
886   savepoint_alloc_size += sizeof(SAVEPOINT);
887 
888   return error;
889 }
890 
ha_end()891 void ha_end() {
892   // Unregister handler error messages.
893   my_error_unregister(HA_ERR_FIRST, HA_ERR_LAST);
894   my_free(handler_errmsgs);
895 }
896 
dropdb_handlerton(THD *,plugin_ref plugin,void * path)897 static bool dropdb_handlerton(THD *, plugin_ref plugin, void *path) {
898   handlerton *hton = plugin_data<handlerton *>(plugin);
899   if (hton->state == SHOW_OPTION_YES && hton->drop_database)
900     hton->drop_database(hton, (char *)path);
901   return false;
902 }
903 
ha_drop_database(char * path)904 void ha_drop_database(char *path) {
905   plugin_foreach(nullptr, dropdb_handlerton, MYSQL_STORAGE_ENGINE_PLUGIN, path);
906 }
907 
closecon_handlerton(THD * thd,plugin_ref plugin,void *)908 static bool closecon_handlerton(THD *thd, plugin_ref plugin, void *) {
909   handlerton *hton = plugin_data<handlerton *>(plugin);
910   /*
911     there's no need to rollback here as all transactions must
912     be rolled back already
913   */
914   if (hton->state == SHOW_OPTION_YES && thd_get_ha_data(thd, hton)) {
915     if (hton->close_connection) hton->close_connection(hton, thd);
916     /* make sure ha_data is reset and ha_data_lock is released */
917     thd_set_ha_data(thd, hton, nullptr);
918   }
919   return false;
920 }
921 
922 /**
923   @note
924     don't bother to rollback here, it's done already
925 */
ha_close_connection(THD * thd)926 void ha_close_connection(THD *thd) {
927   plugin_foreach(thd, closecon_handlerton, MYSQL_STORAGE_ENGINE_PLUGIN,
928                  nullptr);
929 }
930 
kill_handlerton(THD * thd,plugin_ref plugin,void *)931 static bool kill_handlerton(THD *thd, plugin_ref plugin, void *) {
932   handlerton *hton = plugin_data<handlerton *>(plugin);
933 
934   if (hton->state == SHOW_OPTION_YES && hton->kill_connection) {
935     if (thd_get_ha_data(thd, hton)) hton->kill_connection(hton, thd);
936   }
937 
938   return false;
939 }
940 
ha_kill_connection(THD * thd)941 void ha_kill_connection(THD *thd) {
942   plugin_foreach(thd, kill_handlerton, MYSQL_STORAGE_ENGINE_PLUGIN, nullptr);
943 }
944 
945 /** Invoke handlerton::pre_dd_shutdown() on a plugin.
946 @param plugin	storage engine plugin
947 @retval false (always) */
pre_dd_shutdown_handlerton(THD *,plugin_ref plugin,void *)948 static bool pre_dd_shutdown_handlerton(THD *, plugin_ref plugin, void *) {
949   handlerton *hton = plugin_data<handlerton *>(plugin);
950   if (hton->state == SHOW_OPTION_YES && hton->pre_dd_shutdown)
951     hton->pre_dd_shutdown(hton);
952   return false;
953 }
954 
955 /** Invoke handlerton::pre_dd_shutdown() on every storage engine plugin. */
ha_pre_dd_shutdown(void)956 void ha_pre_dd_shutdown(void) {
957   plugin_foreach(nullptr, pre_dd_shutdown_handlerton,
958                  MYSQL_STORAGE_ENGINE_PLUGIN, nullptr);
959 }
960 
961 /* ========================================================================
962  ======================= TRANSACTIONS ===================================*/
963 
964 /**
965   Transaction handling in the server
966   ==================================
967 
968   In each client connection, MySQL maintains two transactional
969   states:
970   - a statement transaction,
971   - a standard, also called normal transaction.
972 
973   Historical note
974   ---------------
975   "Statement transaction" is a non-standard term that comes
976   from the times when MySQL supported the BerkeleyDB storage engine.
977 
978   First of all, it should be said that BerkeleyDB's auto-commit
979   mode auto-commits operations that are atomic to the storage
980   engine itself, such as a write of a record, but are too
981   fine-grained to be atomic from the application perspective
982   (MySQL). One SQL statement could involve many BerkeleyDB
983   auto-committed operations and thus BerkeleyDB auto-commit was of
984   little use to MySQL.
985 
986   Secondly, instead of SQL standard savepoints, BerkeleyDB
987   provided the concept of "nested transactions". In a nutshell,
988   transactions could be arbitrarily nested, but when the parent
989   transaction was committed or aborted, all its child (nested)
990   transactions were committed or aborted as well.
991   Commit of a nested transaction, in turn, made its changes
992   visible, but not durable: it destroyed the nested transaction,
993   and all its changes became available to the parent and to the
994   currently active nested transactions of this parent.
995 
996   So the mechanism of nested transactions was employed to
997   provide "all or nothing" guarantee of SQL statements
998   required by the standard.
999   A nested transaction would be created at start of each SQL
1000   statement, and destroyed (committed or aborted) at statement
1001   end. Such nested transaction was internally referred to as
1002   a "statement transaction" and gave birth to the term.
1003 
1004   (Historical note ends)
1005 
1006   Since then a statement transaction is started for each statement
1007   that accesses transactional tables or uses the binary log.  If
1008   the statement succeeds, the statement transaction is committed.
1009   If the statement fails, the transaction is rolled back. Commits
1010   of statement transactions are not durable -- each such
1011   transaction is nested in the normal transaction, and if the
1012   normal transaction is rolled back, the effects of all enclosed
1013   statement transactions are undone as well.  Technically,
1014   a statement transaction can be viewed as a savepoint which is
1015   maintained automatically in order to make effects of one
1016   statement atomic.
1017 
1018   The normal transaction is started by the user and is ended
1019   usually upon a user request as well. The normal transaction
1020   encloses transactions of all statements issued between
1021   its beginning and its end.
1022   In autocommit mode, the normal transaction is equivalent
1023   to the statement transaction.
1024 
1025   Since MySQL supports PSEA (pluggable storage engine
1026   architecture), more than one transactional engine can be
1027   active at a time. Hence transactions, from the server
1028   point of view, are always distributed. In particular,
1029   transactional state is maintained independently for each
1030   engine. In order to commit a transaction the two phase
1031   commit protocol is employed.
1032 
1033   Not all statements are executed in context of a transaction.
1034   Administrative and status information statements do not modify
1035   engine data, and thus do not start a statement transaction and
1036   also have no effect on the normal transaction. Examples of such
1037   statements are SHOW STATUS and RESET SLAVE.
1038 
1039   Similarly DDL statements are not transactional,
1040   and therefore a transaction is [almost] never started for a DDL
1041   statement. The difference between a DDL statement and a purely
1042   administrative statement though is that a DDL statement always
1043   commits the current transaction before proceeding, if there is
1044   any.
1045 
1046   Finally, SQL statements that work with non-transactional
1047   engines also have no effect on the transaction state of the
1048   connection. Even though they are written to the binary log,
1049   and the binary log is, overall, transactional, the writes
1050   are done in "write-through" mode, directly to the binlog
1051   file, followed by an OS cache sync, in other words,
1052   bypassing the binlog undo log (translog).
1053   They do not commit the current normal transaction.
1054   A failure of a statement that uses non-transactional tables
1055   would cause a rollback of the statement transaction, but
1056   if the statement uses only non-transactional tables,
1057   no statement transaction is started.
1058 
1059   Data layout
1060   -----------
1061 
1062   The server stores its transaction-related data in
1063   thd->transaction. This structure has two members of type
1064   THD_TRANS. These members correspond to the statement and
1065   normal transactions respectively:
1066 
1067   - thd->transaction.stmt contains a list of engines
1068   that are participating in the given statement
1069   - thd->transaction.all contains a list of engines that
1070   have participated in any of the statement transactions started
1071   within the context of the normal transaction.
1072   Each element of the list contains a pointer to the storage
1073   engine, engine-specific transactional data, and engine-specific
1074   transaction flags.
1075 
1076   In autocommit mode thd->transaction.all is empty.
1077   Instead, data of thd->transaction.stmt is
1078   used to commit/rollback the normal transaction.
1079 
1080   The list of registered engines has a few important properties:
1081   - no engine is registered in the list twice
1082   - engines are present in the list in reverse temporal order --
1083   new participants are always added to the beginning of the list.
1084 
1085   Transaction life cycle
1086   ----------------------
1087 
1088   When a new connection is established, thd->transaction
1089   members are initialized to an empty state.
1090   If a statement uses any tables, all affected engines
1091   are registered in the statement engine list. In
1092   non-autocommit mode, the same engines are registered in
1093   the normal transaction list.
1094   At the end of the statement, the server issues a commit
1095   or a roll back for all engines in the statement list.
1096   At this point transaction flags of an engine, if any, are
1097   propagated from the statement list to the list of the normal
1098   transaction.
1099   When commit/rollback is finished, the statement list is
1100   cleared. It will be filled in again by the next statement,
1101   and emptied again at the next statement's end.
1102 
1103   The normal transaction is committed in a similar way
1104   (by going over all engines in thd->transaction.all list)
1105   but at different times:
1106   - when a COMMIT SQL statement is issued by the user
1107   - implicitly, by the server, at the beginning of a DDL statement
1108   or SET AUTOCOMMIT={0|1} statement.
1109 
1110   The normal transaction can be rolled back as well:
1111   - if the user has requested so, by issuing ROLLBACK SQL
1112   statement
1113   - if one of the storage engines requested a rollback
1114   by setting thd->transaction_rollback_request. This may
1115   happen, e.g., when the transaction in the engine was
1116   chosen as a victim of the internal deadlock resolution algorithm
1117   and rolled back internally. When such a situation happens, there
1118   is little the server can do and the only option is to rollback
1119   transactions in all other participating engines.  In this case
1120   the rollback is accompanied by an error sent to the user.
1121 
1122   As follows from the use cases above, the normal transaction
1123   is never committed when there is an outstanding statement
1124   transaction. In most cases there is no conflict, since
1125   commits of the normal transaction are issued by a stand-alone
1126   administrative or DDL statement, thus no outstanding statement
1127   transaction of the previous statement exists. Besides,
1128   all statements that manipulate the normal transaction
1129   are prohibited in stored functions and triggers, therefore
1130   no conflicting situation can occur in a sub-statement either.
1131   The remaining rare cases when the server explicitly has
1132   to commit the statement transaction prior to committing the normal
1133   one cover error-handling scenarios (see for example
1134   SQLCOM_LOCK_TABLES).
1135 
1136   When committing a statement or a normal transaction, the server
1137   either uses the two-phase commit protocol, or issues a commit
1138   in each engine independently. The two-phase commit protocol
1139   is used only if:
1140   - all participating engines support two-phase commit (provide
1141     handlerton::prepare PSEA API call) and
1142   - transactions in at least two engines modify data (i.e. are
1143   not read-only).
1144 
1145   Note that the two phase commit is used for
1146   statement transactions, even though they are not durable anyway.
1147   This is done to ensure logical consistency of data in a multiple-
1148   engine transaction.
1149   For example, imagine that some day MySQL supports unique
1150   constraint checks deferred till the end of statement. In such
1151   case a commit in one of the engines may yield ER_DUP_KEY,
1152   and MySQL should be able to gracefully abort statement
1153   transactions of other participants.
1154 
1155   After the normal transaction has been committed,
1156   thd->transaction.all list is cleared.
1157 
1158   When a connection is closed, the current normal transaction, if
1159   any, is rolled back.
1160 
1161   Roles and responsibilities
1162   --------------------------
1163 
1164   The server has no way to know that an engine participates in
1165   the statement and a transaction has been started
1166   in it unless the engine says so. Thus, in order to be
1167   a part of a transaction, the engine must "register" itself.
1168   This is done by invoking trans_register_ha() server call.
1169   Normally the engine registers itself whenever handler::external_lock()
1170   is called. trans_register_ha() can be invoked many times: if
1171   an engine is already registered, the call does nothing.
1172   In case autocommit is not set, the engine must register itself
1173   twice -- both in the statement list and in the normal transaction
1174   list.
1175   In which list to register is a parameter of trans_register_ha().
1176 
1177   Note, that although the registration interface in itself is
1178   fairly clear, the current usage practice often leads to undesired
1179   effects. E.g. since a call to trans_register_ha() in most engines
1180   is embedded into the implementation of handler::external_lock(), some
1181   DDL statements start a transaction (at least from the server
1182   point of view) even though they are not expected to. E.g.
1183   CREATE TABLE does not start a transaction, since
1184   handler::external_lock() is never called during CREATE TABLE. But
1185   CREATE TABLE ... SELECT does, since handler::external_lock() is
1186   called for the table that is being selected from. This has no
1187   practical effects currently, but must be kept in mind
1188   nevertheless.
1189 
1190   Once an engine is registered, the server will do the rest
1191   of the work.
1192 
1193   During statement execution, whenever any of data-modifying
1194   PSEA API methods is used, e.g. handler::write_row() or
1195   handler::update_row(), the read-write flag is raised in the
1196   statement transaction for the involved engine.
1197   Currently all PSEA calls are "traced", and the data cannot be
1198   changed in a way other than issuing a PSEA call. Important:
1199   unless this invariant is preserved the server will not know that
1200   a transaction in a given engine is read-write and will not
1201   involve the two-phase commit protocol!
1202 
1203   At the end of a statement, the server function trans_commit_stmt() is
1204   invoked. This call in turn invokes handlerton::prepare()
1205   for every involved engine. Prepare is followed by a call
1206   to handlerton::commit_one_phase(). If a one-phase commit
1207   will suffice, handlerton::prepare() is not invoked and
1208   the server only calls handlerton::commit_one_phase().
1209   At statement commit, the statement-related read-write
1210   engine flag is propagated to the corresponding flag in the
1211   normal transaction.  When the commit is complete, the list
1212   of registered engines is cleared.
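
  In pseudo-code (an illustrative sketch of the decision only), statement
  commit therefore proceeds roughly as:

    if (every registered engine provides handlerton::prepare &&
        at least two participants are read-write) {
      for each engine: ht->prepare(...);   // phase one
      for each engine: commit;             // phase two
    } else {
      for each engine: commit in one phase (no prepare step);
    }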
1213 
1214   Rollback is handled in a similar fashion.
1215 
1216   Additional notes on DDL and the normal transaction.
1217   ---------------------------------------------------
1218 
1219   DDLs and operations with non-transactional engines
1220   do not "register" in thd->transaction lists, and thus do not
1221   modify the transaction state. Besides, each DDL in
1222   MySQL is prefixed with an implicit normal transaction commit
1223   (a call to trans_commit_implicit()), and thus leaves nothing
1224   to modify.
1225   However, as it has been pointed out with CREATE TABLE .. SELECT,
1226   some DDL statements can start a *new* transaction.
1227 
1228   Behaviour of the server in this case is currently badly
1229   defined.
1230   DDL statements use a form of "semantic" logging
1231   to maintain atomicity: if CREATE TABLE .. SELECT failed,
1232   the newly created table is deleted.
1233   In addition, some DDL statements issue interim transaction
1234   commits: e.g. ALTER TABLE issues a commit after data is copied
1235   from the original table to the internal temporary table. Other
1236   statements, e.g. CREATE TABLE ... SELECT, do not always commit
1237   after themselves.
1238   And finally there is a group of DDL statements such as
1239   RENAME/DROP TABLE that doesn't start a new transaction
1240   and doesn't commit.
1241 
1242   This diversity makes it hard to say what will happen if
1243   by chance a stored function is invoked during a DDL --
1244   whether any modifications it makes will be committed or not
1245   is not clear. Fortunately, the SQL grammar of only a few DDLs allows
1246   invocation of a stored function.
1247 
1248   A consistent behaviour is perhaps to always commit the normal
1249   transaction after all DDLs, just like the statement transaction
1250   is always committed at the end of all statements.
1251 */
1252 
1253 /**
1254   Register a storage engine for a transaction.
1255 
1256   Every storage engine MUST call this function when it starts
1257   a transaction or a statement (that is it must be called both for the
1258   "beginning of transaction" and "beginning of statement").
1259   Only storage engines registered for the transaction/statement
1260   will know when to commit/rollback it.
1261 
1262   @note
1263     trans_register_ha is idempotent - a storage engine may register many
1264     times per transaction.
1265 
1266 */
trans_register_ha(THD * thd,bool all,handlerton * ht_arg,const ulonglong * trxid MY_ATTRIBUTE ((unused)))1267 void trans_register_ha(THD *thd, bool all, handlerton *ht_arg,
1268                        const ulonglong *trxid MY_ATTRIBUTE((unused))) {
1269   Ha_trx_info *ha_info;
1270   Transaction_ctx *trn_ctx = thd->get_transaction();
1271   Transaction_ctx::enum_trx_scope trx_scope =
1272       all ? Transaction_ctx::SESSION : Transaction_ctx::STMT;
1273 
1274   DBUG_TRACE;
1275   DBUG_PRINT("enter", ("%s", all ? "all" : "stmt"));
1276 
1277   if (all) {
1278     /*
1279       Ensure no active backup engine data exists, unless the current
1280       transaction is from replication and in active xa state.
1281     */
1282     DBUG_ASSERT(
1283         thd->get_ha_data(ht_arg->slot)->ha_ptr_backup == nullptr ||
1284         (thd->get_transaction()->xid_state()->has_state(XID_STATE::XA_ACTIVE)));
1285     DBUG_ASSERT(thd->get_ha_data(ht_arg->slot)->ha_ptr_backup == nullptr ||
1286                 (thd->is_binlog_applier() || thd->slave_thread));
1287 
1288     thd->server_status |= SERVER_STATUS_IN_TRANS;
1289     if (thd->tx_read_only)
1290       thd->server_status |= SERVER_STATUS_IN_TRANS_READONLY;
1291     DBUG_PRINT("info", ("setting SERVER_STATUS_IN_TRANS"));
1292   }
1293 
1294   ha_info = thd->get_ha_data(ht_arg->slot)->ha_info + (all ? 1 : 0);
1295 
1296   if (ha_info->is_started()) {
1297     DBUG_ASSERT(trn_ctx->ha_trx_info(trx_scope));
1298     return; /* already registered, return */
1299   }
1300 
1301   trn_ctx->register_ha(trx_scope, ha_info, ht_arg);
1302   trn_ctx->set_ha_trx_info(trx_scope, ha_info);
1303 
1304   if (ht_arg->prepare == nullptr) trn_ctx->set_no_2pc(trx_scope, true);
1305 
1306   trn_ctx->xid_state()->set_query_id(thd->query_id);
1307 /*
1308   Register transaction start in performance schema if not done already.
1309   By doing this, we handle cases when the transaction is started implicitly in
1310   autocommit=0 mode, and cases when we are in normal autocommit=1 mode and the
1311   executed statement is a single-statement transaction.
1312 
1313   Explicitly started transactions are handled in trans_begin().
1314 
1315   Do not register transactions in which the binary log is the only participating
1316   transactional storage engine.
1317 */
1318 #ifdef HAVE_PSI_TRANSACTION_INTERFACE
1319   if (thd->m_transaction_psi == nullptr && ht_arg->db_type != DB_TYPE_BINLOG &&
1320       !thd->is_attachable_transaction_active()) {
1321     const XID *xid = trn_ctx->xid_state()->get_xid();
1322     bool autocommit = !thd->in_multi_stmt_transaction_mode();
1323     thd->m_transaction_psi = MYSQL_START_TRANSACTION(
1324         &thd->m_transaction_state, xid, trxid, thd->tx_isolation,
1325         thd->tx_read_only, autocommit);
1326     DEBUG_SYNC(thd, "after_set_transaction_psi_before_set_transaction_gtid");
1327     gtid_set_performance_schema_values(thd);
1328   }
1329 #endif
1330 }
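
/*
  Illustrative sketch only (not part of this file): the registration
  pattern a transactional storage engine typically follows before it
  modifies data. The engine handlerton and the function name
  ("my_hton", "my_engine_register_trx") are hypothetical.

    static void my_engine_register_trx(THD *thd, handlerton *my_hton) {
      // Always register for the statement scope.
      trans_register_ha(thd, false, my_hton, nullptr);
      // Inside a multi-statement transaction, also register for the
      // session (normal transaction) scope.
      if (thd->in_multi_stmt_transaction_mode())
        trans_register_ha(thd, true, my_hton, nullptr);
    }

  Registering twice is harmless: trans_register_ha() returns early when
  the Ha_trx_info slot for that scope is already started.
*/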
1331 
1332 /** XA Prepare one SE.
1333 @param[in]	thd	Session THD
1334 @param[in]	ht	SE handlerton
1335 @return 0 for success, 1 for error - entire transaction is rolled back. */
1336 static int prepare_one_ht(THD *thd, handlerton *ht) {
1337   DBUG_TRACE;
1338   DBUG_ASSERT(!thd->status_var_aggregated);
1339   thd->status_var.ha_prepare_count++;
1340   if (ht->prepare) {
1341     DBUG_EXECUTE_IF("simulate_xa_failure_prepare", {
1342       ha_rollback_trans(thd, true);
1343       return 1;
1344     });
1345     if (ht->prepare(ht, thd, true)) {
1346       ha_rollback_trans(thd, true);
1347       return 1;
1348     }
1349   } else {
1350     push_warning_printf(thd, Sql_condition::SL_WARNING, ER_ILLEGAL_HA,
1351                         ER_THD(thd, ER_ILLEGAL_HA),
1352                         ha_resolve_storage_engine_name(ht));
1353   }
1354   return 0;
1355 }
1356 
1357 /**
1358   Prepare the session transaction in all participating storage engines
1359   (the engine-side part of XA PREPARE).
1360 
1361   @retval 0   ok
1362   @retval 1   error, transaction was rolled back */
1363 int ha_xa_prepare(THD *thd) {
1364   int error = 0;
1365   Transaction_ctx *trn_ctx = thd->get_transaction();
1366   DBUG_TRACE;
1367 
1368   if (trn_ctx->is_active(Transaction_ctx::SESSION)) {
1369     const Ha_trx_info *ha_info = trn_ctx->ha_trx_info(Transaction_ctx::SESSION);
1370     bool gtid_error = false;
1371     bool need_clear_owned_gtid = false;
1372     std::tie(gtid_error, need_clear_owned_gtid) = commit_owned_gtids(thd, true);
1373     if (gtid_error) {
1374       DBUG_ASSERT(need_clear_owned_gtid);
1375 
1376       ha_rollback_trans(thd, true);
1377       error = 1;
1378       goto err;
1379     }
1380 
1381     /*
1382       Ensure externalization order for applier threads.
1383 
1384       Note: the calls to Commit_order_manager::wait/wait_and_finish() will be
1385             no-op for threads other than replication applier threads.
1386     */
1387     if (Commit_order_manager::wait(thd)) {
1388       thd->commit_error = THD::CE_NONE;
1389       ha_rollback_trans(thd, true);
1390       error = 1;
1391       gtid_error = true;
1392       goto err;
1393     }
1394 
1395     /* Allow GTID to be read by SE for XA prepare. */
1396     {
1397       Clone_handler::XA_Operation xa_guard(thd);
1398 
1399       /* Prepare binlog SE first, if there. */
1400       while (ha_info != nullptr && error == 0) {
1401         auto ht = ha_info->ht();
1402         if (ht->db_type == DB_TYPE_BINLOG) {
1403           error = prepare_one_ht(thd, ht);
1404           break;
1405         }
1406         ha_info = ha_info->next();
1407       }
1408       /* Prepare all SE other than binlog. */
1409       ha_info = trn_ctx->ha_trx_info(Transaction_ctx::SESSION);
1410       while (ha_info != nullptr && error == 0) {
1411         auto ht = ha_info->ht();
1412         error = prepare_one_ht(thd, ht);
1413         if (error != 0) {
1414           break;
1415         }
1416         ha_info = ha_info->next();
1417       }
1418     }
1419 
1420     DBUG_ASSERT(error != 0 || thd->get_transaction()->xid_state()->has_state(
1421                                   XID_STATE::XA_IDLE));
1422 
1423   err:
1424     /*
1425       After ensuring externalization order for applier thread, remove it
1426       from waiting (Commit Order Queue) and allow next applier thread to
1427       be ordered.
1428 
1429       Note: the calls to Commit_order_manager::wait_and_finish() will be
1430             no-op for threads other than replication applier threads.
1431     */
1432     Commit_order_manager::wait_and_finish(thd, error);
1433     gtid_state_commit_or_rollback(thd, need_clear_owned_gtid, !gtid_error);
1434   }
1435 
1436   return error;
1437 }
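
/*
  Context note (added for orientation): ha_xa_prepare() performs the
  engine-side work of XA PREPARE. It is reached from
  Sql_cmd_xa_prepare::trans_xa_prepare() after the transaction has gone
  through XA START ... XA END, i.e. while the XID state is still XA_IDLE
  (which is what the assertion above checks); the switch to XA_PREPARED
  happens in the caller afterwards.
*/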
1438 
1439 /**
1440   Check if we can skip the two-phase commit.
1441 
1442   A helper function to evaluate if two-phase commit is mandatory.
1443   As a side effect, propagates the read-only/read-write flags
1444   of the statement transaction to its enclosing normal transaction.
1445 
1446   If we have at least two engines with read-write changes we must
1447   run a two-phase commit. Otherwise we can run several independent
1448   commits as the only transactional engine has read-write changes
1449   and others are read-only.
1450 
1451   @retval   0   All engines are read-only.
1452   @retval   1   Exactly one engine has read-write changes.
1453   @retval   >1  More than one engine has read-write changes.
1454                 Note: return value might NOT be the exact number of
1455                 engines with read-write changes.
1456 */
1457 
1458 static uint ha_check_and_coalesce_trx_read_only(THD *thd, Ha_trx_info *ha_list,
1459                                                 bool all) {
1460   /* The number of storage engines that have actual changes. */
1461   unsigned rw_ha_count = 0;
1462   Ha_trx_info *ha_info;
1463 
1464   for (ha_info = ha_list; ha_info; ha_info = ha_info->next()) {
1465     if (ha_info->is_trx_read_write()) ++rw_ha_count;
1466 
1467     if (!all) {
1468       Ha_trx_info *ha_info_all =
1469           &thd->get_ha_data(ha_info->ht()->slot)->ha_info[1];
1470       DBUG_ASSERT(ha_info != ha_info_all);
1471       /*
1472         Merge read-only/read-write information about statement
1473         transaction to its enclosing normal transaction. Do this
1474         only if in a real transaction -- that is, if we know
1475         that ha_info_all is registered in thd->transaction.all.
1476         Otherwise we would only clutter the normal transaction flags.
1477       */
1478       if (ha_info_all->is_started()) /* false if autocommit. */
1479         ha_info_all->coalesce_trx_with(ha_info);
1480     } else if (rw_ha_count > 1) {
1481       /*
1482         It is a normal transaction, so we don't need to merge read/write
1483         information up, and the need for two-phase commit has been
1484         already established. Break the loop prematurely.
1485       */
1486       break;
1487     }
1488   }
1489   return rw_ha_count;
1490 }
1491 
1492 /**
1493   The function computes the condition for calling the gtid persistor
1494   wrapper, and executes the call when the condition holds.
1495   It is invoked when committing a statement or transaction, including XA,
1496   and also when handling XA prepare.
1497 
1498   @param thd  Thread context.
1499   @param all  The execution scope, true for the transaction one, false
1500               for the statement one.
1501 
1502   @return   std::pair containing: Error and Owned GTID release status
1503    Error
1504             @retval  0    Ok
1505             @retval !0    Error
1506 
1507    Owned GTID release status
1508             @retval  true   remove the GTID owned by thread from owned GTIDs
1509             @retval  false  removal of the GTID owned by thread from owned GTIDs
1510                             is not required
1511 */
1512 
1513 std::pair<int, bool> commit_owned_gtids(THD *thd, bool all) {
1514   DBUG_TRACE;
1515   int error = 0;
1516   bool need_clear_owned_gtid = false;
1517 
1518   /*
1519     If the binary log is disabled for this thread (either by
1520     log_bin=0 or sql_log_bin=0 or by log_slave_updates=0 for a
1521     slave thread), then the statement will not be written to
1522     the binary log. In this case, we should save its GTID into
1523     mysql.gtid_executed table and @@GLOBAL.GTID_EXECUTED as is done
1524     when the binary log is enabled.
1525 
1526     We also skip saving GTID into mysql.gtid_executed table and
1527     @@GLOBAL.GTID_EXECUTED when slave-preserve-commit-order is enabled. We skip
1528     as GTID will be saved in
1529     Commit_order_manager::flush_engine_and_signal_threads (invoked from
1530     Commit_order_manager::wait_and_finish). In particular, the following
1531     call stack under ha_commit_low saves the GTID in case it is skipped
1532     here:
1533 
1534       ha_commit_low ->
1535       Commit_order_manager::wait_and_finish ->
1536       Commit_order_manager::finish ->
1537       Commit_order_manager::flush_engine_and_signal_threads ->
1538       Gtid_state::update_commit_group
1539 
1540     We also skip saving the GTID for intermediate commits, i.e. when
1541     thd->is_operating_substatement_implicitly is enabled.
1542   */
1543   if (thd->is_current_stmt_binlog_log_slave_updates_disabled() &&
1544       ending_trans(thd, all) && !thd->is_operating_gtid_table_implicitly &&
1545       !thd->is_operating_substatement_implicitly) {
1546     if (!has_commit_order_manager(thd) &&
1547         (thd->owned_gtid.sidno > 0 ||
1548          thd->owned_gtid.sidno == THD::OWNED_SIDNO_ANONYMOUS)) {
1549       need_clear_owned_gtid = true;
1550     }
1551 
1552     /*
1553       If GTID is not persisted by SE, write it to
1554       mysql.gtid_executed table.
1555     */
1556     if (thd->owned_gtid.sidno > 0 && !thd->se_persists_gtid()) {
1557       error = gtid_state->save(thd);
1558     }
1559   }
1560 
1561   return std::make_pair(error, need_clear_owned_gtid);
1562 }
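
/*
  Typical call pattern (as used by ha_xa_prepare() and ha_commit_trans()
  in this file):

    bool need_clear_owned_gtid = false;
    int gtid_error = 0;
    std::tie(gtid_error, need_clear_owned_gtid) = commit_owned_gtids(thd, all);
    if (gtid_error) {
      // roll back, and later pass need_clear_owned_gtid to the
      // owned-GTID release logic (e.g. gtid_state_commit_or_rollback()).
    }
*/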
1563 
1564 /**
1565   @param[in] thd                       Thread handle.
1566   @param[in] all                       Session transaction if true, statement
1567                                        otherwise.
1568   @param[in] ignore_global_read_lock   Allow commit to complete even if a
1569                                        global read lock is active. This can be
1570                                        used to allow changes to internal tables
1571                                        (e.g. slave status tables).
1572 
1573   @retval
1574     0   ok
1575   @retval
1576     1   transaction was rolled back
1577   @retval
1578     2   error during commit, data may be inconsistent
1579 
1580   @todo
1581     Since we don't support nested statement transactions in 5.0,
1582     we can't commit or rollback stmt transactions while we are inside
1583     stored functions or triggers. So we simply do nothing now.
1584     TODO: This should be fixed in later ( >= 5.1) releases.
1585 */
1586 
1587 int ha_commit_trans(THD *thd, bool all, bool ignore_global_read_lock) {
1588   int error = 0;
1589   THD_STAGE_INFO(thd, stage_waiting_for_handler_commit);
1590   bool run_slave_post_commit = false;
1591   bool need_clear_owned_gtid = false;
1592   /*
1593     Save transaction owned gtid into table before transaction prepare
1594     if binlog is disabled, or binlog is enabled and log_slave_updates
1595     is disabled with slave SQL thread or slave worker thread.
1596   */
1597   std::tie(error, need_clear_owned_gtid) = commit_owned_gtids(thd, all);
1598 
1599   /*
1600     'all' means that this is either an explicit commit issued by
1601     user, or an implicit commit issued by a DDL.
1602   */
1603   Transaction_ctx *trn_ctx = thd->get_transaction();
1604   Transaction_ctx::enum_trx_scope trx_scope =
1605       all ? Transaction_ctx::SESSION : Transaction_ctx::STMT;
1606 
1607   /*
1608     "real" is a nick name for a transaction for which a commit will
1609     make persistent changes. E.g. a 'stmt' transaction inside a 'all'
1610     transation is not 'real': even though it's possible to commit it,
1611     the changes are not durable as they might be rolled back if the
1612     enclosing 'all' transaction is rolled back.
1613   */
1614   bool is_real_trans = all || !trn_ctx->is_active(Transaction_ctx::SESSION);
1615 
1616   Ha_trx_info *ha_info = trn_ctx->ha_trx_info(trx_scope);
1617   XID_STATE *xid_state = trn_ctx->xid_state();
1618 
1619   DBUG_TRACE;
1620 
1621   DBUG_PRINT("info", ("all=%d thd->in_sub_stmt=%d ha_info=%p is_real_trans=%d",
1622                       all, thd->in_sub_stmt, ha_info, is_real_trans));
1623   /*
1624     We must not commit the normal transaction if a statement
1625     transaction is pending. Otherwise statement transaction
1626     flags will not get propagated to its normal transaction's
1627     counterpart.
1628   */
1629   DBUG_ASSERT(!trn_ctx->is_active(Transaction_ctx::STMT) || !all);
1630 
1631   DBUG_EXECUTE_IF("pre_commit_error", {
1632     error = true;
1633     my_error(ER_UNKNOWN_ERROR, MYF(0));
1634   });
1635 
1636   /*
1637     When atomic DDL is executed on the slave, we would like to update
1638     the slave applier state as part of the DDL's transaction.
1639     Call Relay_log_info::pre_commit() hook to do this before DDL
1640     gets committed in the following block.
1641     Failed atomic DDL statements should've been marked as executed/committed
1642     during statement rollback, though some like GRANT may continue until
1643     this point.
1644     When applying a DDL statement on a slave and the statement is filtered
1645     out by a table filter, we report an error "ER_SLAVE_IGNORED_TABLE" to
1646     warn slave applier thread. We need to save the DDL statement's gtid
1647     into mysql.gtid_executed system table if the binary log is disabled
1648     on the slave and gtids are enabled.
1649   */
1650   if (is_real_trans && is_atomic_ddl_commit_on_slave(thd) &&
1651       (!thd->is_error() ||
1652        (thd->is_operating_gtid_table_implicitly &&
1653         thd->get_stmt_da()->mysql_errno() == ER_SLAVE_IGNORED_TABLE))) {
1654     run_slave_post_commit = true;
1655     error = error || thd->rli_slave->pre_commit();
1656 
1657     DBUG_EXECUTE_IF("rli_pre_commit_error", {
1658       error = true;
1659       my_error(ER_UNKNOWN_ERROR, MYF(0));
1660     });
1661     DBUG_EXECUTE_IF("slave_crash_before_commit", {
1662       /* This pre-commit crash aims solely at atomic DDL */
1663       DBUG_SUICIDE();
1664     });
1665   }
1666 
1667   if (thd->in_sub_stmt) {
1668     DBUG_ASSERT(0);
1669     /*
1670       Since we don't support nested statement transactions in 5.0,
1671       we can't commit or rollback stmt transactions while we are inside
1672       stored functions or triggers. So we simply do nothing now.
1673       TODO: This should be fixed in later ( >= 5.1) releases.
1674     */
1675     if (!all) return 0;
1676     /*
1677       We assume that all statements which commit or rollback main transaction
1678       are prohibited inside of stored functions or triggers. So they should
1679       bail out with error even before ha_commit_trans() call. To be 100% safe
1680       let us throw error in non-debug builds.
1681     */
1682     my_error(ER_COMMIT_NOT_ALLOWED_IN_SF_OR_TRG, MYF(0));
1683     return 2;
1684   }
1685 
1686   MDL_request mdl_request;
1687   bool release_mdl = false;
1688   if (ha_info && !error) {
1689     uint rw_ha_count = 0;
1690     bool rw_trans;
1691 
1692     DBUG_EXECUTE_IF("crash_commit_before", DBUG_SUICIDE(););
1693 
1694     /*
1695      skip 2PC if the transaction is empty and it is not marked as started (which
1696      can happen when the slave's binlog is disabled)
1697     */
1698     if (ha_info->is_started())
1699       rw_ha_count = ha_check_and_coalesce_trx_read_only(thd, ha_info, all);
1700     trn_ctx->set_rw_ha_count(trx_scope, rw_ha_count);
1701     /* rw_trans is true when we are in a transaction that changes data */
1702     rw_trans = is_real_trans && (rw_ha_count > 0);
1703 
1704     DBUG_EXECUTE_IF("dbug.enabled_commit", {
1705       const char act[] = "now signal Reached wait_for signal.commit_continue";
1706       DBUG_ASSERT(!debug_sync_set_action(thd, STRING_WITH_LEN(act)));
1707     };);
1708     DEBUG_SYNC(thd, "ha_commit_trans_before_acquire_commit_lock");
1709     if (rw_trans && !ignore_global_read_lock) {
1710       /*
1711         Acquire a metadata lock which will ensure that COMMIT is blocked
1712         by an active FLUSH TABLES WITH READ LOCK (and vice versa:
1713         COMMIT in progress blocks FTWRL).
1714 
1715         We allow the owner of FTWRL to COMMIT; we assume that it knows
1716         what it does.
1717       */
1718       MDL_REQUEST_INIT(&mdl_request, MDL_key::COMMIT, "", "",
1719                        MDL_INTENTION_EXCLUSIVE, MDL_EXPLICIT);
1720 
1721       DBUG_PRINT("debug", ("Acquire MDL commit lock"));
1722       if (thd->mdl_context.acquire_lock(&mdl_request,
1723                                         thd->variables.lock_wait_timeout)) {
1724         ha_rollback_trans(thd, all);
1725         return 1;
1726       }
1727       release_mdl = true;
1728 
1729       DEBUG_SYNC(thd, "ha_commit_trans_after_acquire_commit_lock");
1730     }
1731 
1732     if (rw_trans && stmt_has_updated_trans_table(ha_info) &&
1733         check_readonly(thd, true)) {
1734       ha_rollback_trans(thd, all);
1735       error = 1;
1736       goto end;
1737     }
1738 
1739     if (!trn_ctx->no_2pc(trx_scope) && (trn_ctx->rw_ha_count(trx_scope) > 1))
1740       error = tc_log->prepare(thd, all);
1741   }
1742   /*
1743     The state of the XA transaction is changed to Prepared as an
1744     intermediate step; it changes back to the regular NOTR state at the end.
1745     The Prepared state is of interest to the binary logger.
1746   */
1747   if (!error && all && xid_state->has_state(XID_STATE::XA_IDLE)) {
1748     DBUG_ASSERT(
1749         thd->lex->sql_command == SQLCOM_XA_COMMIT &&
1750         static_cast<Sql_cmd_xa_commit *>(thd->lex->m_sql_cmd)->get_xa_opt() ==
1751             XA_ONE_PHASE);
1752 
1753     xid_state->set_state(XID_STATE::XA_PREPARED);
1754   }
1755   if (error || (error = tc_log->commit(thd, all))) {
1756     ha_rollback_trans(thd, all);
1757     error = 1;
1758     goto end;
1759   }
1760 /*
1761   Mark multi-statement (any autocommit mode) or single-statement
1762   (autocommit=1) transaction as committed in the performance schema
1763 */
1764 #ifdef HAVE_PSI_TRANSACTION_INTERFACE
1765   if (is_real_trans && thd->m_transaction_psi != nullptr) {
1766     MYSQL_COMMIT_TRANSACTION(thd->m_transaction_psi);
1767     thd->m_transaction_psi = nullptr;
1768   }
1769 #endif
1770   DBUG_EXECUTE_IF("crash_commit_after",
1771                   if (!thd->is_operating_gtid_table_implicitly)
1772                       DBUG_SUICIDE(););
1773 end:
1774   if (release_mdl && mdl_request.ticket) {
1775     /*
1776       We do not always immediately release transactional locks
1777       after ha_commit_trans() (see uses of ha_enable_transaction()),
1778       thus we release the commit blocker lock as soon as it's
1779       not needed.
1780     */
1781     DBUG_PRINT("debug", ("Releasing MDL commit lock"));
1782     thd->mdl_context.release_lock(mdl_request.ticket);
1783   }
1784   /* Free resources and perform other cleanup even for 'empty' transactions. */
1785   if (is_real_trans) {
1786     trn_ctx->cleanup();
1787     thd->tx_priority = 0;
1788   }
1789 
1790   if (need_clear_owned_gtid) {
1791     thd->server_status &= ~SERVER_STATUS_IN_TRANS;
1792     /*
1793       Release the owned GTID when binlog is disabled, or binlog is
1794       enabled and log_slave_updates is disabled with slave SQL thread
1795       or slave worker thread.
1796     */
1797     if (error)
1798       gtid_state->update_on_rollback(thd);
1799     else
1800       gtid_state->update_on_commit(thd);
1801   } else {
1802     if (has_commit_order_manager(thd) && error) {
1803       gtid_state->update_on_rollback(thd);
1804     }
1805   }
1806   if (run_slave_post_commit) {
1807     DBUG_EXECUTE_IF("slave_crash_after_commit", DBUG_SUICIDE(););
1808 
1809     thd->rli_slave->post_commit(error != 0);
1810     /*
1811       SERVER_STATUS_IN_TRANS may have been set by pre_commit alone
1812       when the main DDL transaction is filtered out of execution.
1813       In that case the status has to be reset now.
1814 
1815       TODO: move/refactor this handling onto trans_commit/commit_implicit()
1816             the caller level.
1817     */
1818     thd->server_status &= ~SERVER_STATUS_IN_TRANS;
1819   } else {
1820     DBUG_EXECUTE_IF("slave_crash_after_commit", {
1821       if (thd->slave_thread && thd->rli_slave &&
1822           thd->rli_slave->current_event &&
1823           thd->rli_slave->current_event->get_type_code() ==
1824               binary_log::XID_EVENT &&
1825           !thd->is_operating_substatement_implicitly &&
1826           !thd->is_operating_gtid_table_implicitly)
1827         DBUG_SUICIDE();
1828     });
1829   }
1830 
1831   return error;
1832 }
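
/*
  Path overview (added for orientation; a simplification): for a
  read-write transaction spanning more than one engine, ha_commit_trans()
  drives two-phase commit as

    ha_commit_trans()
      -> tc_log->prepare()   (ha_prepare_low() in each read-write engine)
      -> tc_log->commit()    (eventually reaching ha_commit_low())

  where tc_log is either the binary log (MYSQL_BIN_LOG) or one of the
  non-binlog transaction coordinators. For a single-engine transaction
  the prepare step is skipped and only the commit path runs.
*/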
1833 
1834 /**
1835   Commit the session's outstanding transaction.
1836 
1837   @pre thd->transaction.flags.commit_low == true
1838   @post thd->transaction.flags.commit_low == false
1839 
1840   @note This function does not care about global read lock; the caller
1841   should.
1842 
1843   @param[in]  thd  Thread handle.
1844   @param[in]  all  Is set in case of explicit commit
1845                    (COMMIT statement), or implicit commit
1846                    issued by DDL. Is not set when called
1847                    at the end of statement, even if
1848                    autocommit=1.
1849   @param[in]  run_after_commit
1850                    True by default, otherwise, does not execute
1851                    the after_commit hook in the function.
1852 */
1853 
1854 int ha_commit_low(THD *thd, bool all, bool run_after_commit) {
1855   int error = 0;
1856   Transaction_ctx *trn_ctx = thd->get_transaction();
1857   Transaction_ctx::enum_trx_scope trx_scope =
1858       all ? Transaction_ctx::SESSION : Transaction_ctx::STMT;
1859   Ha_trx_info *ha_info = trn_ctx->ha_trx_info(trx_scope), *ha_info_next;
1860 
1861   DBUG_TRACE;
1862 
1863   if (ha_info) {
1864     bool restore_backup_ha_data = false;
1865     /*
1866       At execution of XA COMMIT ONE PHASE binlog or slave applier
1867       reattaches the engine ha_data to THD, previously saved at XA START.
1868     */
1869     if (all && thd->rpl_unflag_detached_engine_ha_data()) {
1870       DBUG_PRINT("info", ("query='%s'", thd->query().str));
1871       DBUG_ASSERT(thd->lex->sql_command == SQLCOM_XA_COMMIT);
1872       DBUG_ASSERT(
1873           static_cast<Sql_cmd_xa_commit *>(thd->lex->m_sql_cmd)->get_xa_opt() ==
1874           XA_ONE_PHASE);
1875       restore_backup_ha_data = true;
1876     }
1877 
1878     bool is_applier_wait_enabled = false;
1879 
1880     /*
1881       Preserve externalization and persistence order for applier threads.
1882 
1883       The conditions should be understood as follows:
1884 
1885       - When the binlog is enabled, this will be done from
1886         MYSQL_BIN_LOG::ordered_commit and should not be done here.
1887         Therefore, we have the condition
1888         thd->is_current_stmt_binlog_disabled().
1889 
1890       - This function is usually called once per statement, with
1891         all=false.  We should not preserve the commit order when this
1892         function is called in that context.  Therefore, we have the
1893         condition ending_trans(thd, all).
1894 
1895       - Statements such as ANALYZE/OPTIMIZE/REPAIR TABLE will call
1896         ha_commit_low multiple times with all=true from within
1897         mysql_admin_table, mysql_recreate_table, and
1898         handle_histogram_command. After returning to
1899         mysql_execute_command, it will call ha_commit_low a final
1900         time.  It is only in this final call that we should preserve
1901         the commit order. Therefore, we set the flag
1902         thd->is_operating_substatement_implicitly while executing
1903         mysql_admin_table, mysql_recreate_table, and
1904         handle_histogram_command, clear it when returning from those
1905         functions, and check the flag here in ha_commit_low().
1906 
1907       - In all the above cases, we should make the current transaction
1908         fail early in case a previous transaction has rolled back.
1909         Therefore, we also invoke the commit order manager in case
1910         get_rollback_status returns true.
1911 
1912       Note: the calls to Commit_order_manager::wait/wait_and_finish() will be
1913             no-op for threads other than replication applier threads.
1914     */
1915     if ((!thd->is_operating_substatement_implicitly &&
1916          !thd->is_operating_gtid_table_implicitly &&
1917          thd->is_current_stmt_binlog_log_slave_updates_disabled() &&
1918          ending_trans(thd, all)) ||
1919         Commit_order_manager::get_rollback_status(thd)) {
1920       if (Commit_order_manager::wait(thd)) {
1921         error = 1;
1922         /*
1923           Remove applier thread from waiting in Commit Order Queue and
1924           allow next applier thread to be ordered.
1925         */
1926         Commit_order_manager::wait_and_finish(thd, error);
1927         goto err;
1928       }
1929       is_applier_wait_enabled = true;
1930     }
1931 
1932     for (; ha_info; ha_info = ha_info_next) {
1933       int err;
1934       handlerton *ht = ha_info->ht();
1935       if ((err = ht->commit(ht, thd, all))) {
1936         char errbuf[MYSQL_ERRMSG_SIZE];
1937         my_error(ER_ERROR_DURING_COMMIT, MYF(0), err,
1938                  my_strerror(errbuf, MYSQL_ERRMSG_SIZE, err));
1939         error = 1;
1940       }
1941       DBUG_ASSERT(!thd->status_var_aggregated);
1942       thd->status_var.ha_commit_count++;
1943       ha_info_next = ha_info->next();
1944       if (restore_backup_ha_data) reattach_engine_ha_data_to_thd(thd, ht);
1945       ha_info->reset(); /* keep it conveniently zero-filled */
1946     }
1947     trn_ctx->reset_scope(trx_scope);
1948 
1949     /*
1950       After ensuring externalization order for applier thread, remove it
1951       from waiting (Commit Order Queue) and allow next applier thread to
1952       be ordered.
1953 
1954       Note: the calls to Commit_order_manager::wait_and_finish() will be
1955             no-op for threads other than replication applier threads.
1956     */
1957     if (is_applier_wait_enabled) {
1958       Commit_order_manager::wait_and_finish(thd, error);
1959     }
1960   }
1961 
1962 err:
1963   /* Free resources and perform other cleanup even for 'empty' transactions. */
1964   if (all) trn_ctx->cleanup();
1965   /*
1966     When the transaction has been committed, we clear the commit_low
1967     flag. This allows other parts of the system to check if commit_low
1968     was called.
1969   */
1970   trn_ctx->m_flags.commit_low = false;
1971   if (run_after_commit && thd->get_transaction()->m_flags.run_hooks) {
1972     /*
1973        If commit succeeded, we call the after_commit hook.
1974 
1975        TODO: Investigate if this can be refactored so that there is
1976              only one invocation of this hook in the code (in
1977              MYSQL_BIN_LOG::finish_commit).
1978     */
1979     if (!error) (void)RUN_HOOK(transaction, after_commit, (thd, all));
1980     trn_ctx->m_flags.run_hooks = false;
1981   }
1982   return error;
1983 }
1984 
1985 int ha_rollback_low(THD *thd, bool all) {
1986   Transaction_ctx *trn_ctx = thd->get_transaction();
1987   int error = 0;
1988   Transaction_ctx::enum_trx_scope trx_scope =
1989       all ? Transaction_ctx::SESSION : Transaction_ctx::STMT;
1990   Ha_trx_info *ha_info = trn_ctx->ha_trx_info(trx_scope), *ha_info_next;
1991 
1992   (void)RUN_HOOK(transaction, before_rollback, (thd, all));
1993 
1994   if (ha_info) {
1995     bool restore_backup_ha_data = false;
1996     /*
1997       Similarly to the commit case, the binlog or slave applier
1998       reattaches the engine ha_data to THD.
1999     */
2000     if (all && thd->rpl_unflag_detached_engine_ha_data()) {
2001       DBUG_ASSERT(trn_ctx->xid_state()->get_state() != XID_STATE::XA_NOTR ||
2002                   thd->killed == THD::KILL_CONNECTION);
2003 
2004       restore_backup_ha_data = true;
2005     }
2006 
2007     for (; ha_info; ha_info = ha_info_next) {
2008       int err;
2009       handlerton *ht = ha_info->ht();
2010       if ((err = ht->rollback(ht, thd, all))) {  // cannot happen
2011         char errbuf[MYSQL_ERRMSG_SIZE];
2012         my_error(ER_ERROR_DURING_ROLLBACK, MYF(0), err,
2013                  my_strerror(errbuf, MYSQL_ERRMSG_SIZE, err));
2014         error = 1;
2015       }
2016       DBUG_ASSERT(!thd->status_var_aggregated);
2017       thd->status_var.ha_rollback_count++;
2018       ha_info_next = ha_info->next();
2019       if (restore_backup_ha_data) reattach_engine_ha_data_to_thd(thd, ht);
2020       ha_info->reset(); /* keep it conveniently zero-filled */
2021     }
2022     trn_ctx->reset_scope(trx_scope);
2023   }
2024 
2025   /*
2026     Due to the possibility of an MDL deadlock, a rollback request can come
2027     even if the transaction hasn't been started in any transactional
2028     storage engine.
2029 
2030     It is possible that ha_rollback_low() is called while handling a
2031     failure from ha_xa_prepare() before any error has been set in the
2032     Diagnostics_area. Therefore it is required to check that an error in
2032     Diagnostics_area is set before calling the method XID_STATE::set_error().
2033 
2034     If it wasn't done it would lead to failure of the assertion
2035       DBUG_ASSERT(m_status == DA_ERROR)
2036     in the method Diagnostics_area::mysql_errno().
2037 
2038     In case ha_xa_prepare() fails and no error was set in Diagnostics_area,
2039     the error ER_XA_RBROLLBACK is set in the Diagnostics_area from
2040     the method Sql_cmd_xa_prepare::trans_xa_prepare() when non-zero result code
2041     returned by ha_xa_prepare() is handled.
2042   */
2043   if (all && thd->transaction_rollback_request && thd->is_error())
2044     trn_ctx->xid_state()->set_error(thd);
2045 
2046   (void)RUN_HOOK(transaction, after_rollback, (thd, all));
2047   return error;
2048 }
2049 
2050 int ha_rollback_trans(THD *thd, bool all) {
2051   int error = 0;
2052   Transaction_ctx *trn_ctx = thd->get_transaction();
2053   bool is_xa_rollback = trn_ctx->xid_state()->has_state(XID_STATE::XA_PREPARED);
2054 
2055   /*
2056     "real" is a nick name for a transaction for which a commit will
2057     make persistent changes. E.g. a 'stmt' transaction inside a 'all'
2058     transaction is not 'real': even though it's possible to commit it,
2059     the changes are not durable as they might be rolled back if the
2060     enclosing 'all' transaction is rolled back.
2061     We establish the value of 'is_real_trans' by checking
2062     if it's an explicit COMMIT or BEGIN statement, or implicit
2063     commit issued by DDL (in these cases all == true),
2064     or if we're running in autocommit mode (it's only in the autocommit mode
2065     ha_commit_one_phase() is called with an empty
2066     transaction.all.ha_list, see why in trans_register_ha()).
2067   */
2068   bool is_real_trans = all || !trn_ctx->is_active(Transaction_ctx::SESSION);
2069 
2070   DBUG_TRACE;
2071 
2072   /*
2073     We must not rollback the normal transaction if a statement
2074     transaction is pending.
2075   */
2076   DBUG_ASSERT(!trn_ctx->is_active(Transaction_ctx::STMT) || !all);
2077 
2078   if (thd->in_sub_stmt) {
2079     DBUG_ASSERT(0);
2080     /*
2081       If we are inside stored function or trigger we should not commit or
2082       rollback current statement transaction. See comment in ha_commit_trans()
2083       call for more information.
2084     */
2085     if (!all) return 0;
2086     my_error(ER_COMMIT_NOT_ALLOWED_IN_SF_OR_TRG, MYF(0));
2087     return 1;
2088   }
2089 
2090   if (tc_log) error = tc_log->rollback(thd, all);
2091     /*
2092       Mark multi-statement (any autocommit mode) or single-statement
2093       (autocommit=1) transaction as rolled back
2094     */
2095 #ifdef HAVE_PSI_TRANSACTION_INTERFACE
2096   if (all || !thd->in_active_multi_stmt_transaction()) {
2097     MYSQL_ROLLBACK_TRANSACTION(thd->m_transaction_psi);
2098     thd->m_transaction_psi = nullptr;
2099   }
2100 #endif
2101 
2102   /* Always clean up, even if no engine is registered; there may be savepoints. */
2103   if (is_real_trans) {
2104     trn_ctx->cleanup();
2105     thd->tx_priority = 0;
2106   }
2107 
2108   if (all) thd->transaction_rollback_request = false;
2109 
2110   /*
2111     Only call gtid_rollback(THD*), which will purge thd->owned_gtid, if
2112     the complete transaction is being rolled back or autocommit=1.
2113     Notice that XA rollback has just invoked update_on_commit() through the
2114     tc_log->*rollback* stack.
2115   */
2116   if (is_real_trans && !is_xa_rollback) {
2117 #ifndef XTRABACKUP
2118     /* gtid_state is uninitialized in xtrabackup, and this call
2119      would result in a no-op anyway, as the related thd var is
2120      disabled */
2121     gtid_state->update_on_rollback(thd);
2122 #endif
2123   }
2124 
2125   /*
2126     If the transaction cannot be rolled back safely, warn; don't warn if this
2127     is a slave thread (when a slave thread executes a ROLLBACK, it has been
2128     read from the binary log, so it's 100% sure and normal to produce error
2129     ER_WARNING_NOT_COMPLETE_ROLLBACK). If we sent the warning to the
2130     slave SQL thread, it would not stop the thread but just be printed in
2131     the error log; we don't want users to wonder why they have this
2132     message in the error log, so we don't send it.
2133   */
2134   if (is_real_trans &&
2135       trn_ctx->cannot_safely_rollback(Transaction_ctx::SESSION) &&
2136       !thd->slave_thread && thd->killed != THD::KILL_CONNECTION)
2137     trn_ctx->push_unsafe_rollback_warnings(thd);
2138 
2139   return error;
2140 }
2141 
2142 /**
2143   Commit the attachable transaction in storage engines.
2144 
2145   @note This is slimmed down version of ha_commit_trans()/ha_commit_low()
2146         which commits attachable transaction but skips code which is
2147         unnecessary and unsafe for them (like dealing with GTIDs).
2148         Since attachable transactions are read-only their commit only
2149         needs to release resources and cleanup state in SE.
2150 
2151   @param thd     Current thread
2152 
2153   @retval 0      - Success
2154   @retval non-0  - Failure
2155 */
2156 int ha_commit_attachable(THD *thd) {
2157   int error = 0;
2158   Transaction_ctx *trn_ctx = thd->get_transaction();
2159   Ha_trx_info *ha_info = trn_ctx->ha_trx_info(Transaction_ctx::STMT);
2160   Ha_trx_info *ha_info_next;
2161 
2162   /* This function only handles attachable transactions. */
2163   DBUG_ASSERT(thd->is_attachable_ro_transaction_active());
2164   /*
2165     Since the attachable transaction is AUTOCOMMIT we only need
2166     to care about statement transaction.
2167   */
2168   DBUG_ASSERT(!trn_ctx->is_active(Transaction_ctx::SESSION));
2169 
2170   if (ha_info) {
2171     for (; ha_info; ha_info = ha_info_next) {
2172       /* Attachable transaction is not supposed to modify anything. */
2173       DBUG_ASSERT(!ha_info->is_trx_read_write());
2174 
2175       handlerton *ht = ha_info->ht();
2176       if (ht->commit(ht, thd, false)) {
2177         /*
2178           In theory this should not happen since attachable transactions
2179           are read only and therefore commit is supposed to only release
2180           resources/cleanup state. Even if this happens we will simply
2181           continue committing attachable transaction in other SEs.
2182         */
2183         DBUG_ASSERT(false);
2184         error = 1;
2185       }
2186       DBUG_ASSERT(!thd->status_var_aggregated);
2187       thd->status_var.ha_commit_count++;
2188       ha_info_next = ha_info->next();
2189 
2190       ha_info->reset(); /* keep it conveniently zero-filled */
2191     }
2192     trn_ctx->reset_scope(Transaction_ctx::STMT);
2193   }
2194 
2195   /*
2196     Mark transaction as committed in PSI.
2197   */
2198 #ifdef HAVE_PSI_TRANSACTION_INTERFACE
2199   if (thd->m_transaction_psi != nullptr) {
2200     MYSQL_COMMIT_TRANSACTION(thd->m_transaction_psi);
2201     thd->m_transaction_psi = nullptr;
2202   }
2203 #endif
2204 
2205   /* Free resources and perform other cleanup even for 'empty' transactions. */
2206   trn_ctx->cleanup();
2207 
2208   return (error);
2209 }
2210 
2211 /**
2212   Check if all storage engines used in transaction agree that after
2213   rollback to savepoint it is safe to release MDL locks acquired after
2214   savepoint creation.
2215 
2216   @param thd   The client thread that executes the transaction.
2217 
2218   @return true  - It is safe to release MDL locks.
2219           false - If it is not.
2220 */
2221 bool ha_rollback_to_savepoint_can_release_mdl(THD *thd) {
2222   Ha_trx_info *ha_info;
2223   Transaction_ctx *trn_ctx = thd->get_transaction();
2224   Transaction_ctx::enum_trx_scope trx_scope =
2225       thd->in_sub_stmt ? Transaction_ctx::STMT : Transaction_ctx::SESSION;
2226 
2227   DBUG_TRACE;
2228 
2229   /**
2230     Check whether it is safe to release metadata locks after rollback to
2231     savepoint in all the storage engines that are part of the transaction.
2232   */
2233   for (ha_info = trn_ctx->ha_trx_info(trx_scope); ha_info;
2234        ha_info = ha_info->next()) {
2235     handlerton *ht = ha_info->ht();
2236     DBUG_ASSERT(ht);
2237 
2238     if (ht->savepoint_rollback_can_release_mdl == nullptr ||
2239         ht->savepoint_rollback_can_release_mdl(ht, thd) == false)
2240       return false;
2241   }
2242 
2243   return true;
2244 }
2245 
2246 int ha_rollback_to_savepoint(THD *thd, SAVEPOINT *sv) {
2247   int error = 0;
2248   Transaction_ctx *trn_ctx = thd->get_transaction();
2249   Transaction_ctx::enum_trx_scope trx_scope =
2250       !thd->in_sub_stmt ? Transaction_ctx::SESSION : Transaction_ctx::STMT;
2251 
2252   Ha_trx_info *ha_info, *ha_info_next;
2253 
2254   DBUG_TRACE;
2255 
2256   trn_ctx->set_rw_ha_count(trx_scope, 0);
2257   trn_ctx->set_no_2pc(trx_scope, false);
2258   /*
2259     rolling back to savepoint in all storage engines that were part of the
2260     transaction when the savepoint was set
2261   */
2262   for (ha_info = sv->ha_list; ha_info; ha_info = ha_info->next()) {
2263     int err;
2264     handlerton *ht = ha_info->ht();
2265     DBUG_ASSERT(ht);
2266     DBUG_ASSERT(ht->savepoint_set != nullptr);
2267     if ((err = ht->savepoint_rollback(
2268              ht, thd,
2269              (uchar *)(sv + 1) + ht->savepoint_offset))) {  // cannot happen
2270       char errbuf[MYSQL_ERRMSG_SIZE];
2271       my_error(ER_ERROR_DURING_ROLLBACK, MYF(0), err,
2272                my_strerror(errbuf, MYSQL_ERRMSG_SIZE, err));
2273       error = 1;
2274     }
2275     DBUG_ASSERT(!thd->status_var_aggregated);
2276     thd->status_var.ha_savepoint_rollback_count++;
2277     if (ht->prepare == nullptr) trn_ctx->set_no_2pc(trx_scope, true);
2278   }
2279 
2280   /*
2281     rolling back the transaction in all storage engines that were not part of
2282     the transaction when the savepoint was set
2283   */
2284   for (ha_info = trn_ctx->ha_trx_info(trx_scope); ha_info != sv->ha_list;
2285        ha_info = ha_info_next) {
2286     int err;
2287     handlerton *ht = ha_info->ht();
2288     if ((err = ht->rollback(ht, thd, !thd->in_sub_stmt))) {  // cannot happen
2289       char errbuf[MYSQL_ERRMSG_SIZE];
2290       my_error(ER_ERROR_DURING_ROLLBACK, MYF(0), err,
2291                my_strerror(errbuf, MYSQL_ERRMSG_SIZE, err));
2292       error = 1;
2293     }
2294     DBUG_ASSERT(!thd->status_var_aggregated);
2295     thd->status_var.ha_rollback_count++;
2296     ha_info_next = ha_info->next();
2297     ha_info->reset(); /* keep it conveniently zero-filled */
2298   }
2299   trn_ctx->set_ha_trx_info(trx_scope, sv->ha_list);
2300 
2301 #ifdef HAVE_PSI_TRANSACTION_INTERFACE
2302   if (thd->m_transaction_psi != nullptr)
2303     MYSQL_INC_TRANSACTION_ROLLBACK_TO_SAVEPOINT(thd->m_transaction_psi, 1);
2304 #endif
2305 
2306   return error;
2307 }
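
/*
  Layout note (added for clarity): the per-engine savepoint data lives in
  the same allocation as the SAVEPOINT object itself, directly after it.
  Each engine declares the size it needs at handlerton registration time
  and is assigned handlerton::savepoint_offset, so the expression

    (uchar *)(sv + 1) + ht->savepoint_offset

  used by ha_rollback_to_savepoint() above and by ha_savepoint() /
  ha_release_savepoint() below points at the engine-private region for
  handlerton ht within savepoint sv.
*/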
2308 
2309 int ha_prepare_low(THD *thd, bool all) {
2310   int error = 0;
2311   Transaction_ctx::enum_trx_scope trx_scope =
2312       all ? Transaction_ctx::SESSION : Transaction_ctx::STMT;
2313   Ha_trx_info *ha_info = thd->get_transaction()->ha_trx_info(trx_scope);
2314 
2315   DBUG_TRACE;
2316 
2317   if (ha_info) {
2318     for (; ha_info && !error; ha_info = ha_info->next()) {
2319       int err = 0;
2320       handlerton *ht = ha_info->ht();
2321       /*
2322         Do not call two-phase commit if this particular
2323         transaction is read-only. This allows for simpler
2324         implementation in engines that are always read-only.
2325       */
2326       if (!ha_info->is_trx_read_write()) continue;
2327       if ((err = ht->prepare(ht, thd, all))) {
2328         char errbuf[MYSQL_ERRMSG_SIZE];
2329         my_error(ER_ERROR_DURING_COMMIT, MYF(0), err,
2330                  my_strerror(errbuf, MYSQL_ERRMSG_SIZE, err));
2331         error = 1;
2332       }
2333       DBUG_ASSERT(!thd->status_var_aggregated);
2334       thd->status_var.ha_prepare_count++;
2335     }
2336     DBUG_EXECUTE_IF("crash_commit_after_prepare", DBUG_SUICIDE(););
2337   }
2338 
2339   return error;
2340 }
2341 
2342 /**
2343   @note
2344   according to the SQL standard (ISO/IEC 9075-2:2003)
2345   section "4.33.4 SQL-statements and transaction states",
2346   SAVEPOINT is *not* a transaction-initiating SQL-statement
2347 */
2348 int ha_savepoint(THD *thd, SAVEPOINT *sv) {
2349   int error = 0;
2350   Transaction_ctx::enum_trx_scope trx_scope =
2351       !thd->in_sub_stmt ? Transaction_ctx::SESSION : Transaction_ctx::STMT;
2352   Ha_trx_info *ha_info = thd->get_transaction()->ha_trx_info(trx_scope);
2353   Ha_trx_info *begin_ha_info = ha_info;
2354 
2355   DBUG_TRACE;
2356 
2357   for (; ha_info; ha_info = ha_info->next()) {
2358     int err;
2359     handlerton *ht = ha_info->ht();
2360     DBUG_ASSERT(ht);
2361     if (!ht->savepoint_set) {
2362       my_error(ER_CHECK_NOT_IMPLEMENTED, MYF(0), "SAVEPOINT");
2363       error = 1;
2364       break;
2365     }
2366     if ((err = ht->savepoint_set(
2367              ht, thd,
2368              (uchar *)(sv + 1) + ht->savepoint_offset))) {  // cannot happen
2369       char errbuf[MYSQL_ERRMSG_SIZE];
2370       my_error(ER_GET_ERRNO, MYF(0), err,
2371                my_strerror(errbuf, MYSQL_ERRMSG_SIZE, err));
2372       error = 1;
2373     }
2374     DBUG_ASSERT(!thd->status_var_aggregated);
2375     thd->status_var.ha_savepoint_count++;
2376   }
2377   /*
2378     Remember the list of registered storage engines. All new
2379     engines are prepended to the beginning of the list.
2380   */
2381   sv->ha_list = begin_ha_info;
2382 
2383 #ifdef HAVE_PSI_TRANSACTION_INTERFACE
2384   if (!error && thd->m_transaction_psi != nullptr)
2385     MYSQL_INC_TRANSACTION_SAVEPOINTS(thd->m_transaction_psi, 1);
2386 #endif
2387 
2388   return error;
2389 }
2390 
2391 int ha_release_savepoint(THD *thd, SAVEPOINT *sv) {
2392   int error = 0;
2393   Ha_trx_info *ha_info = sv->ha_list;
2394   DBUG_TRACE;
2395 
2396   for (; ha_info; ha_info = ha_info->next()) {
2397     int err;
2398     handlerton *ht = ha_info->ht();
2399     /* Savepoint life time is enclosed into transaction life time. */
2400     DBUG_ASSERT(ht);
2401     if (!ht->savepoint_release) continue;
2402     if ((err = ht->savepoint_release(
2403              ht, thd,
2404              (uchar *)(sv + 1) + ht->savepoint_offset))) {  // cannot happen
2405       char errbuf[MYSQL_ERRMSG_SIZE];
2406       my_error(ER_GET_ERRNO, MYF(0), err,
2407                my_strerror(errbuf, MYSQL_ERRMSG_SIZE, err));
2408       error = 1;
2409     }
2410   }
2411   DBUG_EXECUTE_IF("fail_ha_release_savepoint", {
2412     my_error(ER_UNKNOWN_ERROR, MYF(0));
2413     error = 1;
2414   });
2415 
2416 #ifdef HAVE_PSI_TRANSACTION_INTERFACE
2417   if (thd->m_transaction_psi != nullptr)
2418     MYSQL_INC_TRANSACTION_RELEASE_SAVEPOINT(thd->m_transaction_psi, 1);
2419 #endif
2420   return error;
2421 }
2422 
2423 static bool snapshot_handlerton(THD *thd, plugin_ref plugin, void *arg) {
2424   handlerton *hton = plugin_data<handlerton *>(plugin);
2425   if (hton->state == SHOW_OPTION_YES && hton->start_consistent_snapshot) {
2426     hton->start_consistent_snapshot(hton, thd);
2427     *((bool *)arg) = false;
2428   }
2429   return false;
2430 }
2431 
2432 int ha_start_consistent_snapshot(THD *thd) {
2433   bool warn = true;
2434 
2435   plugin_foreach(thd, snapshot_handlerton, MYSQL_STORAGE_ENGINE_PLUGIN, &warn);
2436 
2437   /*
2438     Same idea as when one wants to CREATE TABLE in an engine which does not
2439     exist:
2440   */
2441   if (warn)
2442     push_warning(thd, Sql_condition::SL_WARNING, ER_UNKNOWN_ERROR,
2443                  "This MySQL server does not support any "
2444                  "consistent-read capable storage engine");
2445   return 0;
2446 }
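
/*
  Usage note (added for orientation): this is the engine fan-out behind
  START TRANSACTION WITH CONSISTENT SNAPSHOT. Every installed engine that
  implements handlerton::start_consistent_snapshot gets the call; if none
  does, the warning above is the only effect and the statement otherwise
  behaves like a plain START TRANSACTION.
*/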
2447 
2448 static bool flush_handlerton(THD *, plugin_ref plugin, void *arg) {
2449   handlerton *hton = plugin_data<handlerton *>(plugin);
2450   if (hton->state == SHOW_OPTION_YES && hton->flush_logs &&
2451       hton->flush_logs(hton, *(static_cast<bool *>(arg))))
2452     return true;
2453   return false;
2454 }
2455 
2456 bool ha_flush_logs(bool binlog_group_flush) {
2457   if (plugin_foreach(nullptr, flush_handlerton, MYSQL_STORAGE_ENGINE_PLUGIN,
2458                      static_cast<void *>(&binlog_group_flush))) {
2459     return true;
2460   }
2461   return false;
2462 }
2463 
2464 /**
2465   @brief make canonical filename
2466 
2467   @param[in]  file     table handler
2468   @param[in]  path     original path
2469   @param[out] tmp_path buffer for canonized path
2470 
2471   @details Lower-cases the database name and table name path parts for
2472            non-file-based tables when lower_case_table_names
2473            is 2 (store as is, compare in lower case).
2474            Filesystem path prefix (mysql_data_home or tmpdir)
2475            is left intact.
2476 
2477   @note tmp_path may be left intact if no conversion was
2478         performed.
2479 
2480   @return canonical path
2481 
2482   @todo This may be done more efficiently when table path
2483         gets built. Convert this function to something like
2484         ASSERT_CANONICAL_FILENAME.
2485 */
2486 const char *get_canonical_filename(handler *file, const char *path,
2487                                    char *tmp_path) {
2488   uint i;
2489   if (lower_case_table_names != 2 || (file->ha_table_flags() & HA_FILE_BASED))
2490     return path;
2491 
2492   for (i = 0; i <= mysql_tmpdir_list.max; i++) {
2493     if (is_prefix(path, mysql_tmpdir_list.list[i])) return path;
2494   }
2495 
2496   /* Ensure that the table handler gets the path in lower case */
2497   if (tmp_path != path) my_stpcpy(tmp_path, path);
2498 
2499   /*
2500     we should only lowercase the database/table part of the path,
2501     so start the process after the data home directory
2502   */
2503   my_casedn_str(files_charset_info, tmp_path + mysql_data_home_len);
2504   return tmp_path;
2505 }
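
/*
  Example (illustrative, assuming lower_case_table_names=2 and a
  non-file-based engine): a path such as "<datadir>/MyDb/MyTable" is
  rewritten in tmp_path as "<datadir>/mydb/mytable" -- only the part
  after mysql_data_home is lower-cased, and paths under any tmpdir are
  returned unchanged.
*/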
2506 
2507 class Ha_delete_table_error_handler : public Internal_error_handler {
2508  public:
2509   virtual bool handle_condition(THD *, uint, const char *,
2510                                 Sql_condition::enum_severity_level *level,
2511                                 const char *) {
2512     /* Downgrade errors to warnings. */
2513     if (*level == Sql_condition::SL_ERROR) *level = Sql_condition::SL_WARNING;
2514     return false;
2515   }
2516 };
2517 
2518 /**
2519   Delete table from the storage engine.
2520 
2521   @param thd                Thread context.
2522   @param table_type         Handlerton for table's SE.
2523   @param path               Path to table (without extension).
2524   @param db                 Table database.
2525   @param alias              Table name.
2526   @param table_def          dd::Table object describing the table.
2527   @param generate_warning   Indicates whether errors during deletion
2528                             should be reported as warnings.
2529 
2530   @return  0 - in case of success, non-0 in case of failure, ENOENT
2531            if the file doesn't exist.
2532 */
2533 int ha_delete_table(THD *thd, handlerton *table_type, const char *path,
2534                     const char *db, const char *alias,
2535                     const dd::Table *table_def, bool generate_warning) {
2536   handler *file;
2537   char tmp_path[FN_REFLEN];
2538   int error;
2539   TABLE dummy_table;
2540   TABLE_SHARE dummy_share;
2541   DBUG_TRACE;
2542 
2543   dummy_table.s = &dummy_share;
2544 
2545   /* DB_TYPE_UNKNOWN is used in ALTER TABLE when renaming only .frm files */
2546   if (table_type == nullptr ||
2547       !(file =
2548             get_new_handler((TABLE_SHARE *)nullptr,
2549                             table_def->partition_type() != dd::Table::PT_NONE,
2550                             thd->mem_root, table_type))) {
2551     return ENOENT;
2552   }
2553 
2554   path = get_canonical_filename(file, path, tmp_path);
2555 
2556   if ((error = file->ha_delete_table(path, table_def)) && generate_warning) {
2557     /*
2558       Because file->print_error() uses my_error() to generate the error message
2559       we use an internal error handler to intercept it and store the text
2560       in a temporary buffer. Later the message will be presented to user
2561       as a warning.
2562     */
2563     Ha_delete_table_error_handler ha_delete_table_error_handler;
2564 
2565     /* Fill up structures that print_error may need */
2566     dummy_share.path.str = const_cast<char *>(path);
2567     dummy_share.path.length = strlen(path);
2568     dummy_share.db.str = db;
2569     dummy_share.db.length = strlen(db);
2570     dummy_share.table_name.str = alias;
2571     dummy_share.table_name.length = strlen(alias);
2572     dummy_table.alias = alias;
2573 
2574     file->change_table_ptr(&dummy_table, &dummy_share);
2575 
2576     /*
2577       XXX: should we convert *all* errors to warnings here?
2578       What if the error is fatal?
2579     */
2580     thd->push_internal_handler(&ha_delete_table_error_handler);
2581     file->print_error(error, 0);
2582 
2583     thd->pop_internal_handler();
2584   }
2585 
2586   destroy(file);
2587 
2588 #ifdef HAVE_PSI_TABLE_INTERFACE
2589   if (likely(error == 0)) {
2590     /* Table share not available, so check path for temp_table prefix. */
2591     bool temp_table = (strstr(path, tmp_file_prefix) != nullptr);
2592     PSI_TABLE_CALL(drop_table_share)
2593     (temp_table, db, strlen(db), alias, strlen(alias));
2594   }
2595 #endif
2596 
2597   return error;
2598 }
2599 
2600 // Prepare HA_CREATE_INFO to be used by ALTER as well as upgrade code.
2601 void HA_CREATE_INFO::init_create_options_from_share(const TABLE_SHARE *share,
2602                                                     uint used_fields) {
2603   if (!(used_fields & HA_CREATE_USED_MIN_ROWS)) min_rows = share->min_rows;
2604 
2605   if (!(used_fields & HA_CREATE_USED_MAX_ROWS)) max_rows = share->max_rows;
2606 
2607   if (!(used_fields & HA_CREATE_USED_AVG_ROW_LENGTH))
2608     avg_row_length = share->avg_row_length;
2609 
2610   if (!(used_fields & HA_CREATE_USED_DEFAULT_CHARSET))
2611     default_table_charset = share->table_charset;
2612 
2613   if (!(used_fields & HA_CREATE_USED_KEY_BLOCK_SIZE))
2614     key_block_size = share->key_block_size;
2615 
2616   if (!(used_fields & HA_CREATE_USED_STATS_SAMPLE_PAGES))
2617     stats_sample_pages = share->stats_sample_pages;
2618 
2619   if (!(used_fields & HA_CREATE_USED_STATS_AUTO_RECALC))
2620     stats_auto_recalc = share->stats_auto_recalc;
2621 
2622   if (!(used_fields & HA_CREATE_USED_TABLESPACE))
2623     tablespace = share->tablespace;
2624 
2625   if (storage_media == HA_SM_DEFAULT)
2626     storage_media = share->default_storage_media;
2627 
2628   /* Creation of federated table with LIKE clause needs connection string */
2629   if (!(used_fields & HA_CREATE_USED_CONNECTION))
2630     connect_string = share->connect_string;
2631 
2632   if (!(used_fields & HA_CREATE_USED_COMMENT)) {
2633     // Assert to check that used_fields flag and comment are in sync.
2634     DBUG_ASSERT(!comment.str);
2635     comment = share->comment;
2636   }
2637 
2638   if (!(used_fields & HA_CREATE_USED_COMPRESS)) {
2639     // Assert to check that used_fields flag and compress are in sync
2640     DBUG_ASSERT(!compress.str);
2641     compress = share->compress;
2642   }
2643 
2644   if (!(used_fields & (HA_CREATE_USED_ENCRYPT))) {
2645     // Assert to check that used_fields flag and encrypt_type are in sync
2646     DBUG_ASSERT(!encrypt_type.str);
2647     encrypt_type = share->encrypt_type;
2648   }
2649 
2650   if (!(used_fields & HA_CREATE_USED_SECONDARY_ENGINE)) {
2651     DBUG_ASSERT(secondary_engine.str == nullptr);
2652     secondary_engine = share->secondary_engine;
2653   }
2654 
2655   if (engine_attribute.str == nullptr)
2656     engine_attribute = share->engine_attribute;
2657 
2658   if (secondary_engine_attribute.str == nullptr)
2659     secondary_engine_attribute = share->secondary_engine_attribute;
2660 }
2661 
2662 /****************************************************************************
2663 ** General handler functions
2664 ****************************************************************************/
2665 handler *handler::clone(const char *name, MEM_ROOT *mem_root) {
2666   DBUG_TRACE;
2667 
2668   handler *new_handler = get_new_handler(
2669       table->s, (table->s->m_part_info != nullptr), mem_root, ht);
2670 
2671   if (!new_handler) return nullptr;
2672   if (new_handler->set_ha_share_ref(ha_share)) goto err;
2673 
2674   /*
2675     Allocate handler->ref here because otherwise ha_open will allocate it
2676     on this->table->mem_root and we will not be able to reclaim that memory
2677     when the clone handler object is destroyed.
2678   */
2679   if (!(new_handler->ref =
2680             (uchar *)mem_root->Alloc(ALIGN_SIZE(ref_length) * 2)))
2681     goto err;
2682   /*
2683     TODO: Implement a more efficient way to have more than one index open for
2684     the same table instance. The ha_open call is not cachable for clone.
2685   */
2686   if (new_handler->ha_open(table, name, table->db_stat,
2687                            HA_OPEN_IGNORE_IF_LOCKED, nullptr))
2688     goto err;
2689 
2690   return new_handler;
2691 
2692 err:
2693   destroy(new_handler);
2694   return nullptr;
2695 }
2696 
2697 void handler::ha_statistic_increment(
2698     ulonglong System_status_var::*offset) const {
2699   if (table && table->in_use) (table->in_use->status_var.*offset)++;
2700 }
2701 
2702 THD *handler::ha_thd(void) const {
2703   DBUG_ASSERT(!table || !table->in_use || table->in_use == current_thd);
2704   return (table && table->in_use) ? table->in_use : current_thd;
2705 }
2706 
2707 void handler::unbind_psi() {
2708 #ifdef HAVE_PSI_TABLE_INTERFACE
2709   DBUG_ASSERT(m_lock_type == F_UNLCK);
2710   DBUG_ASSERT(inited == NONE);
2711   /*
2712     Notify the instrumentation that this table is not owned
2713     by this thread any more.
2714   */
2715   PSI_TABLE_CALL(unbind_table)(m_psi);
2716 #endif
2717 }
2718 
2719 void handler::rebind_psi() {
2720 #ifdef HAVE_PSI_TABLE_INTERFACE
2721   DBUG_ASSERT(m_lock_type == F_UNLCK);
2722   DBUG_ASSERT(inited == NONE);
2723   /*
2724     Notify the instrumentation that this table is now owned
2725     by this thread.
2726   */
2727   PSI_table_share *share_psi = ha_table_share_psi(table_share);
2728   m_psi = PSI_TABLE_CALL(rebind_table)(share_psi, this, m_psi);
2729 #endif
2730 }
2731 
2732 void handler::start_psi_batch_mode() {
2733 #ifdef HAVE_PSI_TABLE_INTERFACE
2734   DBUG_ASSERT(m_psi_batch_mode == PSI_BATCH_MODE_NONE);
2735   DBUG_ASSERT(m_psi_locker == nullptr);
2736   m_psi_batch_mode = PSI_BATCH_MODE_STARTING;
2737   m_psi_numrows = 0;
2738 #endif
2739 }
2740 
2741 void handler::end_psi_batch_mode() {
2742 #ifdef HAVE_PSI_TABLE_INTERFACE
2743   DBUG_ASSERT(m_psi_batch_mode != PSI_BATCH_MODE_NONE);
2744   if (m_psi_locker != nullptr) {
2745     DBUG_ASSERT(m_psi_batch_mode == PSI_BATCH_MODE_STARTED);
2746     PSI_TABLE_CALL(end_table_io_wait)(m_psi_locker, m_psi_numrows);
2747     m_psi_locker = nullptr;
2748   }
2749   m_psi_batch_mode = PSI_BATCH_MODE_NONE;
2750 #endif
2751 }
2752 
2753 PSI_table_share *handler::ha_table_share_psi(const TABLE_SHARE *share) const {
2754   return share->m_psi;
2755 }
2756 
2757 /*
2758   Open database handler object.
2759 
2760   Used for opening tables. The name will be the name of the file.
2761   A table is opened when it needs to be opened. For instance
2762   when a request comes in for a select on the table (tables are not
2763   open and closed for each request, they are cached).
2764 
2765   The server opens all tables by calling ha_open() which then calls
2766   the handler specific open().
2767 
2768   Try O_RDONLY if the table cannot be opened as O_RDWR. Don't wait for
2769   locks unless HA_OPEN_WAIT_IF_LOCKED is set.
2770 
2771   @param  [out] table_arg             Table structure.
2772   @param        name                  Full path of table name.
2773   @param        mode                  Open mode flags.
2774   @param        test_if_locked        HA_OPEN_ flags, e.g. HA_OPEN_WAIT_IF_LOCKED.
2775   @param        table_def             dd::Table object describing table
2776                                       being open. Can be NULL for temporary
2777                                       tables created by optimizer.
2778 
2779   @retval >0    Error.
2780   @retval  0    Success.
2781 */
2782 
2783 int handler::ha_open(TABLE *table_arg, const char *name, int mode,
2784                      int test_if_locked, const dd::Table *table_def) {
2785   int error;
2786   DBUG_TRACE;
2787   DBUG_PRINT("enter",
2788              ("name: %s  db_type: %d  db_stat: %d  mode: %d  lock_test: %d",
2789               name, ht->db_type, table_arg->db_stat, mode, test_if_locked));
2790 
2791   table = table_arg;
2792   DBUG_ASSERT(table->s == table_share);
2793   DBUG_ASSERT(m_lock_type == F_UNLCK);
2794   DBUG_PRINT("info", ("old m_lock_type: %d F_UNLCK %d", m_lock_type, F_UNLCK));
2795   MEM_ROOT *mem_root = (test_if_locked & HA_OPEN_TMP_TABLE)
2796                            ? &table->s->mem_root
2797                            : &table->mem_root;
2798   DBUG_ASSERT(alloc_root_inited(mem_root));
2799 
2800   if ((error = open(name, mode, test_if_locked, table_def))) {
2801     if ((error == EACCES || error == EROFS) && mode == O_RDWR &&
2802         (table->db_stat & HA_TRY_READ_ONLY)) {
2803       table->db_stat |= HA_READ_ONLY;
2804       error = open(name, O_RDONLY, test_if_locked, table_def);
2805     }
2806   }
2807   if (error) {
2808     set_my_errno(error); /* Safeguard */
2809     DBUG_PRINT("error", ("error: %d  errno: %d", error, errno));
2810   } else {
2811     DBUG_ASSERT(m_psi == nullptr);
2812     DBUG_ASSERT(table_share != nullptr);
2813 #ifdef HAVE_PSI_TABLE_INTERFACE
2814     PSI_table_share *share_psi = ha_table_share_psi(table_share);
2815     m_psi = PSI_TABLE_CALL(open_table)(share_psi, this);
2816 #endif
2817 
2818     if (table->s->db_options_in_use & HA_OPTION_READ_ONLY_DATA)
2819       table->db_stat |= HA_READ_ONLY;
2820     (void)extra(HA_EXTRA_NO_READCHECK);  // Not needed in SQL
2821 
2822     /* ref is already allocated for us if we're called from handler::clone() */
2823     if (!ref && !(ref = (uchar *)mem_root->Alloc(ALIGN_SIZE(ref_length) * 2))) {
2824       ha_close();
2825       error = HA_ERR_OUT_OF_MEM;
2826     } else
2827       dup_ref = ref + ALIGN_SIZE(ref_length);
2828 
2829     // Give the table a defined starting cursor, even if it never actually seeks
2830     // or writes. This is important for things like weedout on const tables
2831     // (which is a nonsensical combination, but can happen).
2832     memset(ref, 0, ref_length);
2833     cached_table_flags = table_flags();
2834   }
2835 
2836   return error;
2837 }
2838 
2839 /**
2840   Close handler.
2841 
2842   Called from sql_base.cc, sql_select.cc, and table.cc.
2843   In sql_select.cc it is only used to close temporary tables, or during
2844   the process where a temporary table is converted into a
2845   MyISAM table.
2846   For sql_base.cc look at close_data_tables().
2847 
2848   @return Operation status
2849     @retval 0     Success
2850     @retval != 0  Error (error code returned)
2851 */
2852 
2853 int handler::ha_close(void) {
2854   DBUG_TRACE;
2855 #ifdef HAVE_PSI_TABLE_INTERFACE
2856   PSI_TABLE_CALL(close_table)(table_share, m_psi);
2857   m_psi = nullptr; /* instrumentation handle, invalid after close_table() */
2858   DBUG_ASSERT(m_psi_batch_mode == PSI_BATCH_MODE_NONE);
2859   DBUG_ASSERT(m_psi_locker == nullptr);
2860 #endif
2861   // TODO: set table= NULL to mark the handler as closed?
2862   DBUG_ASSERT(m_psi == nullptr);
2863   DBUG_ASSERT(m_lock_type == F_UNLCK);
2864   DBUG_ASSERT(inited == NONE);
2865   if (m_unique) {
2866     // It's allocated on memroot and will be freed along with it
2867     m_unique->cleanup();
2868     m_unique = nullptr;
2869   }
2870   return close();
2871 }
2872 
2873 /**
2874   Initialize use of index.
2875 
2876   @param idx     Index to use
2877   @param sorted  Use sorted order
2878 
2879   @return Operation status
2880     @retval 0     Success
2881     @retval != 0  Error (error code returned)
2882 */
2883 
2884 int handler::ha_index_init(uint idx, bool sorted) {
2885   DBUG_EXECUTE_IF("ha_index_init_fail", return HA_ERR_TABLE_DEF_CHANGED;);
2886   int result;
2887   DBUG_TRACE;
2888   DBUG_ASSERT(table_share->tmp_table != NO_TMP_TABLE || m_lock_type != F_UNLCK);
2889   DBUG_ASSERT(inited == NONE);
2890   if (!(result = index_init(idx, sorted))) inited = INDEX;
2891   end_range = nullptr;
2892   return result;
2893 }
2894 
2895 /**
2896   End use of index.
2897 
2898   @return Operation status
2899     @retval 0     Success
2900     @retval != 0  Error (error code returned)
2901 */
2902 
2903 int handler::ha_index_end() {
2904   DBUG_TRACE;
2905   /* SQL HANDLER function can call this without having it locked. */
2906   DBUG_ASSERT(table->open_by_handler ||
2907               table_share->tmp_table != NO_TMP_TABLE || m_lock_type != F_UNLCK);
2908   DBUG_ASSERT(inited == INDEX);
2909   inited = NONE;
2910   end_range = nullptr;
2911   m_record_buffer = nullptr;
2912   if (m_unique) m_unique->reset(false);
2913   return index_end();
2914 }
2915 
2916 /**
2917   Initialize table for random read or scan.
2918 
2919   @param scan  if true: Initialize for random scans through rnd_next()
2920                if false: Initialize for random reads through rnd_pos()
2921 
2922   @return Operation status
2923     @retval 0     Success
2924     @retval != 0  Error (error code returned)
2925 */
2926 
2927 int handler::ha_rnd_init(bool scan) {
2928   DBUG_EXECUTE_IF("ha_rnd_init_fail", return HA_ERR_TABLE_DEF_CHANGED;);
2929   int result;
2930   DBUG_TRACE;
2931   DBUG_ASSERT(table_share->tmp_table != NO_TMP_TABLE || m_lock_type != F_UNLCK);
2932   DBUG_ASSERT(inited == NONE || (inited == RND && scan));
2933   inited = (result = rnd_init(scan)) ? NONE : RND;
2934   end_range = nullptr;
2935   return result;
2936 }
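/*
  A minimal usage sketch of the random-scan protocol, assuming a handler
  pointer "h" and an already opened TABLE ("h" is a hypothetical name used
  only for illustration). handler::records() below follows the same pattern:

    int error = h->ha_rnd_init(true);   // true: scan through rnd_next()
    while (!error) error = h->ha_rnd_next(table->record[0]);
    if (error == HA_ERR_END_OF_FILE) error = 0;  // normal end of the scan
    const int end_error = h->ha_rnd_end();
    if (!error) error = end_error;
*/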
2937 
2938 /**
2939   End use of random access.
2940 
2941   @return Operation status
2942     @retval 0     Success
2943     @retval != 0  Error (error code returned)
2944 */
2945 
2946 int handler::ha_rnd_end() {
2947   DBUG_TRACE;
2948   /* SQL HANDLER function can call this without having it locked. */
2949   DBUG_ASSERT(table->open_by_handler ||
2950               table_share->tmp_table != NO_TMP_TABLE || m_lock_type != F_UNLCK);
2951   DBUG_ASSERT(inited == RND);
2952   inited = NONE;
2953   end_range = nullptr;
2954   m_record_buffer = nullptr;
2955   return rnd_end();
2956 }
2957 
2958 /**
2959   Read next row via random scan.
2960 
2961   @param buf  Buffer to read the row into
2962 
2963   @return Operation status
2964     @retval 0     Success
2965     @retval != 0  Error (error code returned)
2966 */
2967 
2968 int handler::ha_rnd_next(uchar *buf) {
2969   int result;
2970   DBUG_EXECUTE_IF("ha_rnd_next_deadlock", return HA_ERR_LOCK_DEADLOCK;);
2971   DBUG_TRACE;
2972   DBUG_ASSERT(table_share->tmp_table != NO_TMP_TABLE || m_lock_type != F_UNLCK);
2973   DBUG_ASSERT(inited == RND);
2974 
2975   // Set status for the need to update generated fields
2976   m_update_generated_read_fields = table->has_gcol();
2977 
2978   MYSQL_TABLE_IO_WAIT(PSI_TABLE_FETCH_ROW, MAX_KEY, result,
2979                       { result = rnd_next(buf); })
2980   if (!result && m_update_generated_read_fields) {
2981     result = update_generated_read_fields(buf, table);
2982     m_update_generated_read_fields = false;
2983   }
2984   table->set_row_status_from_handler(result);
2985   return result;
2986 }
2987 
2988 /**
2989   Read row via random scan from position.
2990 
2991   @param[out] buf  Buffer to read the row into
2992   @param      pos  Position from position() call
2993 
2994   @return Operation status
2995     @retval 0     Success
2996     @retval != 0  Error (error code returned)
2997 */
2998 
2999 int handler::ha_rnd_pos(uchar *buf, uchar *pos) {
3000   int result;
3001   DBUG_TRACE;
3002   DBUG_ASSERT(table_share->tmp_table != NO_TMP_TABLE || m_lock_type != F_UNLCK);
3003   /* TODO: Find out how to solve ha_rnd_pos when finding duplicate update. */
3004   /* DBUG_ASSERT(inited == RND); */
3005 
3006   // Set status for the need to update generated fields
3007   m_update_generated_read_fields = table->has_gcol();
3008 
3009   MYSQL_TABLE_IO_WAIT(PSI_TABLE_FETCH_ROW, MAX_KEY, result,
3010                       { result = rnd_pos(buf, pos); })
3011   if (!result && m_update_generated_read_fields) {
3012     result = update_generated_read_fields(buf, table);
3013     m_update_generated_read_fields = false;
3014   }
3015   table->set_row_status_from_handler(result);
3016   return result;
3017 }
3018 
3019 int handler::ha_ft_read(uchar *buf) {
3020   int result;
3021   DBUG_TRACE;
3022 
3023   // Set status for the need to update generated fields
3024   m_update_generated_read_fields = table->has_gcol();
3025 
3026   result = ft_read(buf);
3027   if (!result && m_update_generated_read_fields) {
3028     result = update_generated_read_fields(buf, table);
3029     m_update_generated_read_fields = false;
3030   }
3031   table->set_row_status_from_handler(result);
3032   return result;
3033 }
3034 
3035 int handler::ha_sample_init(void *&scan_ctx, double sampling_percentage,
3036                             int sampling_seed,
3037                             enum_sampling_method sampling_method) {
3038   DBUG_TRACE;
3039   DBUG_ASSERT(sampling_percentage >= 0.0);
3040   DBUG_ASSERT(sampling_percentage <= 100.0);
3041   DBUG_ASSERT(inited == NONE);
3042 
3043   // Initialise the random number generator.
3044   m_random_number_engine.seed(sampling_seed);
3045   m_sampling_percentage = sampling_percentage;
3046 
3047   int result = sample_init(scan_ctx, sampling_percentage, sampling_seed,
3048                            sampling_method);
3049   inited = (result != 0) ? NONE : SAMPLING;
3050   return result;
3051 }
3052 
3053 int handler::ha_sample_end(void *scan_ctx) {
3054   DBUG_TRACE;
3055   DBUG_ASSERT(inited == SAMPLING);
3056   inited = NONE;
3057   int result = sample_end(scan_ctx);
3058   return result;
3059 }
3060 
3061 int handler::ha_sample_next(void *scan_ctx, uchar *buf) {
3062   DBUG_TRACE;
3063   DBUG_ASSERT(inited == SAMPLING);
3064 
3065   if (m_sampling_percentage == 0.0) return HA_ERR_END_OF_FILE;
3066 
3067   m_update_generated_read_fields = table->has_gcol();
3068 
3069   int result;
3070   MYSQL_TABLE_IO_WAIT(PSI_TABLE_FETCH_ROW, MAX_KEY, result,
3071                       { result = sample_next(scan_ctx, buf); })
3072 
3073   if (result == 0 && m_update_generated_read_fields) {
3074     result = update_generated_read_fields(buf, table);
3075     m_update_generated_read_fields = false;
3076   }
3077   table->set_row_status_from_handler(result);
3078 
3079   return result;
3080 }
3081 
3082 int handler::sample_init(void *&scan_ctx MY_ATTRIBUTE((unused)), double, int,
3083                          enum_sampling_method) {
3084   return rnd_init(true);
3085 }
3086 
3087 int handler::sample_end(void *scan_ctx MY_ATTRIBUTE((unused))) {
3088   return rnd_end();
3089 }
3090 
3091 int handler::sample_next(void *scan_ctx MY_ATTRIBUTE((unused)), uchar *buf) {
3092   // Temporarily set inited to RND, since we are calling rnd_next().
3093   int res = rnd_next(buf);
3094 
3095   std::uniform_real_distribution<double> rnd(0.0, 1.0);
3096   while (!res && rnd(m_random_number_engine) > (m_sampling_percentage / 100.0))
3097     res = rnd_next(buf);
3098 
3099   return res;
3100 }
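/*
  The default implementation above is simple rejection sampling: each row
  returned by rnd_next() is kept with probability m_sampling_percentage / 100,
  decided by a uniform draw from m_random_number_engine (seeded in
  ha_sample_init()); rejected rows are skipped and the next one is fetched.
  Engines can provide their own cheaper sample_init()/sample_next()/
  sample_end() implementations instead of this fallback.
*/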
3101 
3102 int handler::records(ha_rows *num_rows) {
3103   if (ha_table_flags() & HA_COUNT_ROWS_INSTANT) {
3104     *num_rows = stats.records;
3105     return 0;
3106   }
3107 
3108   int error = 0;
3109   ha_rows rows = 0;
3110   start_psi_batch_mode();
3111 
3112   if (!(error = ha_rnd_init(true))) {
3113     while (!table->in_use->killed) {
3114       DBUG_EXECUTE_IF("bug28079850", table->in_use->killed = THD::KILL_QUERY;);
3115       if ((error = ha_rnd_next(table->record[0]))) {
3116         if (error == HA_ERR_RECORD_DELETED)
3117           continue;
3118         else
3119           break;
3120       }
3121       ++rows;
3122     }
3123   }
3124 
3125   *num_rows = rows;
3126   end_psi_batch_mode();
3127   int ha_rnd_end_error = 0;
3128   if (error != HA_ERR_END_OF_FILE) *num_rows = HA_POS_ERROR;
3129 
3130   // Call ha_rnd_end() only if the handler has been initialized.
3131   if (inited && (ha_rnd_end_error = ha_rnd_end())) *num_rows = HA_POS_ERROR;
3132 
3133   return (error != HA_ERR_END_OF_FILE) ? error : ha_rnd_end_error;
3134 }
3135 
3136 int handler::records_from_index(ha_rows *num_rows, uint index) {
3137   if (ha_table_flags() & HA_COUNT_ROWS_INSTANT) {
3138     *num_rows = stats.records;
3139     return 0;
3140   }
3141 
3142   int error = 0;
3143   ha_rows rows = 0;
3144   uchar *buf = table->record[0];
3145   start_psi_batch_mode();
3146 
3147   if (!(error = ha_index_init(index, false))) {
3148     if (!(error = ha_index_first(buf))) {
3149       rows = 1;
3150 
3151       while (!table->in_use->killed) {
3152         DBUG_EXECUTE_IF("bug28079850",
3153                         table->in_use->killed = THD::KILL_QUERY;);
3154         if ((error = ha_index_next(buf))) {
3155           if (error == HA_ERR_RECORD_DELETED)
3156             continue;
3157           else
3158             break;
3159         }
3160         ++rows;
3161       }
3162     }
3163   }
3164 
3165   *num_rows = rows;
3166   end_psi_batch_mode();
3167   int ha_index_end_error = 0;
3168   if (error != HA_ERR_END_OF_FILE) *num_rows = HA_POS_ERROR;
3169 
3170   // Call ha_index_end() only if handler has been initialized.
3171   if (inited && (ha_index_end_error = ha_index_end())) *num_rows = HA_POS_ERROR;
3172 
3173   return (error != HA_ERR_END_OF_FILE) ? error : ha_index_end_error;
3174 }
3175 
3176 int handler::handle_records_error(int error, ha_rows *num_rows) {
3177   // If query was killed set the error since not all storage engines do it.
3178   if (table->in_use->killed) {
3179     *num_rows = HA_POS_ERROR;
3180     if (error == 0) error = HA_ERR_QUERY_INTERRUPTED;
3181   }
3182 
3183   if (error != 0) DBUG_ASSERT(*num_rows == HA_POS_ERROR);
3184   if (*num_rows == HA_POS_ERROR) DBUG_ASSERT(error != 0);
3185   if (error != 0) {
3186     /*
3187       ha_innobase::records may have rolled back internally.
3188       In this case, thd_mark_transaction_to_rollback() will have been called.
3189       For the errors below, we need to abort right away.
3190     */
3191     switch (error) {
3192       case HA_ERR_LOCK_DEADLOCK:
3193       case HA_ERR_LOCK_TABLE_FULL:
3194       case HA_ERR_LOCK_WAIT_TIMEOUT:
3195       case HA_ERR_QUERY_INTERRUPTED:
3196         print_error(error, MYF(0));
3197         return error;
3198       default:
3199         return error;
3200     }
3201   }
3202   return 0;
3203 }
3204 
3205 /**
3206   Read [part of] row via [part of] index.
3207   @param[out] buf          buffer where store the data
3208   @param      key          Key to search for
3209   @param      keypart_map  Which part of key to use
3210   @param      find_flag    Direction/condition on key usage
3211 
3212   @returns Operation status
3213     @retval  0                   Success (found a record, and function has
3214                                  set table status to "has row")
3215     @retval  HA_ERR_END_OF_FILE  Row not found (function has set table status
3216                                  to "no row"). End of index passed.
3217     @retval  HA_ERR_KEY_NOT_FOUND Row not found (function has set table status
3218                                  to "no row"). Index cursor positioned.
3219     @retval  != 0                Error
3220 
3221   @note Positions an index cursor to the index specified in the handle.
3222   Fetches the row if available. If the key value is null,
3223   begin at the first key of the index.
3224   ha_index_read_map can be restarted without calling index_end on the previous
3225   index scan and without calling ha_index_init. In this case the
3226   ha_index_read_map is on the same index as the previous index scan.
3227   This is particularly used in conjunction with multi-range reads.
3228 */
3229 
3230 int handler::ha_index_read_map(uchar *buf, const uchar *key,
3231                                key_part_map keypart_map,
3232                                enum ha_rkey_function find_flag) {
3233   int result;
3234   DBUG_TRACE;
3235   DBUG_ASSERT(table_share->tmp_table != NO_TMP_TABLE || m_lock_type != F_UNLCK);
3236   DBUG_ASSERT(inited == INDEX);
3237   DBUG_ASSERT(!pushed_idx_cond || buf == table->record[0]);
3238 
3239   // Set status for the need to update generated fields
3240   m_update_generated_read_fields = table->has_gcol();
3241 
3242   MYSQL_TABLE_IO_WAIT(PSI_TABLE_FETCH_ROW, active_index, result, {
3243     result = index_read_map(buf, key, keypart_map, find_flag);
3244   })
3245   if (!result && m_update_generated_read_fields) {
3246     result = update_generated_read_fields(buf, table, active_index);
3247     m_update_generated_read_fields = false;
3248   }
3249   table->set_row_status_from_handler(result);
3250   return result;
3251 }
3252 
3253 int handler::ha_index_read_last_map(uchar *buf, const uchar *key,
3254                                     key_part_map keypart_map) {
3255   int result;
3256   DBUG_TRACE;
3257   DBUG_ASSERT(table_share->tmp_table != NO_TMP_TABLE || m_lock_type != F_UNLCK);
3258   DBUG_ASSERT(inited == INDEX);
3259   DBUG_ASSERT(!pushed_idx_cond || buf == table->record[0]);
3260 
3261   // Set status for the need to update generated fields
3262   m_update_generated_read_fields = table->has_gcol();
3263 
3264   MYSQL_TABLE_IO_WAIT(PSI_TABLE_FETCH_ROW, active_index, result,
3265                       { result = index_read_last_map(buf, key, keypart_map); })
3266   if (!result && m_update_generated_read_fields) {
3267     result = update_generated_read_fields(buf, table, active_index);
3268     m_update_generated_read_fields = false;
3269   }
3270   table->set_row_status_from_handler(result);
3271   return result;
3272 }
3273 
3274 /**
3275   Initializes an index and read it.
3276 
3277   @see handler::ha_index_read_map.
3278 */
3279 
3280 int handler::ha_index_read_idx_map(uchar *buf, uint index, const uchar *key,
3281                                    key_part_map keypart_map,
3282                                    enum ha_rkey_function find_flag) {
3283   int result;
3284   DBUG_TRACE;
3285   DBUG_ASSERT(table_share->tmp_table != NO_TMP_TABLE || m_lock_type != F_UNLCK);
3286   DBUG_ASSERT(end_range == nullptr);
3287   DBUG_ASSERT(!pushed_idx_cond || buf == table->record[0]);
3288 
3289   // Set status for the need to update generated fields
3290   m_update_generated_read_fields = table->has_gcol();
3291 
3292   MYSQL_TABLE_IO_WAIT(PSI_TABLE_FETCH_ROW, index, result, {
3293     result = index_read_idx_map(buf, index, key, keypart_map, find_flag);
3294   })
3295   if (!result && m_update_generated_read_fields) {
3296     result = update_generated_read_fields(buf, table, index);
3297     m_update_generated_read_fields = false;
3298   }
3299   table->set_row_status_from_handler(result);
3300   return result;
3301 }
3302 
3303 /**
3304   Reads the next row via index.
3305 
3306   @param[out] buf  Row data
3307 
3308   @return Operation status.
3309     @retval  0                   Success
3310     @retval  HA_ERR_END_OF_FILE  Row not found
3311     @retval  != 0                Error
3312 */
3313 
3314 int handler::ha_index_next(uchar *buf) {
3315   int result;
3316   DBUG_TRACE;
3317   DBUG_ASSERT(table_share->tmp_table != NO_TMP_TABLE || m_lock_type != F_UNLCK);
3318   DBUG_ASSERT(inited == INDEX);
3319   DBUG_ASSERT(!pushed_idx_cond || buf == table->record[0]);
3320 
3321   // Set status for the need to update generated fields
3322   m_update_generated_read_fields = table->has_gcol();
3323 
3324   MYSQL_TABLE_IO_WAIT(PSI_TABLE_FETCH_ROW, active_index, result,
3325                       { result = index_next(buf); })
3326   if (!result && m_update_generated_read_fields) {
3327     result = update_generated_read_fields(buf, table, active_index);
3328     m_update_generated_read_fields = false;
3329   }
3330   table->set_row_status_from_handler(result);
3331   return result;
3332 }
3333 
3334 /**
3335   Reads the previous row via index.
3336 
3337   @param[out] buf  Row data
3338 
3339   @return Operation status.
3340     @retval  0                   Success
3341     @retval  HA_ERR_END_OF_FILE  Row not found
3342     @retval  != 0                Error
3343 */
3344 
3345 int handler::ha_index_prev(uchar *buf) {
3346   int result;
3347   DBUG_TRACE;
3348   DBUG_ASSERT(table_share->tmp_table != NO_TMP_TABLE || m_lock_type != F_UNLCK);
3349   DBUG_ASSERT(inited == INDEX);
3350   DBUG_ASSERT(!pushed_idx_cond || buf == table->record[0]);
3351 
3352   // Set status for the need to update generated fields
3353   m_update_generated_read_fields = table->has_gcol();
3354 
3355   MYSQL_TABLE_IO_WAIT(PSI_TABLE_FETCH_ROW, active_index, result,
3356                       { result = index_prev(buf); })
3357   if (!result && m_update_generated_read_fields) {
3358     result = update_generated_read_fields(buf, table, active_index);
3359     m_update_generated_read_fields = false;
3360   }
3361   table->set_row_status_from_handler(result);
3362   return result;
3363 }
3364 
3365 /**
3366   Reads the first row via index.
3367 
3368   @param[out] buf  Row data
3369 
3370   @return Operation status.
3371     @retval  0                   Success
3372     @retval  HA_ERR_END_OF_FILE  Row not found
3373     @retval  != 0                Error
3374 */
3375 
3376 int handler::ha_index_first(uchar *buf) {
3377   int result;
3378   DBUG_TRACE;
3379   DBUG_ASSERT(table_share->tmp_table != NO_TMP_TABLE || m_lock_type != F_UNLCK);
3380   DBUG_ASSERT(inited == INDEX);
3381   DBUG_ASSERT(!pushed_idx_cond || buf == table->record[0]);
3382 
3383   // Set status for the need to update generated fields
3384   m_update_generated_read_fields = table->has_gcol();
3385 
3386   MYSQL_TABLE_IO_WAIT(PSI_TABLE_FETCH_ROW, active_index, result,
3387                       { result = index_first(buf); })
3388   if (!result && m_update_generated_read_fields) {
3389     result = update_generated_read_fields(buf, table, active_index);
3390     m_update_generated_read_fields = false;
3391   }
3392   table->set_row_status_from_handler(result);
3393   return result;
3394 }
3395 
3396 /**
3397   Reads the last row via index.
3398 
3399   @param[out] buf  Row data
3400 
3401   @return Operation status.
3402     @retval  0                   Success
3403     @retval  HA_ERR_END_OF_FILE  Row not found
3404     @retval  != 0                Error
3405 */
3406 
3407 int handler::ha_index_last(uchar *buf) {
3408   int result;
3409   DBUG_TRACE;
3410   DBUG_ASSERT(table_share->tmp_table != NO_TMP_TABLE || m_lock_type != F_UNLCK);
3411   DBUG_ASSERT(inited == INDEX);
3412   DBUG_ASSERT(!pushed_idx_cond || buf == table->record[0]);
3413 
3414   // Set status for the need to update generated fields
3415   m_update_generated_read_fields = table->has_gcol();
3416 
3417   MYSQL_TABLE_IO_WAIT(PSI_TABLE_FETCH_ROW, active_index, result,
3418                       { result = index_last(buf); })
3419   if (!result && m_update_generated_read_fields) {
3420     result = update_generated_read_fields(buf, table, active_index);
3421     m_update_generated_read_fields = false;
3422   }
3423   table->set_row_status_from_handler(result);
3424   return result;
3425 }
3426 
3427 /**
3428   Reads the next same row via index.
3429 
3430   @param[out] buf     Row data
3431   @param      key     Key to search for
3432   @param      keylen  Length of key
3433 
3434   @return Operation status.
3435     @retval  0                   Success
3436     @retval  HA_ERR_END_OF_FILE  Row not found
3437     @retval  != 0                Error
3438 */
3439 
3440 int handler::ha_index_next_same(uchar *buf, const uchar *key, uint keylen) {
3441   int result;
3442   DBUG_TRACE;
3443   DBUG_ASSERT(table_share->tmp_table != NO_TMP_TABLE || m_lock_type != F_UNLCK);
3444   DBUG_ASSERT(inited == INDEX);
3445   DBUG_ASSERT(!pushed_idx_cond || buf == table->record[0]);
3446 
3447   // Set status for the need to update generated fields
3448   m_update_generated_read_fields = table->has_gcol();
3449 
3450   MYSQL_TABLE_IO_WAIT(PSI_TABLE_FETCH_ROW, active_index, result,
3451                       { result = index_next_same(buf, key, keylen); })
3452   if (!result && m_update_generated_read_fields) {
3453     result = update_generated_read_fields(buf, table, active_index);
3454     m_update_generated_read_fields = false;
3455   }
3456   table->set_row_status_from_handler(result);
3457   return result;
3458 }
3459 
3460 /**
3461   Read first row (only) from a table.
3462 
3463   This is never called for tables whose storage engine does not maintain
3464   exact statistics on the number of records, e.g. InnoDB.
3465 
3466   @note Since there is only one implementation for this function, it is
3467         non-virtual and does not call a protected inner function, like
3468         most other handler functions.
3469 
3470   @note Implementation only calls other handler functions, so there is no need
3471         to update generated columns nor set table status.
3472 */
3473 int handler::ha_read_first_row(uchar *buf, uint primary_key) {
3474   int error;
3475   DBUG_TRACE;
3476 
3477   ha_statistic_increment(&System_status_var::ha_read_first_count);
3478 
3479   /*
3480     If there are very few deleted rows in the table, find the first row by
3481     scanning the table.
3482     TODO remove the test for HA_READ_ORDER
3483   */
3484   if (stats.deleted < 10 || primary_key >= MAX_KEY ||
3485       !(index_flags(primary_key, 0, false) & HA_READ_ORDER)) {
3486     if (!(error = ha_rnd_init(true))) {
3487       while ((error = ha_rnd_next(buf)) == HA_ERR_RECORD_DELETED)
3488         /* skip deleted row */;
3489       const int end_error = ha_rnd_end();
3490       if (!error) error = end_error;
3491     }
3492   } else {
3493     /* Find the first row through the primary key */
3494     if (!(error = ha_index_init(primary_key, false))) {
3495       error = ha_index_first(buf);
3496       const int end_error = ha_index_end();
3497       if (!error) error = end_error;
3498     }
3499   }
3500   return error;
3501 }
3502 
3503 int handler::ha_index_read_pushed(uchar *buf, const uchar *key,
3504                                   key_part_map keypart_map) {
3505   DBUG_TRACE;
3506 
3507   // Set status for the need to update generated fields
3508   m_update_generated_read_fields = table->has_gcol();
3509 
3510   int result = index_read_pushed(buf, key, keypart_map);
3511   if (!result && m_update_generated_read_fields) {
3512     result = update_generated_read_fields(buf, table, active_index);
3513     m_update_generated_read_fields = false;
3514   }
3515   table->set_row_status_from_handler(result);
3516   return result;
3517 }
3518 
3519 int handler::ha_index_next_pushed(uchar *buf) {
3520   DBUG_TRACE;
3521 
3522   // Set status for the need to update generated fields
3523   m_update_generated_read_fields = table->has_gcol();
3524 
3525   int result = index_next_pushed(buf);
3526   if (!result && m_update_generated_read_fields) {
3527     result = update_generated_read_fields(buf, table, active_index);
3528     m_update_generated_read_fields = false;
3529   }
3530   table->set_row_status_from_handler(result);
3531   return result;
3532 }
3533 
3534 /**
3535   Generate the next auto-increment number based on increment and offset.
3536   Computes the lowest number that is
3537   - strictly greater than "nr"
3538   - of the form: auto_increment_offset + N * auto_increment_increment.
3539   If an overflow happened, ULLONG_MAX is returned as an
3540   indication of overflow.
3541   In most cases increment= offset= 1, in which case we get:
3542   @verbatim 1,2,3,4,5,... @endverbatim
3543     If increment=10 and offset=5 and the previous number is 1, we get:
3544   @verbatim 1,5,15,25,35,... @endverbatim
3545 */
3546 inline ulonglong compute_next_insert_id(ulonglong nr,
3547                                         struct System_variables *variables) {
3548   const ulonglong save_nr = nr;
3549 
3550   if (variables->auto_increment_increment == 1)
3551     nr = nr + 1;  // optimization of the formula below
3552   else {
3553     nr = (((nr + variables->auto_increment_increment -
3554             variables->auto_increment_offset)) /
3555           (ulonglong)variables->auto_increment_increment);
3556     nr = (nr * (ulonglong)variables->auto_increment_increment +
3557           variables->auto_increment_offset);
3558   }
3559 
3560   if (unlikely(nr <= save_nr)) return ULLONG_MAX;
3561 
3562   return nr;
3563 }
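/*
  Worked example of the formula above (illustration only): with
  auto_increment_increment = 10, auto_increment_offset = 5 and nr = 12:
    (12 + 10 - 5) / 10 = 1  (integer division), then 1 * 10 + 5 = 15,
  i.e. 15 is the lowest value of the form 5 + N * 10 that is strictly
  greater than 12. For nr close to ULLONG_MAX the computation wraps, which
  is why the function returns ULLONG_MAX when the result is <= the input.
*/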
3564 
3565 void handler::adjust_next_insert_id_after_explicit_value(ulonglong nr) {
3566   /*
3567     If we have set THD::next_insert_id previously and plan to insert an
3568     explicitly specified value larger than this, we need to increase
3569     THD::next_insert_id to be greater than the explicit value.
3570   */
3571   if ((next_insert_id > 0) && (nr >= next_insert_id))
3572     set_next_insert_id(compute_next_insert_id(nr, &table->in_use->variables));
3573 }
3574 
3575 /** @brief
3576   Computes the largest number X:
3577   - smaller than or equal to "nr"
3578   - of the form: auto_increment_offset + N * auto_increment_increment
3579   where N>=0.
3580 
3581   SYNOPSIS
3582     prev_insert_id
3583       nr            Number to "round down"
3584       variables     variables struct containing auto_increment_increment and
3585                     auto_increment_offset
3586 
3587   RETURN
3588     The number X if it exists, "nr" otherwise.
3589 */
3590 inline ulonglong prev_insert_id(ulonglong nr,
3591                                 struct System_variables *variables) {
3592   if (unlikely(nr < variables->auto_increment_offset)) {
3593     /*
3594       There's nothing good we can do here. That is a pathological case, where
3595       the offset is larger than the column's max possible value, i.e. not even
3596       the first sequence value may be inserted. User will receive warning.
3597     */
3598     DBUG_PRINT("info", ("auto_increment: nr: %lu cannot honour "
3599                         "auto_increment_offset: %lu",
3600                         (ulong)nr, variables->auto_increment_offset));
3601     return nr;
3602   }
3603   if (variables->auto_increment_increment == 1)
3604     return nr;  // optimization of the formula below
3605   nr = (((nr - variables->auto_increment_offset)) /
3606         (ulonglong)variables->auto_increment_increment);
3607   return (nr * (ulonglong)variables->auto_increment_increment +
3608           variables->auto_increment_offset);
3609 }
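/*
  Worked example of prev_insert_id() (illustration only): with
  auto_increment_increment = 10, auto_increment_offset = 5 and nr = 23:
    (23 - 5) / 10 = 1  (integer division), then 1 * 10 + 5 = 15,
  i.e. 15 is the largest value of the form 5 + N * 10 that is <= 23.
  If nr were 3 (smaller than the offset), nr itself would be returned.
*/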
3610 
3611 /**
3612   Update the auto_increment field if necessary.
3613 
3614   Updates columns with type NEXT_NUMBER if:
3615 
3616   - If column value is set to NULL (in which case
3617     autoinc_field_has_explicit_non_null_value is 0)
3618   - If column is set to 0 and (sql_mode & MODE_NO_AUTO_VALUE_ON_ZERO) is not
3619     set. In the future we will only set NEXT_NUMBER fields if one sets them
3620     to NULL (or they are not included in the insert list).
3621 
3622     In those cases, we check if the currently reserved interval still has
3623     values we have not used. If yes, we pick the smallest one and use it.
3624     Otherwise:
3625 
3626   - If a list of intervals has been provided to the statement via SET
3627     INSERT_ID or via an Intvar_log_event (in a replication slave), we pick the
3628     first unused interval from this list, consider it as reserved.
3629 
3630   - Otherwise we set the column for the first row to the value
3631     next_insert_id(get_auto_increment(column))) which is usually
3632     max-used-column-value+1.
3633     We call get_auto_increment() for the first row in a multi-row
3634     statement. get_auto_increment() will tell us the interval of values it
3635     reserved for us.
3636 
3637   - In both cases, for the following rows we use those reserved values without
3638     calling the handler again (we just progress in the interval, computing
3639     each new value from the previous one). Until we have exhausted them, then
3640     we either take the next provided interval or call get_auto_increment()
3641     again to reserve a new interval.
3642 
3643   - In both cases, the reserved intervals are remembered in
3644     thd->auto_inc_intervals_in_cur_stmt_for_binlog if statement-based
3645     binlogging; the last reserved interval is remembered in
3646     auto_inc_interval_for_cur_row. The number of reserved intervals is
3647     remembered in auto_inc_intervals_count. It differs from the number of
3648     elements in thd->auto_inc_intervals_in_cur_stmt_for_binlog() because the
3649     latter list is cumulative over all statements forming one binlog event
3650     (when stored functions and triggers are used), and collapses two
3651     contiguous intervals in one (see its append() method).
3652 
3653     The idea is that generated auto_increment values are predictable and
3654     independent of the column values in the table.  This is needed to be
3655     able to replicate into a table that already has rows with a higher
3656     auto-increment value than the one that is inserted.
3657 
3658     After we have already generated an auto-increment number and the user
3659     inserts a column with a higher value than the last used one, we will
3660     start counting from the inserted value.
3661 
3662     This function's "outputs" are: the table's auto_increment field is filled
3663     with a value, thd->next_insert_id is filled with the value to use for the
3664     next row, if a value was autogenerated for the current row it is stored in
3665     thd->insert_id_for_cur_row, if get_auto_increment() was called
3666     thd->auto_inc_interval_for_cur_row is modified, if that interval is not
3667     present in thd->auto_inc_intervals_in_cur_stmt_for_binlog it is added to
3668     this list.
3669 
3670   @todo
3671     Replace all references to "next number" or NEXT_NUMBER to
3672     "auto_increment", everywhere (see below: there is
3673     table->autoinc_field_has_explicit_non_null_value, and there also exists
3674     table->next_number_field, it's not consistent).
3675 
3676   @retval
3677     0	ok
3678   @retval
3679     HA_ERR_AUTOINC_READ_FAILED  get_auto_increment() was called and
3680     returned ~(ulonglong) 0
3681   @retval
3682     HA_ERR_AUTOINC_ERANGE storing value in field caused strict mode
3683     failure.
3684 */
3685 
3686 #define AUTO_INC_DEFAULT_NB_ROWS 1  // Some prefer 1024 here
3687 #define AUTO_INC_DEFAULT_NB_MAX_BITS 16
3688 #define AUTO_INC_DEFAULT_NB_MAX ((1 << AUTO_INC_DEFAULT_NB_MAX_BITS) - 1)
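/*
  For illustration, with the defaults above: when the reserved interval is
  exhausted and no accurate row estimate is available, the reservation size
  requested from get_auto_increment() is
  AUTO_INC_DEFAULT_NB_ROWS * (1 << auto_inc_intervals_count), i.e.
  1, 2, 4, 8, ... values on successive reservations, capped at
  AUTO_INC_DEFAULT_NB_MAX (65535). See update_auto_increment() below.
*/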
3689 
3690 int handler::update_auto_increment() {
3691   ulonglong nr, nb_reserved_values = 0;
3692   bool append = false;
3693   THD *thd = table->in_use;
3694   struct System_variables *variables = &thd->variables;
3695   DBUG_ASSERT(table_share->tmp_table != NO_TMP_TABLE || m_lock_type != F_UNLCK);
3696   DBUG_TRACE;
3697 
3698   /*
3699     next_insert_id is a "cursor" into the reserved interval, it may go greater
3700     than the interval, but not smaller.
3701   */
3702   DBUG_ASSERT(next_insert_id >= auto_inc_interval_for_cur_row.minimum());
3703 
3704   if ((nr = table->next_number_field->val_int()) != 0 ||
3705       (table->autoinc_field_has_explicit_non_null_value &&
3706        thd->variables.sql_mode & MODE_NO_AUTO_VALUE_ON_ZERO)) {
3707     /*
3708       First test if the query was aborted due to strict mode constraints.
3709     */
3710     if (thd->is_error() &&
3711         thd->get_stmt_da()->mysql_errno() == ER_TRUNCATED_WRONG_VALUE)
3712       return HA_ERR_AUTOINC_ERANGE;
3713 
3714     /*
3715       Update next_insert_id if we had already generated a value in this
3716       statement (case of INSERT VALUES(null),(3763),(null):
3717       the last NULL needs to insert 3764, not the value of the first NULL plus
3718       1).
3719       We should also take the sign of the value into account.
3720       Since an auto_increment value can't be negative, we should update
3721       next_insert_id only when we are INSERTing an explicit positive value.
3722       It means that for a table that has a SIGNED INTEGER column, when we execute
3723       the following statement
3724       INSERT INTO t1 VALUES( NULL), (-1), (NULL)
3725       we shouldn't call adjust_next_insert_id_after_explicit_value()
3726       and the result row will be (1, -1, 2) (for a newly opened connection
3727       to the server). On the other hand, for the statement
3728       INSERT INTO t1 VALUES( NULL), (333), (NULL)
3729       we should call adjust_next_insert_id_after_explicit_value()
3730       and result row will be (1, 333, 334).
3731     */
3732     if (table->next_number_field->is_unsigned() || ((longlong)nr) > 0)
3733       adjust_next_insert_id_after_explicit_value(nr);
3734 
3735     insert_id_for_cur_row = 0;  // didn't generate anything
3736     return 0;
3737   }
3738 
3739   if (next_insert_id > table->next_number_field->get_max_int_value())
3740     return HA_ERR_AUTOINC_READ_FAILED;
3741 
3742   if ((nr = next_insert_id) >= auto_inc_interval_for_cur_row.maximum()) {
3743     /* next_insert_id is beyond what is reserved, so we reserve more. */
3744     const Discrete_interval *forced = thd->auto_inc_intervals_forced.get_next();
3745     if (forced != nullptr) {
3746       nr = forced->minimum();
3747       /*
3748         In a multi-row insert statement, when the number of affected rows is
3749         known, reserve that many auto-increment values, so that the reserved
3750         interval spans from the starting value to the starting value + number
3751         of affected rows * auto_increment increment.
3752        */
3753       nb_reserved_values = (estimation_rows_to_insert > 0)
3754                                ? estimation_rows_to_insert
3755                                : forced->values();
3756     } else {
3757       /*
3758         handler::estimation_rows_to_insert was set by
3759         handler::ha_start_bulk_insert(); if 0 it means "unknown".
3760       */
3761       ulonglong nb_desired_values;
3762       /*
3763         If an estimation was given to the engine:
3764         - use it.
3765         - if we have already reserved numbers, the estimation was not
3766         accurate, so we'll reserve 2*AUTO_INC_DEFAULT_NB_ROWS the 2nd
3767         time, twice that the 3rd time etc.
3768         If no estimation was given, use those increasing defaults from the
3769         start, starting from AUTO_INC_DEFAULT_NB_ROWS.
3770         Don't go beyond a max to not reserve "way too much" (because
3771         reservation means potentially losing unused values).
3772         Note that in prelocked mode no estimation is given.
3773       */
3774 
3775       if ((auto_inc_intervals_count == 0) && (estimation_rows_to_insert > 0))
3776         nb_desired_values = estimation_rows_to_insert;
3777       else if ((auto_inc_intervals_count == 0) &&
3778                (thd->lex->bulk_insert_row_cnt > 0)) {
3779         /*
3780           For multi-row inserts, if the bulk inserts cannot be started, the
3781           handler::estimation_rows_to_insert will not be set. But we still
3782           want to reserve the autoinc values.
3783         */
3784         nb_desired_values = thd->lex->bulk_insert_row_cnt;
3785       } else /* go with the increasing defaults */
3786       {
3787         /* avoid overflow in formula, with this if() */
3788         if (auto_inc_intervals_count <= AUTO_INC_DEFAULT_NB_MAX_BITS) {
3789           nb_desired_values =
3790               AUTO_INC_DEFAULT_NB_ROWS * (1 << auto_inc_intervals_count);
3791           nb_desired_values =
3792               std::min(nb_desired_values, ulonglong(AUTO_INC_DEFAULT_NB_MAX));
3793         } else
3794           nb_desired_values = AUTO_INC_DEFAULT_NB_MAX;
3795       }
3796       /* This call ignores all its parameters but nr, currently */
3797       get_auto_increment(variables->auto_increment_offset,
3798                          variables->auto_increment_increment, nb_desired_values,
3799                          &nr, &nb_reserved_values);
3800       if (nr == ULLONG_MAX) return HA_ERR_AUTOINC_READ_FAILED;  // Mark failure
3801 
3802       /*
3803         That rounding below should not be needed when all engines actually
3804         respect offset and increment in get_auto_increment(). But they don't
3805         so we still do it. Wonder if for the not-first-in-index we should do
3806         it. Hope that this rounding didn't push us out of the interval; even
3807         if it did we cannot do anything about it (calling the engine again
3808         will not help as we inserted no row).
3809       */
3810       nr = compute_next_insert_id(nr - 1, variables);
3811     }
3812 
3813     if (table->s->next_number_keypart == 0) {
3814       /* We must defer the appending until "nr" has been possibly truncated */
3815       append = true;
3816     } else {
3817       /*
3818         For such auto_increment there is no notion of interval, just a
3819         singleton. The interval is not even stored in
3820         thd->auto_inc_interval_for_cur_row, so we are sure to call the engine
3821         for next row.
3822       */
3823       DBUG_PRINT("info", ("auto_increment: special not-first-in-index"));
3824     }
3825   }
3826 
3827   if (unlikely(nr == ULLONG_MAX)) return HA_ERR_AUTOINC_ERANGE;
3828 
3829   DBUG_PRINT("info", ("auto_increment: %lu", (ulong)nr));
3830 
3831   if (unlikely(table->next_number_field->store((longlong)nr, true))) {
3832     /*
3833       first test if the query was aborted due to strict mode constraints
3834     */
3835     if (thd->is_error() &&
3836         thd->get_stmt_da()->mysql_errno() == ER_WARN_DATA_OUT_OF_RANGE)
3837       return HA_ERR_AUTOINC_ERANGE;
3838 
3839     /*
3840       field refused this value (overflow) and truncated it, use the result of
3841       the truncation (which is going to be inserted); however we try to
3842       decrease it to honour auto_increment_* variables.
3843       That will shift the left bound of the reserved interval, we don't
3844       bother shifting the right bound (anyway any other value from this
3845       interval will cause a duplicate key).
3846     */
3847     nr = prev_insert_id(table->next_number_field->val_int(), variables);
3848     if (unlikely(table->next_number_field->store((longlong)nr, true)))
3849       nr = table->next_number_field->val_int();
3850   }
3851   if (append) {
3852     auto_inc_interval_for_cur_row.replace(nr, nb_reserved_values,
3853                                           variables->auto_increment_increment);
3854     auto_inc_intervals_count++;
3855     /* Row-based replication does not need to store intervals in binlog */
3856     if (mysql_bin_log.is_open() && !thd->is_current_stmt_binlog_format_row())
3857       thd->auto_inc_intervals_in_cur_stmt_for_binlog.append(
3858           auto_inc_interval_for_cur_row.minimum(),
3859           auto_inc_interval_for_cur_row.values(),
3860           variables->auto_increment_increment);
3861   }
3862 
3863   /*
3864     Record this autogenerated value. If the caller then
3865     succeeds to insert this value, it will call
3866     record_first_successful_insert_id_in_cur_stmt()
3867     which will set first_successful_insert_id_in_cur_stmt if it's not
3868     already set.
3869   */
3870   insert_id_for_cur_row = nr;
3871   /*
3872     Set next insert id to point to next auto-increment value to be able to
3873     handle multi-row statements.
3874   */
3875   set_next_insert_id(compute_next_insert_id(nr, variables));
3876 
3877   return 0;
3878 }
3879 
3880 /** @brief
3881   MySQL signals that it changed the column bitmap
3882 
3883   USAGE
3884     This is for handlers that need to set up their own column bitmaps.
3885     Normally the handler should set up its own column bitmaps in
3886     index_init() or rnd_init() and in any column_bitmaps_signal() call after
3887     this.
3888 
3889     The handler is allowed to make changes to the bitmap after an index_init()
3890     or rnd_init() call is made, as after this point MySQL will not use the
3891     bitmap for any program logic checking.
3892 */
3893 void handler::column_bitmaps_signal() {
3894   DBUG_TRACE;
3895   DBUG_PRINT("info", ("read_set: %p  write_set: %p", table->read_set,
3896                       table->write_set));
3897 }
3898 
3899 /**
3900   Reserves an interval of auto_increment values from the handler.
3901 
3902   @param       offset              offset (modulus increment)
3903   @param       increment           increment between calls
3904   @param       nb_desired_values   how many values we want
3905   @param[out]  first_value         the first value reserved by the handler
3906   @param[out]  nb_reserved_values  how many values the handler reserved
3907 
3908   offset and increment means that we want values to be of the form
3909   offset + N * increment, where N>=0 is integer.
3910   If the function sets *first_value to ULLONG_MAX it means an error.
3911   If the function sets *nb_reserved_values to ULLONG_MAX it means it has
3912   reserved to "positive infinite".
3913 */
3914 
3915 void handler::get_auto_increment(
3916     ulonglong offset MY_ATTRIBUTE((unused)),
3917     ulonglong increment MY_ATTRIBUTE((unused)),
3918     ulonglong nb_desired_values MY_ATTRIBUTE((unused)), ulonglong *first_value,
3919     ulonglong *nb_reserved_values) {
3920   ulonglong nr;
3921   int error;
3922   DBUG_TRACE;
3923 
3924   (void)extra(HA_EXTRA_KEYREAD);
3925   table->mark_columns_used_by_index_no_reset(table->s->next_number_index,
3926                                              table->read_set);
3927   column_bitmaps_signal();
3928 
3929   if (ha_index_init(table->s->next_number_index, true)) {
3930     /* This should never happen, assert in debug, and fail in release build */
3931     DBUG_ASSERT(0);
3932     *first_value = ULLONG_MAX;
3933     return;
3934   }
3935 
3936   if (table->s->next_number_keypart == 0) {  // Autoincrement at key-start
3937     error = ha_index_last(table->record[1]);
3938     /*
3939       MySQL implicitly assumes that such a method does locking (as MySQL
3940       decides to use nr+increment without checking again with the handler, in
3941       handler::update_auto_increment()), so it reserves to infinity.
3942     */
3943     *nb_reserved_values = ULLONG_MAX;
3944   } else {
3945     uchar key[MAX_KEY_LENGTH];
3946     key_copy(key, table->record[0],
3947              table->key_info + table->s->next_number_index,
3948              table->s->next_number_key_offset);
3949     error =
3950         ha_index_read_map(table->record[1], key,
3951                           make_prev_keypart_map(table->s->next_number_keypart),
3952                           HA_READ_PREFIX_LAST);
3953     /*
3954       MySQL needs to call us for next row: assume we are inserting ("a",null)
3955       here, we return 3, and next this statement will want to insert
3956       ("b",null): there is no reason why ("b",3+1) would be the good row to
3957       insert: maybe it already exists, maybe 3+1 is too large...
3958     */
3959     *nb_reserved_values = 1;
3960   }
3961 
3962   if (error) {
3963     if (error == HA_ERR_END_OF_FILE || error == HA_ERR_KEY_NOT_FOUND) {
3964       /* No entry found, start with 1. */
3965       nr = 1;
3966     } else {
3967       DBUG_ASSERT(0);
3968       nr = ULLONG_MAX;
3969     }
3970   } else
3971     nr = ((ulonglong)table->next_number_field->val_int_offset(
3972               table->s->rec_buff_length) +
3973           1);
3974   ha_index_end();
3975   (void)extra(HA_EXTRA_NO_KEYREAD);
3976   *first_value = nr;
3977 }
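/*
  Worked example of the contract documented above (illustration only): with
  offset = 5 and increment = 10 the caller wants values 5, 15, 25, ... If the
  index scan above finds a maximum used value of 17, this default
  implementation returns *first_value = 18; handler::update_auto_increment()
  then rounds that up to the next value of the requested form, i.e. 25, via
  compute_next_insert_id().
*/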
3978 
3979 void handler::ha_release_auto_increment() {
3980   DBUG_ASSERT(table_share->tmp_table != NO_TMP_TABLE ||
3981               m_lock_type != F_UNLCK ||
3982               (!next_insert_id && !insert_id_for_cur_row));
3983   DEBUG_SYNC(ha_thd(), "release_auto_increment");
3984   release_auto_increment();
3985   insert_id_for_cur_row = 0;
3986   auto_inc_interval_for_cur_row.replace(0, 0, 0);
3987   auto_inc_intervals_count = 0;
3988   if (next_insert_id > 0) {
3989     next_insert_id = 0;
3990     /*
3991       this statement used forced auto_increment values if there were some,
3992       wipe them away for other statements.
3993     */
3994     table->in_use->auto_inc_intervals_forced.empty();
3995   }
3996 }
3997 
3998 const char *table_case_name(const HA_CREATE_INFO *info, const char *name) {
3999   return ((lower_case_table_names == 2 && info->alias) ? info->alias : name);
4000 }
4001 
4002 /**
4003   Construct and emit duplicate key error message using information
4004   from table's record buffer.
4005 
4006   @param table    TABLE object which record buffer should be used as
4007                   source for column values.
4008   @param key      Key description.
4009   @param msg      Error message template to which key value should be
4010                   added.
4011   @param errflag  Flags for my_error() call.
4012   @param org_table_name  The original table name (if any)
4013 */
4014 
4015 void print_keydup_error(TABLE *table, KEY *key, const char *msg, myf errflag,
4016                         const char *org_table_name) {
4017   /* Write the duplicated key in the error message */
4018   char key_buff[MAX_KEY_LENGTH];
4019   String str(key_buff, sizeof(key_buff), system_charset_info);
4020   std::string key_name;
4021 
4022   if (key == nullptr) {
4023     /* Key is unknown */
4024     key_name = "*UNKNOWN*";
4025     str.copy("", 0, system_charset_info);
4026 
4027   } else {
4028     /* Table is opened and defined at this point */
4029     key_unpack(&str, table, key);
4030     size_t max_length = MYSQL_ERRMSG_SIZE - strlen(msg);
4031     if (str.length() >= max_length) {
4032       str.length(max_length - 4);
4033       str.append(STRING_WITH_LEN("..."));
4034     }
4035     str[str.length()] = 0;
4036     if (org_table_name != nullptr)
4037       key_name = org_table_name;
4038     else
4039       key_name = table->s->table_name.str;
4040     key_name += ".";
4041 
4042     key_name += key->name;
4043   }
4044 
4045   my_printf_error(ER_DUP_ENTRY, msg, errflag, str.c_ptr(), key_name.c_str());
4046 }
4047 
4048 /**
4049   Construct and emit duplicate key error message using information
4050   from table's record buffer.
4051 
4052   @sa print_keydup_error(table, key, msg, errflag).
4053 */
4054 
4055 void print_keydup_error(TABLE *table, KEY *key, myf errflag,
4056                         const char *org_table_name) {
4057   print_keydup_error(table, key,
4058                      ER_THD(current_thd, ER_DUP_ENTRY_WITH_KEY_NAME), errflag,
4059                      org_table_name);
4060 }
4061 
4062 /**
4063   This method is used to analyse the error to see whether the error
4064   is ignorable or not. Further comments in header file.
4065 */
4066 
4067 bool handler::is_ignorable_error(int error) {
4068   DBUG_TRACE;
4069 
4070   // Catch errors that are ignorable
4071   switch (error) {
4072     // Error code 0 is not an error.
4073     case 0:
4074     // Dup key errors may be explicitly ignored.
4075     case HA_ERR_FOUND_DUPP_KEY:
4076     case HA_ERR_FOUND_DUPP_UNIQUE:
4077     // Foreign key constraint violations are ignorable.
4078     case HA_ERR_ROW_IS_REFERENCED:
4079     case HA_ERR_NO_REFERENCED_ROW:
4080       return true;
4081   }
4082 
4083   // Default is that an error is not ignorable.
4084   return false;
4085 }
4086 
4087 /**
4088   Analyse the error to see whether it is fatal or not.
4089   Further comments are in the header file.
4090 */
4091 
4092 bool handler::is_fatal_error(int error) {
4093   DBUG_TRACE;
4094 
4095   // No ignorable errors are fatal
4096   if (is_ignorable_error(error)) return false;
4097 
4098   // Catch errors that are not fatal
4099   switch (error) {
4100     /*
4101       Deadlock and lock timeout cause transaction/statement rollback so that
4102       THD::is_fatal_sub_stmt_error will be set. This means that they cannot
4103       be handled by stored program handlers inside stored functions
4104       and triggers even though they are non-fatal.
4105     */
4106     case HA_ERR_LOCK_WAIT_TIMEOUT:
4107     case HA_ERR_LOCK_DEADLOCK:
4108       return false;
4109 
4110     case HA_ERR_NULL_IN_SPATIAL:
4111       return false;
4112   }
4113 
4114   // Default is that an error is fatal
4115   return true;
4116 }
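
/*
  Illustrative sketch (not part of the original source): how a caller might
  combine the two classification helpers above when deciding how to react to
  a handler error. 'h' and 'record' are hypothetical.

  @code
    int err = h->ha_write_row(record);
    if (err != 0) {
      if (h->is_ignorable_error(err)) {
        // e.g. INSERT IGNORE: downgrade to a warning and continue.
      } else if (!h->is_fatal_error(err)) {
        // Non-fatal: report it and let stored program handlers catch it.
      } else {
        // Fatal: abort the statement.
      }
    }
  @endcode
*/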
4117 
4118 /**
4119   Print error that we got from handler function.
4120 
4121   @note
4122     In case of delete table it's only safe to use the following parts of
4123     the 'table' structure:
4124     - table->s->path
4125     - table->alias
4126 */
4127 void handler::print_error(int error, myf errflag) {
4128   THD *thd = current_thd;
4129   Foreign_key_error_handler foreign_key_error_handler(thd, this);
4130 
4131   DBUG_TRACE;
4132   DBUG_PRINT("enter", ("error: %d", error));
4133 
4134   int textno = ER_GET_ERRNO;
4135   switch (error) {
4136     case EACCES:
4137       textno = ER_OPEN_AS_READONLY;
4138       break;
4139     case EAGAIN:
4140       textno = ER_FILE_USED;
4141       break;
4142     case ENOENT: {
4143       char errbuf[MYSYS_STRERROR_SIZE];
4144       textno = ER_FILE_NOT_FOUND;
4145       my_error(textno, errflag, table_share->table_name.str, error,
4146                my_strerror(errbuf, sizeof(errbuf), error));
4147     } break;
4148     case HA_ERR_KEY_NOT_FOUND:
4149     case HA_ERR_NO_ACTIVE_RECORD:
4150     case HA_ERR_RECORD_DELETED:
4151     case HA_ERR_END_OF_FILE:
4152       textno = ER_KEY_NOT_FOUND;
4153       break;
4154     case HA_ERR_WRONG_MRG_TABLE_DEF:
4155       textno = ER_WRONG_MRG_TABLE;
4156       break;
4157     case HA_ERR_FOUND_DUPP_KEY: {
4158       uint key_nr = table ? get_dup_key(error) : -1;
4159       if ((int)key_nr >= 0) {
4160         print_keydup_error(
4161             table, key_nr == MAX_KEY ? nullptr : &table->key_info[key_nr],
4162             errflag);
4163         return;
4164       }
4165       textno = ER_DUP_KEY;
4166       break;
4167     }
4168     case HA_ERR_FOREIGN_DUPLICATE_KEY: {
4169       DBUG_ASSERT(table_share->tmp_table != NO_TMP_TABLE ||
4170                   m_lock_type != F_UNLCK);
4171 
4172       char rec_buf[MAX_KEY_LENGTH];
4173       String rec(rec_buf, sizeof(rec_buf), system_charset_info);
4174       /* Table is opened and defined at this point */
4175 
4176       /*
4177         Just print the subset of fields that are part of the first index,
4178         printing the whole row from there is not easy.
4179       */
4180       key_unpack(&rec, table, &table->key_info[0]);
4181 
4182       char child_table_name[NAME_LEN + 1];
4183       char child_key_name[NAME_LEN + 1];
4184       if (get_foreign_dup_key(child_table_name, sizeof(child_table_name),
4185                               child_key_name, sizeof(child_key_name))) {
4186         my_error(ER_FOREIGN_DUPLICATE_KEY_WITH_CHILD_INFO, errflag,
4187                  table_share->table_name.str, rec.c_ptr_safe(),
4188                  child_table_name, child_key_name);
4189       } else {
4190         my_error(ER_FOREIGN_DUPLICATE_KEY_WITHOUT_CHILD_INFO, errflag,
4191                  table_share->table_name.str, rec.c_ptr_safe());
4192       }
4193       return;
4194     }
4195     case HA_ERR_NULL_IN_SPATIAL:
4196       my_error(ER_CANT_CREATE_GEOMETRY_OBJECT, errflag);
4197       return;
4198     case HA_ERR_FOUND_DUPP_UNIQUE:
4199       textno = ER_DUP_UNIQUE;
4200       break;
4201     case HA_ERR_RECORD_CHANGED:
4202       textno = ER_CHECKREAD;
4203       break;
4204     case HA_ERR_CRASHED:
4205       textno = ER_NOT_KEYFILE;
4206       break;
4207     case HA_ERR_WRONG_IN_RECORD:
4208       textno = ER_CRASHED_ON_USAGE;
4209       break;
4210     case HA_ERR_CRASHED_ON_USAGE:
4211       textno = ER_CRASHED_ON_USAGE;
4212       break;
4213     case HA_ERR_NOT_A_TABLE:
4214       textno = error;
4215       break;
4216     case HA_ERR_CRASHED_ON_REPAIR:
4217       textno = ER_CRASHED_ON_REPAIR;
4218       break;
4219     case HA_ERR_OUT_OF_MEM:
4220       textno = ER_OUT_OF_RESOURCES;
4221       break;
4222     case HA_ERR_SE_OUT_OF_MEMORY:
4223       my_error(ER_ENGINE_OUT_OF_MEMORY, errflag, table->file->table_type());
4224       return;
4225     case HA_ERR_WRONG_COMMAND:
4226       textno = ER_ILLEGAL_HA;
4227       break;
4228     case HA_ERR_OLD_FILE:
4229       textno = ER_OLD_KEYFILE;
4230       break;
4231     case HA_ERR_UNSUPPORTED:
4232       textno = ER_UNSUPPORTED_EXTENSION;
4233       break;
4234     case HA_ERR_RECORD_FILE_FULL:
4235     case HA_ERR_INDEX_FILE_FULL: {
4236       textno = ER_RECORD_FILE_FULL;
4237       /* Write the error message to error log */
4238       LogErr(ERROR_LEVEL, ER_SERVER_RECORD_FILE_FULL,
4239              table_share->table_name.str);
4240       break;
4241     }
4242     case HA_ERR_DISK_FULL_NOWAIT: {
4243       textno = ER_DISK_FULL_NOWAIT;
4244       /* Write the error message to error log */
4245       LogErr(ERROR_LEVEL, ER_SERVER_DISK_FULL_NOWAIT,
4246              table_share->table_name.str);
4247       break;
4248     }
4249     case HA_ERR_LOCK_WAIT_TIMEOUT:
4250       textno = ER_LOCK_WAIT_TIMEOUT;
4251       break;
4252     case HA_ERR_LOCK_TABLE_FULL:
4253       textno = ER_LOCK_TABLE_FULL;
4254       break;
4255     case HA_ERR_LOCK_DEADLOCK:
4256       textno = ER_LOCK_DEADLOCK;
4257       break;
4258     case HA_ERR_READ_ONLY_TRANSACTION:
4259       textno = ER_READ_ONLY_TRANSACTION;
4260       break;
4261     case HA_ERR_CANNOT_ADD_FOREIGN:
4262       textno = ER_CANNOT_ADD_FOREIGN;
4263       break;
4264     case HA_ERR_ROW_IS_REFERENCED: {
4265       String str;
4266       /*
4267         Manipulate the error message while handling the error
4268         condition based on the access check.
4269       */
4270       thd->push_internal_handler(&foreign_key_error_handler);
4271       get_error_message(error, &str);
4272       my_error(ER_ROW_IS_REFERENCED_2, errflag, str.c_ptr_safe());
4273       thd->pop_internal_handler();
4274       return;
4275     }
4276     case HA_ERR_NO_REFERENCED_ROW: {
4277       String str;
4278       /*
4279         Manipulate the error message while handling the error
4280         condition based on the access check.
4281       */
4282       thd->push_internal_handler(&foreign_key_error_handler);
4283       get_error_message(error, &str);
4284       my_error(ER_NO_REFERENCED_ROW_2, errflag, str.c_ptr_safe());
4285       thd->pop_internal_handler();
4286       return;
4287     }
4288     case HA_ERR_TABLE_DEF_CHANGED:
4289       textno = ER_TABLE_DEF_CHANGED;
4290       break;
4291     case HA_ERR_NO_SUCH_TABLE:
4292       my_error(ER_NO_SUCH_TABLE, errflag, table_share->db.str,
4293                table_share->table_name.str);
4294       return;
4295     case HA_ERR_RBR_LOGGING_FAILED:
4296       textno = ER_BINLOG_ROW_LOGGING_FAILED;
4297       break;
4298     case HA_ERR_DROP_INDEX_FK: {
4299       const char *ptr = "???";
4300       uint key_nr = table ? get_dup_key(error) : -1;
4301       if ((int)key_nr >= 0 && key_nr != MAX_KEY)
4302         ptr = table->key_info[key_nr].name;
4303       my_error(ER_DROP_INDEX_FK, errflag, ptr);
4304       return;
4305     }
4306     case HA_ERR_TABLE_NEEDS_UPGRADE:
4307       textno = ER_TABLE_NEEDS_UPGRADE;
4308       break;
4309     case HA_ERR_NO_PARTITION_FOUND:
4310       textno = ER_WRONG_PARTITION_NAME;
4311       break;
4312     case HA_ERR_TABLE_READONLY:
4313       textno = ER_OPEN_AS_READONLY;
4314       break;
4315     case HA_ERR_AUTOINC_READ_FAILED:
4316       textno = ER_AUTOINC_READ_FAILED;
4317       break;
4318     case HA_ERR_AUTOINC_ERANGE:
4319       textno = ER_WARN_DATA_OUT_OF_RANGE;
4320       break;
4321     case HA_ERR_TOO_MANY_CONCURRENT_TRXS:
4322       textno = ER_TOO_MANY_CONCURRENT_TRXS;
4323       break;
4324     case HA_ERR_INDEX_COL_TOO_LONG:
4325       textno = ER_INDEX_COLUMN_TOO_LONG;
4326       break;
4327     case HA_ERR_NOT_IN_LOCK_PARTITIONS:
4328       textno = ER_ROW_DOES_NOT_MATCH_GIVEN_PARTITION_SET;
4329       break;
4330     case HA_ERR_INDEX_CORRUPT:
4331       textno = ER_INDEX_CORRUPT;
4332       break;
4333     case HA_ERR_UNDO_REC_TOO_BIG:
4334       textno = ER_UNDO_RECORD_TOO_BIG;
4335       break;
4336     case HA_ERR_TABLE_IN_FK_CHECK:
4337       textno = ER_TABLE_IN_FK_CHECK;
4338       break;
4339     case HA_WRONG_CREATE_OPTION:
4340       textno = ER_ILLEGAL_HA;
4341       break;
4342     case HA_MISSING_CREATE_OPTION: {
4343       const char *engine = table_type();
4344       my_error(ER_MISSING_HA_CREATE_OPTION, errflag, engine);
4345       return;
4346     }
4347     case HA_ERR_TOO_MANY_FIELDS:
4348       textno = ER_TOO_MANY_FIELDS;
4349       break;
4350     case HA_ERR_INNODB_READ_ONLY:
4351       textno = ER_INNODB_READ_ONLY;
4352       break;
4353     case HA_ERR_TEMP_FILE_WRITE_FAILURE:
4354       textno = ER_TEMP_FILE_WRITE_FAILURE;
4355       break;
4356     case HA_ERR_INNODB_FORCED_RECOVERY:
4357       textno = ER_INNODB_FORCED_RECOVERY;
4358       break;
4359     case HA_ERR_TABLE_CORRUPT:
4360       my_error(ER_TABLE_CORRUPT, errflag, table_share->db.str,
4361                table_share->table_name.str);
4362       return;
4363     case HA_ERR_QUERY_INTERRUPTED:
4364       textno = ER_QUERY_INTERRUPTED;
4365       break;
4366     case HA_ERR_TABLESPACE_MISSING: {
4367       char errbuf[MYSYS_STRERROR_SIZE];
4368       snprintf(errbuf, MYSYS_STRERROR_SIZE, "`%s`.`%s`", table_share->db.str,
4369                table_share->table_name.str);
4370       my_error(ER_TABLESPACE_MISSING, errflag, errbuf, error);
4371       return;
4372     }
4373     case HA_ERR_TABLESPACE_IS_NOT_EMPTY:
4374       my_error(ER_TABLESPACE_IS_NOT_EMPTY, errflag, table_share->db.str,
4375                table_share->table_name.str);
4376       return;
4377     case HA_ERR_WRONG_FILE_NAME:
4378       my_error(ER_WRONG_FILE_NAME, errflag, table_share->table_name.str);
4379       return;
4380     case HA_ERR_NOT_ALLOWED_COMMAND:
4381       textno = ER_NOT_ALLOWED_COMMAND;
4382       break;
4383     case HA_ERR_NO_SESSION_TEMP:
4384       textno = ER_NO_SESSION_TEMP;
4385       break;
4386     case HA_ERR_WRONG_TABLE_NAME:
4387       textno = ER_WRONG_TABLE_NAME;
4388       break;
4389     case HA_ERR_TOO_LONG_PATH:
4390       textno = ER_TABLE_NAME_CAUSES_TOO_LONG_PATH;
4391       break;
4392     default: {
4393       /* The error was "unknown" to this function.
4394          Ask the handler if it has a message for this error. */
4395       String str;
4396       bool temporary = get_error_message(error, &str);
4397       if (!str.is_empty()) {
4398         const char *engine = table_type();
4399         if (temporary)
4400           my_error(ER_GET_TEMPORARY_ERRMSG, errflag, error, str.ptr(), engine);
4401         else
4402           my_error(ER_GET_ERRMSG, errflag, error, str.ptr(), engine);
4403       } else {
4404         char errbuf[MYSQL_ERRMSG_SIZE];
4405         my_error(ER_GET_ERRNO, errflag, error,
4406                  my_strerror(errbuf, MYSQL_ERRMSG_SIZE, error));
4407       }
4408       return;
4409     }
4410   }
4411   if (textno != ER_FILE_NOT_FOUND)
4412     my_error(textno, errflag, table_share->table_name.str, error);
4413 }
4414 
4415 /**
4416   Return an error message specific to this handler.
4417 
4418   @param error  error code previously returned by handler
4419   @param buf    pointer to String where to add error message
4420 
4421   @return
4422     Returns true if this is a temporary error
4423 */
4424 bool handler::get_error_message(int error MY_ATTRIBUTE((unused)),
4425                                 String *buf MY_ATTRIBUTE((unused))) {
4426   return false;
4427 }
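
/*
  Illustrative sketch (not part of the original source): a storage engine may
  override get_error_message() to supply engine-specific text, which
  handler::print_error() then embeds in ER_GET_ERRMSG and the foreign key
  error messages above. The 'ha_example' class and its message are
  hypothetical.

  @code
    bool ha_example::get_error_message(int error, String *buf) {
      if (error == HA_ERR_GENERIC)
        buf->copy(STRING_WITH_LEN("example engine: internal failure"),
                  system_charset_info);
      return false;  // not a temporary error
    }
  @endcode
*/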
4428 
4429 /**
4430   Check for incompatible collation changes.
4431 
4432   @retval
4433     HA_ADMIN_NEEDS_UPGRADE   Table may have data requiring upgrade.
4434   @retval
4435     0                        No upgrade required.
4436 */
4437 
4438 int handler::check_collation_compatibility() {
4439   ulong mysql_version = table->s->mysql_version;
4440 
4441   if (mysql_version < 50124) {
4442     KEY *key = table->key_info;
4443     KEY *key_end = key + table->s->keys;
4444     for (; key < key_end; key++) {
4445       KEY_PART_INFO *key_part = key->key_part;
4446       KEY_PART_INFO *key_part_end = key_part + key->user_defined_key_parts;
4447       for (; key_part < key_part_end; key_part++) {
4448         if (!key_part->fieldnr) continue;
4449         Field *field = table->field[key_part->fieldnr - 1];
4450         uint cs_number = field->charset()->number;
4451         if ((mysql_version < 50048 &&
4452              (cs_number == 11 || /* ascii_general_ci - bug #29499, bug #27562 */
4453               cs_number == 41 || /* latin7_general_ci - bug #29461 */
4454               cs_number == 42 || /* latin7_general_cs - bug #29461 */
4455               cs_number == 20 || /* latin7_estonian_cs - bug #29461 */
4456               cs_number == 21 || /* latin2_hungarian_ci - bug #29461 */
4457               cs_number == 22 || /* koi8u_general_ci - bug #29461 */
4458               cs_number == 23 || /* cp1251_ukrainian_ci - bug #29461 */
4459               cs_number == 26)) || /* cp1250_general_ci - bug #29461 */
4460             (mysql_version < 50124 &&
4461              (cs_number == 33 || /* utf8_general_ci - bug #27877 */
4462               cs_number == 35))) /* ucs2_general_ci - bug #27877 */
4463           return HA_ADMIN_NEEDS_UPGRADE;
4464       }
4465     }
4466   }
4467   return 0;
4468 }
4469 
4470 int handler::ha_check_for_upgrade(HA_CHECK_OPT *check_opt) {
4471   int error;
4472   KEY *keyinfo, *keyend;
4473   KEY_PART_INFO *keypart, *keypartend;
4474 
4475   if (!table->s->mysql_version) {
4476     /* check for blob-in-key error */
4477     keyinfo = table->key_info;
4478     keyend = table->key_info + table->s->keys;
4479     for (; keyinfo < keyend; keyinfo++) {
4480       keypart = keyinfo->key_part;
4481       keypartend = keypart + keyinfo->user_defined_key_parts;
4482       for (; keypart < keypartend; keypart++) {
4483         if (!keypart->fieldnr) continue;
4484         Field *field = table->field[keypart->fieldnr - 1];
4485         if (field->type() == MYSQL_TYPE_BLOB) {
4486           if (check_opt->sql_flags & TT_FOR_UPGRADE)
4487             check_opt->flags = T_MEDIUM;
4488           return HA_ADMIN_NEEDS_CHECK;
4489         }
4490       }
4491     }
4492   }
4493 
4494   if ((error = check_collation_compatibility())) return error;
4495 
4496   return check_for_upgrade(check_opt);
4497 }
4498 
4499 // Function identifies any old data type present in table.
4500 int check_table_for_old_types(const TABLE *table, bool check_temporal_upgrade) {
4501   Field **field;
4502 
4503   for (field = table->field; (*field); field++) {
4504     if (table->s->mysql_version == 0)  // prior to MySQL 5.0
4505     {
4506       /* check for bad DECIMAL field */
4507       if ((*field)->type() == MYSQL_TYPE_NEWDECIMAL) {
4508         return HA_ADMIN_NEEDS_ALTER;
4509       }
4510       if ((*field)->type() == MYSQL_TYPE_VAR_STRING) {
4511         return HA_ADMIN_NEEDS_ALTER;
4512       }
4513     }
4514 
4515     /*
4516       Check for old DECIMAL field.
4517 
4518       The above check does not account for pre-5.0 DECIMAL types, which can
4519       be present in the data directory if the user did an in-place upgrade from
4520       mysql-4.1 to mysql-5.0.
4521     */
4522     if ((*field)->type() == MYSQL_TYPE_DECIMAL) {
4523       return HA_ADMIN_NEEDS_DUMP_UPGRADE;
4524     }
4525 
4526     if ((*field)->type() == MYSQL_TYPE_YEAR && (*field)->field_length == 2)
4527       return HA_ADMIN_NEEDS_ALTER;  // obsolete YEAR(2) type
4528 
4529     if (check_temporal_upgrade) {
4530       if (((*field)->real_type() == MYSQL_TYPE_TIME) ||
4531           ((*field)->real_type() == MYSQL_TYPE_DATETIME) ||
4532           ((*field)->real_type() == MYSQL_TYPE_TIMESTAMP))
4533         return HA_ADMIN_NEEDS_ALTER;
4534     }
4535   }
4536   return 0;
4537 }
4538 
4539 /**
4540   @return
4541     key if error because of duplicated keys
4542 */
4543 uint handler::get_dup_key(int error) {
4544   DBUG_ASSERT(table_share->tmp_table != NO_TMP_TABLE || m_lock_type != F_UNLCK);
4545   DBUG_TRACE;
4546   table->file->errkey = (uint)-1;
4547   if (error == HA_ERR_FOUND_DUPP_KEY || error == HA_ERR_FOUND_DUPP_UNIQUE ||
4548       error == HA_ERR_NULL_IN_SPATIAL || error == HA_ERR_DROP_INDEX_FK)
4549     table->file->info(HA_STATUS_ERRKEY | HA_STATUS_NO_LOCK);
4550   return table->file->errkey;
4551 }
4552 
4553 bool handler::get_foreign_dup_key(char *, uint, char *, uint) {
4554   DBUG_ASSERT(false);
4555   return (false);
4556 }
4557 
4558 /**
4559   Delete all files with an extension listed in handlerton::file_extensions.
4560 
4561   @param name		Base name of table
4562 
4563   @note
4564     We assume that the handler may return more extensions than
4565     was actually used for the file.
4566 
4567   @retval
4568     0   If we successfully deleted at least one file with a listed extension
4569     and didn't get any errors other than ENOENT
4570   @retval
4571     !0  Error
4572 */
4573 int handler::delete_table(const char *name, const dd::Table *) {
4574   int saved_error = 0;
4575   int error = 0;
4576   int enoent_or_zero = ENOENT;  // Error if no file was deleted
4577   char buff[FN_REFLEN];
4578   const char **start_ext;
4579 
4580   DBUG_ASSERT(m_lock_type == F_UNLCK);
4581 
4582   if (!(start_ext = ht->file_extensions)) return 0;
4583   for (const char **ext = start_ext; *ext; ext++) {
4584     fn_format(buff, name, "", *ext, MY_UNPACK_FILENAME | MY_APPEND_EXT);
4585     if (mysql_file_delete_with_symlink(key_file_misc, buff, MYF(0))) {
4586       if (my_errno() != ENOENT) {
4587         /*
4588           If error on the first existing file, return the error.
4589           Otherwise delete as much as possible.
4590         */
4591         if (enoent_or_zero) return my_errno();
4592         saved_error = my_errno();
4593       }
4594     } else
4595       enoent_or_zero = 0;  // No error for ENOENT
4596     error = enoent_or_zero;
4597   }
4598   return saved_error ? saved_error : error;
4599 }
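
/*
  Illustrative sketch (not part of the original source): the default
  delete_table() above is driven entirely by handlerton::file_extensions.
  A file-per-table engine that registers its extensions (the names below are
  hypothetical) gets table file removal for free.

  @code
    static const char *ha_example_exts[] = {".exd", ".exi", NullS};

    static int example_init_func(void *p) {
      handlerton *example_hton = static_cast<handlerton *>(p);
      example_hton->file_extensions = ha_example_exts;
      return 0;
    }
  @endcode
*/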
4600 
4601 int handler::rename_table(const char *from, const char *to,
4602                           const dd::Table *from_table_def
4603                               MY_ATTRIBUTE((unused)),
4604                           dd::Table *to_table_def MY_ATTRIBUTE((unused))) {
4605   int error = 0;
4606   const char **ext, **start_ext;
4607 
4608   if (!(start_ext = ht->file_extensions)) return 0;
4609   for (ext = start_ext; *ext; ext++) {
4610     if (rename_file_ext(from, to, *ext)) {
4611       error = my_errno();
4612       if (error != ENOENT) break;
4613       error = 0;
4614     }
4615   }
4616   if (error) {
4617     /* Try to revert the rename. Ignore errors. */
4618     for (; ext >= start_ext; ext--) rename_file_ext(to, from, *ext);
4619   }
4620   return error;
4621 }
4622 
4623 void handler::drop_table(const char *name) {
4624   close();
4625   delete_table(name, nullptr);
4626 }
4627 
4628 /**
4629   Performs checks upon the table.
4630 
4631   @param thd                thread doing CHECK TABLE operation
4632   @param check_opt          options from the parser
4633 
4634   @retval
4635     HA_ADMIN_OK               Successful upgrade
4636   @retval
4637     HA_ADMIN_NEEDS_UPGRADE    Table has structures requiring upgrade
4638   @retval
4639     HA_ADMIN_NEEDS_ALTER      Table has structures requiring ALTER TABLE
4640   @retval
4641     HA_ADMIN_NOT_IMPLEMENTED
4642 */
4643 int handler::ha_check(THD *thd, HA_CHECK_OPT *check_opt) {
4644   int error;
4645   DBUG_ASSERT(table_share->tmp_table != NO_TMP_TABLE || m_lock_type != F_UNLCK);
4646 
4647   if ((table->s->mysql_version >= MYSQL_VERSION_ID) &&
4648       (check_opt->sql_flags & TT_FOR_UPGRADE))
4649     return 0;
4650 
4651   if (table->s->mysql_version < MYSQL_VERSION_ID) {
4652     // Check for old temporal format if avoid_temporal_upgrade is disabled.
4653     mysql_mutex_lock(&LOCK_global_system_variables);
4654     const bool check_temporal_upgrade = !avoid_temporal_upgrade;
4655     mysql_mutex_unlock(&LOCK_global_system_variables);
4656 
4657     if ((error = check_table_for_old_types(table, check_temporal_upgrade)))
4658       return error;
4659     error = ha_check_for_upgrade(check_opt);
4660     if (error && (error != HA_ADMIN_NEEDS_CHECK)) return error;
4661     if (!error && (check_opt->sql_flags & TT_FOR_UPGRADE)) return 0;
4662   }
4663   return check(thd, check_opt);
4664 }
4665 
4666 /**
4667   A helper function to mark a transaction read-write,
4668   if it is started.
4669 */
4670 
4671 void handler::mark_trx_read_write() {
4672   Ha_trx_info *ha_info = &ha_thd()->get_ha_data(ht->slot)->ha_info[0];
4673   /*
4674     When a storage engine method is called, the transaction must
4675     have been started, unless it's a DDL call, for which the
4676     storage engine starts the transaction internally, and commits
4677     it internally, without registering in the ha_list.
4678     Unfortunately here we can't know for sure if the engine
4679     has registered the transaction or not, so we must check.
4680   */
4681   if (ha_info->is_started()) {
4682     DBUG_ASSERT(has_transactions());
4683     /*
4684       table_share can be NULL in ha_delete_table(). See implementation
4685       of standalone function ha_delete_table() in sql_base.cc.
4686     */
4687     if (table_share == nullptr || table_share->tmp_table == NO_TMP_TABLE) {
4688       /* TempTable and Heap tables don't use/support transactions. */
4689       ha_info->set_trx_read_write();
4690     }
4691   }
4692 }
4693 
4694 /**
4695   Repair table: public interface.
4696 
4697   @sa handler::repair()
4698 */
4699 
4700 int handler::ha_repair(THD *thd, HA_CHECK_OPT *check_opt) {
4701   int result;
4702   mark_trx_read_write();
4703 
4704   result = repair(thd, check_opt);
4705   DBUG_ASSERT(result == HA_ADMIN_NOT_IMPLEMENTED ||
4706               ha_table_flags() & HA_CAN_REPAIR);
4707 
4708   // TODO: Check if table version in DD needs to be updated.
4709   // Previously we checked/updated FRM version here.
4710   return result;
4711 }
4712 
4713 /**
4714   Start bulk insert.
4715 
4716   Allow the handler to optimize for multiple row insert.
4717 
4718   @note rows == 0 means we will probably insert many rows.
4719 
4720   @param rows  Estimated rows to insert
4721 */
4722 
4723 void handler::ha_start_bulk_insert(ha_rows rows) {
4724   DBUG_TRACE;
4725   DBUG_ASSERT(table_share->tmp_table != NO_TMP_TABLE || m_lock_type == F_WRLCK);
4726   estimation_rows_to_insert = rows;
4727   start_bulk_insert(rows);
4728 }
4729 
4730 /**
4731   End bulk insert.
4732 
4733   @return Operation status
4734     @retval 0     Success
4735     @retval != 0  Failure (error code returned)
4736 */
4737 
4738 int handler::ha_end_bulk_insert() {
4739   DBUG_TRACE;
4740   DBUG_ASSERT(table_share->tmp_table != NO_TMP_TABLE || m_lock_type == F_WRLCK);
4741   estimation_rows_to_insert = 0;
4742   return end_bulk_insert();
4743 }
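
/*
  Illustrative sketch (not part of the original source): the usual pairing of
  the two calls above around a multi-row insert loop. 'tbl', 'rows_to_copy'
  and fill_next_record() are hypothetical; passing 0 means "probably many
  rows".

  @code
    tbl->file->ha_start_bulk_insert(rows_to_copy);
    int err = 0;
    while (!err && fill_next_record(tbl))
      err = tbl->file->ha_write_row(tbl->record[0]);
    int end_err = tbl->file->ha_end_bulk_insert();
    if (!err) err = end_err;
  @endcode
*/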
4744 
4745 /**
4746   Bulk update row: public interface.
4747 
4748   @sa handler::bulk_update_row()
4749 */
4750 
4751 int handler::ha_bulk_update_row(const uchar *old_data, uchar *new_data,
4752                                 uint *dup_key_found) {
4753   DBUG_ASSERT(table_share->tmp_table != NO_TMP_TABLE || m_lock_type == F_WRLCK);
4754   mark_trx_read_write();
4755 
4756   return bulk_update_row(old_data, new_data, dup_key_found);
4757 }
4758 
4759 /**
4760   Delete all rows: public interface.
4761 
4762   @sa handler::delete_all_rows()
4763 */
4764 
4765 int handler::ha_delete_all_rows() {
4766   DBUG_ASSERT(table_share->tmp_table != NO_TMP_TABLE || m_lock_type == F_WRLCK);
4767   mark_trx_read_write();
4768 
4769   return delete_all_rows();
4770 }
4771 
4772 /**
4773   Truncate table: public interface.
4774 
4775   @sa handler::truncate()
4776 */
4777 
4778 int handler::ha_truncate(dd::Table *table_def) {
4779   DBUG_ASSERT(table_share->tmp_table != NO_TMP_TABLE || m_lock_type == F_WRLCK);
4780   mark_trx_read_write();
4781 
4782   return truncate(table_def);
4783 }
4784 
4785 /**
4786   Optimize table: public interface.
4787 
4788   @sa handler::optimize()
4789 */
4790 
4791 int handler::ha_optimize(THD *thd, HA_CHECK_OPT *check_opt) {
4792   DBUG_ASSERT(table_share->tmp_table != NO_TMP_TABLE || m_lock_type == F_WRLCK);
4793   mark_trx_read_write();
4794 
4795   return optimize(thd, check_opt);
4796 }
4797 
4798 /**
4799   Analyze table: public interface.
4800 
4801   @sa handler::analyze()
4802 */
4803 
4804 int handler::ha_analyze(THD *thd, HA_CHECK_OPT *check_opt) {
4805   DBUG_ASSERT(table_share->tmp_table != NO_TMP_TABLE || m_lock_type != F_UNLCK);
4806   mark_trx_read_write();
4807 
4808   return analyze(thd, check_opt);
4809 }
4810 
4811 /**
4812   Check and repair table: public interface.
4813 
4814   @sa handler::check_and_repair()
4815 */
4816 
4817 bool handler::ha_check_and_repair(THD *thd) {
4818   DBUG_ASSERT(table_share->tmp_table != NO_TMP_TABLE || m_lock_type == F_UNLCK);
4819   mark_trx_read_write();
4820 
4821   return check_and_repair(thd);
4822 }
4823 
4824 /**
4825   Disable indexes: public interface.
4826 
4827   @sa handler::disable_indexes()
4828 */
4829 
4830 int handler::ha_disable_indexes(uint mode) {
4831   DBUG_ASSERT(table_share->tmp_table != NO_TMP_TABLE || m_lock_type != F_UNLCK);
4832   mark_trx_read_write();
4833 
4834   return disable_indexes(mode);
4835 }
4836 
4837 /**
4838   Enable indexes: public interface.
4839 
4840   @sa handler::enable_indexes()
4841 */
4842 
4843 int handler::ha_enable_indexes(uint mode) {
4844   DBUG_ASSERT(table_share->tmp_table != NO_TMP_TABLE || m_lock_type != F_UNLCK);
4845   mark_trx_read_write();
4846 
4847   return enable_indexes(mode);
4848 }
4849 
4850 /**
4851   Discard or import tablespace: public interface.
4852 
4853   @sa handler::discard_or_import_tablespace()
4854 */
4855 
4856 int handler::ha_discard_or_import_tablespace(bool discard,
4857                                              dd::Table *table_def) {
4858   DBUG_ASSERT(table_share->tmp_table != NO_TMP_TABLE || m_lock_type == F_WRLCK);
4859   mark_trx_read_write();
4860 
4861   return discard_or_import_tablespace(discard, table_def);
4862 }
4863 
4864 bool handler::ha_prepare_inplace_alter_table(TABLE *altered_table,
4865                                              Alter_inplace_info *ha_alter_info,
4866                                              const dd::Table *old_table_def,
4867                                              dd::Table *new_table_def) {
4868   DBUG_ASSERT(table_share->tmp_table != NO_TMP_TABLE || m_lock_type != F_UNLCK);
4869   mark_trx_read_write();
4870 
4871   return prepare_inplace_alter_table(altered_table, ha_alter_info,
4872                                      old_table_def, new_table_def);
4873 }
4874 
4875 bool handler::ha_commit_inplace_alter_table(TABLE *altered_table,
4876                                             Alter_inplace_info *ha_alter_info,
4877                                             bool commit,
4878                                             const dd::Table *old_table_def,
4879                                             dd::Table *new_table_def) {
4880   /*
4881     At this point we should have an exclusive metadata lock on the table.
4882     The exception is if we're about to roll back changes (commit= false).
4883     In this case, we might be rolling back after a failed lock upgrade,
4884     so we could be holding the same lock level as for inplace_alter_table().
4885   */
4886   DBUG_ASSERT(ha_thd()->mdl_context.owns_equal_or_stronger_lock(
4887                   MDL_key::TABLE, table->s->db.str, table->s->table_name.str,
4888                   MDL_EXCLUSIVE) ||
4889               !commit);
4890 
4891   return commit_inplace_alter_table(altered_table, ha_alter_info, commit,
4892                                     old_table_def, new_table_def);
4893 }
4894 
4895 /*
4896    Default implementation to support in-place/instant alter table
4897    for operations which do not affect table data.
4898 */
4899 
4900 enum_alter_inplace_result handler::check_if_supported_inplace_alter(
4901     TABLE *altered_table MY_ATTRIBUTE((unused)),
4902     Alter_inplace_info *ha_alter_info) {
4903   DBUG_TRACE;
4904 
4905   HA_CREATE_INFO *create_info = ha_alter_info->create_info;
4906 
4907   Alter_inplace_info::HA_ALTER_FLAGS inplace_offline_operations =
4908       Alter_inplace_info::ALTER_COLUMN_EQUAL_PACK_LENGTH |
4909       Alter_inplace_info::ALTER_COLUMN_NAME |
4910       Alter_inplace_info::ALTER_COLUMN_DEFAULT |
4911       Alter_inplace_info::CHANGE_CREATE_OPTION |
4912       Alter_inplace_info::ALTER_RENAME | Alter_inplace_info::RENAME_INDEX |
4913       Alter_inplace_info::ALTER_INDEX_COMMENT |
4914       Alter_inplace_info::CHANGE_INDEX_OPTION |
4915       Alter_inplace_info::ALTER_COLUMN_INDEX_LENGTH;
4916 
4917   /* Is there at least one operation that requires copy algorithm? */
4918   if (ha_alter_info->handler_flags & ~inplace_offline_operations)
4919     return HA_ALTER_INPLACE_NOT_SUPPORTED;
4920 
4921   /*
4922     ALTER TABLE tbl_name CONVERT TO CHARACTER SET .. and
4923     ALTER TABLE table_name DEFAULT CHARSET = .. most likely
4924     change column charsets and so are not supported in-place through
4925     the old API.
4926 
4927     Changing of PACK_KEYS, MAX_ROWS and ROW_FORMAT options were
4928     not supported as in-place operations in old API either.
4929   */
4930   if (create_info->used_fields &
4931           (HA_CREATE_USED_CHARSET | HA_CREATE_USED_DEFAULT_CHARSET |
4932            HA_CREATE_USED_PACK_KEYS | HA_CREATE_USED_MAX_ROWS) ||
4933       (table->s->row_type != create_info->row_type))
4934     return HA_ALTER_INPLACE_NOT_SUPPORTED;
4935 
4936   // The presence of engine attributes does not prevent inplace so
4937   // that we get the same behavior as COMMENT. If SEs support engine
4938   // attribute values which are incompatible with INPLACE, they need to
4939   // check for that when overriding (as they must do for parsed
4940   // comments).
4941 
4942   uint table_changes = (ha_alter_info->handler_flags &
4943                         Alter_inplace_info::ALTER_COLUMN_EQUAL_PACK_LENGTH)
4944                            ? IS_EQUAL_PACK_LENGTH
4945                            : IS_EQUAL_YES;
4946   if (table->file->check_if_incompatible_data(create_info, table_changes) ==
4947       COMPATIBLE_DATA_YES)
4948     return HA_ALTER_INPLACE_INSTANT;
4949 
4950   return HA_ALTER_INPLACE_NOT_SUPPORTED;
4951 }
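
/*
  Illustrative sketch (not part of the original source): an engine that wants
  to support additional in-place operations overrides the method above and
  can fall back to the default for everything it does not handle itself.
  The 'ha_example' class and its policy are hypothetical.

  @code
    enum_alter_inplace_result ha_example::check_if_supported_inplace_alter(
        TABLE *altered_table, Alter_inplace_info *ha_alter_info) {
      // Handle pure index renames natively; defer everything else.
      if (ha_alter_info->handler_flags == Alter_inplace_info::RENAME_INDEX)
        return HA_ALTER_INPLACE_INSTANT;
      return handler::check_if_supported_inplace_alter(altered_table,
                                                       ha_alter_info);
    }
  @endcode
*/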
4952 
4953 void Alter_inplace_info::report_unsupported_error(const char *not_supported,
4954                                                   const char *try_instead) {
4955   if (unsupported_reason == nullptr)
4956     my_error(ER_ALTER_OPERATION_NOT_SUPPORTED, MYF(0), not_supported,
4957              try_instead);
4958   else
4959     my_error(ER_ALTER_OPERATION_NOT_SUPPORTED_REASON, MYF(0), not_supported,
4960              unsupported_reason, try_instead);
4961 }
4962 
4963 /**
4964   Rename table: public interface.
4965 
4966   @sa handler::rename_table()
4967 */
4968 
4969 int handler::ha_rename_table(const char *from, const char *to,
4970                              const dd::Table *from_table_def,
4971                              dd::Table *to_table_def) {
4972   DBUG_ASSERT(m_lock_type == F_UNLCK);
4973   mark_trx_read_write();
4974 
4975   return rename_table(from, to, from_table_def, to_table_def);
4976 }
4977 
4978 /**
4979   Delete table: public interface.
4980 
4981   @sa handler::delete_table()
4982 */
4983 
4984 int handler::ha_delete_table(const char *name, const dd::Table *table_def) {
4985   DBUG_ASSERT(m_lock_type == F_UNLCK);
4986   mark_trx_read_write();
4987 
4988   return delete_table(name, table_def);
4989 }
4990 
4991 /**
4992   Drop table in the engine: public interface.
4993 
4994   @sa handler::drop_table()
4995 */
4996 
4997 void handler::ha_drop_table(const char *name) {
4998   DBUG_ASSERT(m_lock_type == F_UNLCK);
4999   mark_trx_read_write();
5000 
5001   return drop_table(name);
5002 }
5003 
5004 /**
5005   Create a table in the engine: public interface.
5006 
5007   @sa handler::create()
5008 */
5009 
5010 int handler::ha_create(const char *name, TABLE *form, HA_CREATE_INFO *info,
5011                        dd::Table *table_def) {
5012   DBUG_ASSERT(m_lock_type == F_UNLCK);
5013   mark_trx_read_write();
5014 
5015   return create(name, form, info, table_def);
5016 }
5017 
5018 /**
5019  * Prepares the secondary engine for table load.
5020  *
5021  * @param table The table to load into the secondary engine. Its read_set tells
5022  * which columns to load.
5023  *
5024  * @sa handler::prepare_load_table()
5025  */
5026 int handler::ha_prepare_load_table(const TABLE &table) {
5027   return prepare_load_table(table);
5028 }
5029 
5030 /**
5031  * Loads a table into its defined secondary storage engine: public interface.
5032  *
5033  * @param table The table to load into the secondary engine. Its read_set tells
5034  * which columns to load.
5035  *
5036  * @sa handler::load_table()
5037  */
5038 int handler::ha_load_table(const TABLE &table) { return load_table(table); }
5039 
5040 /**
5041  * Unloads a table from its defined secondary storage engine: public interface.
5042  *
5043  * @sa handler::unload_table()
5044  */
5045 int handler::ha_unload_table(const char *db_name, const char *table_name,
5046                              bool error_if_not_loaded) {
5047   return unload_table(db_name, table_name, error_if_not_loaded);
5048 }
5049 
5050 /**
5051   Get the hard coded SE private data from the handler for a DD table.
5052 
5053   @sa handler::get_se_private_data()
5054 */
5055 bool handler::ha_get_se_private_data(dd::Table *dd_table, bool reset) {
5056   return get_se_private_data(dd_table, reset);
5057 }
5058 
5059 /**
5060   Tell the storage engine that it is allowed to "disable transaction" in the
5061   handler. It is a hint that ACID is not required - it is used in NDB for
5062   ALTER TABLE, for example, when data are copied to a temporary table.
5063   A storage engine may treat this hint any way it likes. NDB for example
5064   starts to commit every now and then automatically.
5065   This hint can be safely ignored.
5066 */
5067 int ha_enable_transaction(THD *thd, bool on) {
5068   int error = 0;
5069   DBUG_TRACE;
5070   DBUG_PRINT("enter", ("on: %d", (int)on));
5071 
5072   if ((thd->get_transaction()->m_flags.enabled = on)) {
5073     /*
5074       Now all storage engines should have transaction handling enabled.
5075       But some may have it enabled all the time - "disabling" transactions
5076       is an optimization hint that the storage engine is free to ignore.
5077       So, let's commit an open transaction (if any) now.
5078     */
5079     if (!(error = ha_commit_trans(thd, false)))
5080       error = trans_commit_implicit(thd);
5081   }
5082   return error;
5083 }
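
/*
  Illustrative sketch (not part of the original source): the hint above is
  typically wrapped around a bulk copy phase, e.g. during ALTER TABLE. The
  copy routine is hypothetical.

  @code
    if (ha_enable_transaction(thd, false)) return true;  // ACID not required
    int err = copy_rows_to_new_table(thd);               // hypothetical
    if (ha_enable_transaction(thd, true)) return true;   // commits open trx
    return err != 0;
  @endcode
*/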
5084 
5085 int handler::index_next_same(uchar *buf, const uchar *key, uint keylen) {
5086   int error;
5087   DBUG_TRACE;
5088   if (!(error = index_next(buf))) {
5089     ptrdiff_t ptrdiff = buf - table->record[0];
5090     uchar *save_record_0 = nullptr;
5091     KEY *key_info = nullptr;
5092     KEY_PART_INFO *key_part = nullptr;
5093     KEY_PART_INFO *key_part_end = nullptr;
5094 
5095     /*
5096       key_cmp_if_same() compares table->record[0] against 'key'.
5097       In parts it uses table->record[0] directly, in parts it uses
5098       field objects with their local pointers into table->record[0].
5099       If 'buf' is distinct from table->record[0], we need to move
5100       all record references. This is table->record[0] itself and
5101       the field pointers of the fields used in this key.
5102     */
5103     if (ptrdiff) {
5104       save_record_0 = table->record[0];
5105       table->record[0] = buf;
5106       key_info = table->key_info + active_index;
5107       key_part = key_info->key_part;
5108       key_part_end = key_part + key_info->user_defined_key_parts;
5109       for (; key_part < key_part_end; key_part++) {
5110         DBUG_ASSERT(key_part->field);
5111         key_part->field->move_field_offset(ptrdiff);
5112       }
5113     }
5114 
5115     if (key_cmp_if_same(table, key, active_index, keylen))
5116       error = HA_ERR_END_OF_FILE;
5117 
5118     /* Move back if necessary. */
5119     if (ptrdiff) {
5120       table->record[0] = save_record_0;
5121       for (key_part = key_info->key_part; key_part < key_part_end; key_part++)
5122         key_part->field->move_field_offset(-ptrdiff);
5123     }
5124   }
5125   return error;
5126 }
5127 
5128 /****************************************************************************
5129 ** Some general functions that aren't in the handler class
5130 ****************************************************************************/
5131 
5132 /**
5133   Initializes the table file and calls the appropriate storage-engine create method.
5134 
5135   @param thd                 Thread context.
5136   @param path                Path to table file (without extension).
5137   @param db                  Database name.
5138   @param table_name          Table name.
5139   @param create_info         HA_CREATE_INFO describing table.
5140   @param update_create_info  Indicates that create_info needs to be
5141                              updated from table share.
5142   @param is_temp_table       Indicates that this is temporary table (for
5143                              cases when this info is not available from
5144                              HA_CREATE_INFO).
5145   @param table_def           Data-dictionary object describing table to
5146                              be used for table creation. Can be adjusted
5147                              by storage engine if it supports atomic DDL.
5148                              For non-temporary tables these changes will
5149                              be saved to the data-dictionary by this call.
5150 
5151   @retval
5152    0  ok
5153   @retval
5154    1  error
5155 */
5156 int ha_create_table(THD *thd, const char *path, const char *db,
5157                     const char *table_name, HA_CREATE_INFO *create_info,
5158                     bool update_create_info, bool is_temp_table,
5159                     dd::Table *table_def) {
5160   int error = 1;
5161   TABLE table;
5162   char name_buff[FN_REFLEN];
5163   const char *name;
5164   TABLE_SHARE share;
5165 #ifdef HAVE_PSI_TABLE_INTERFACE
5166   bool temp_table = is_temp_table ||
5167                     (create_info->options & HA_LEX_CREATE_TMP_TABLE) ||
5168                     (strstr(path, tmp_file_prefix) != nullptr);
5169 #endif
5170   DBUG_TRACE;
5171 
5172   init_tmp_table_share(thd, &share, db, 0, table_name, path, nullptr);
5173 
5174   if (open_table_def(thd, &share, *table_def)) goto err;
5175 
5176 #ifdef HAVE_PSI_TABLE_INTERFACE
5177   share.m_psi = PSI_TABLE_CALL(get_table_share)(temp_table, &share);
5178 #endif
5179 
5180   // When db_stat is 0, we can pass nullptr as dd::Table since it won't be used.
5181   destroy(&table);
5182   if (open_table_from_share(thd, &share, "", 0, (uint)READ_ALL, 0, &table, true,
5183                             nullptr)) {
5184 #ifdef HAVE_PSI_TABLE_INTERFACE
5185     PSI_TABLE_CALL(drop_table_share)
5186     (temp_table, db, strlen(db), table_name, strlen(table_name));
5187 #endif
5188     goto err;
5189   }
5190 
5191   if (update_create_info) update_create_info_from_table(create_info, &table);
5192 
5193   name = get_canonical_filename(table.file, share.path.str, name_buff);
5194 
5195   error = table.file->ha_create(name, &table, create_info, table_def);
5196 
5197   if (error) {
5198     table.file->print_error(error, MYF(0));
5199 #ifdef HAVE_PSI_TABLE_INTERFACE
5200     PSI_TABLE_CALL(drop_table_share)
5201     (temp_table, db, strlen(db), table_name, strlen(table_name));
5202 #endif
5203   } else {
5204     /*
5205       We do post-create update only for engines supporting atomic DDL
5206       as only such engines are allowed to update dd::Table objects in
5207       handler::ha_create().
5208       The dd::Table objects for temporary tables are not stored in DD
5209       so do not need DD update.
5210       The dd::Table objects representing the DD tables themselves cannot
5211       be stored until the DD tables have been created in the SE.
5212     */
5213     if (!((create_info->options & HA_LEX_CREATE_TMP_TABLE) || is_temp_table ||
5214           dd::get_dictionary()->is_dd_table_name(db, table_name)) &&
5215         (table.file->ht->flags & HTON_SUPPORTS_ATOMIC_DDL)) {
5216       if (thd->dd_client()->update<dd::Table>(table_def)) error = 1;
5217     }
5218   }
5219   (void)closefrm(&table, false);
5220 err:
5221   free_table_share(&share);
5222   return error != 0;
5223 }
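
/*
  Illustrative sketch (not part of the original source): a typical call into
  ha_create_table() from DDL code, assuming 'db', 'table_name', 'create_info'
  and 'table_def' have already been prepared by the caller.

  @code
    char path[FN_REFLEN + 1];
    build_table_filename(path, sizeof(path) - 1, db, table_name, "", 0);
    if (ha_create_table(thd, path, db, table_name, create_info,
                        /*update_create_info=*/false,
                        /*is_temp_table=*/false, table_def))
      return true;  // error has already been reported via print_error()
  @endcode
*/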
5224 
5225 /**
5226   Try to discover table from engine.
5227 
5228   @note
5229     If found, import the serialized dictionary information.
5230 
5231   @retval
5232   -1    Table did not exist
5233   @retval
5234    0    Table created ok
5235   @retval
5236    > 0  Error, table existed but could not be created
5237 */
5238 int ha_create_table_from_engine(THD *thd, const char *db, const char *name) {
5239   int error;
5240   uchar *sdi_blob;
5241   size_t sdi_len;
5242   DBUG_TRACE;
5243   DBUG_PRINT("enter", ("name '%s'.'%s'", db, name));
5244 
5245   if ((error = ha_discover(thd, db, name, &sdi_blob, &sdi_len))) {
5246     /* Table could not be discovered and thus not created */
5247     return error;
5248   }
5249 
5250   /*
5251     Table was successfully discovered from the SE; check if the SDI needs
5252     to be installed or if that has already been done by the SE.
5253     No SDI blob returned from SE indicates it has installed
5254     the table definition for this table into DD itself.
5255     Otherwise, import the SDI based on the sdi_blob and sdi_len,
5256     which are set.
5257   */
5258   if (sdi_blob) {
5259     error = import_serialized_meta_data(sdi_blob, sdi_len, true);
5260     my_free(sdi_blob);
5261     if (error) return 2;
5262   }
5263 
5264   dd::cache::Dictionary_client::Auto_releaser releaser(thd->dd_client());
5265   const dd::Table *table_def = nullptr;
5266   if (thd->dd_client()->acquire(db, name, &table_def)) return 3;
5267 
5268   if (table_def == nullptr) {
5269     my_error(ER_NO_SUCH_TABLE, MYF(0), db, name);
5270     return 3;
5271   }
5272 
5273   char path[FN_REFLEN + 1];
5274   build_table_filename(path, sizeof(path) - 1, db, name, "", 0);
5275 
5276   TABLE_SHARE share;
5277   init_tmp_table_share(thd, &share, db, 0, name, path, nullptr);
5278 
5279   if (open_table_def(thd, &share, *table_def)) return 3;
5280 
5281   TABLE table;
5282   // When db_stat is 0, we can pass nullptr as dd::Table since it won't be used.
5283   if (open_table_from_share(thd, &share, "", 0, 0, 0, &table, false, nullptr)) {
5284     free_table_share(&share);
5285     return 3;
5286   }
5287 
5288   HA_CREATE_INFO create_info;
5289   update_create_info_from_table(&create_info, &table);
5290   create_info.table_options |= HA_OPTION_CREATE_FROM_ENGINE;
5291 
5292   get_canonical_filename(table.file, path, path);
5293   std::unique_ptr<dd::Table> table_def_clone(table_def->clone());
5294   error =
5295       table.file->ha_create(path, &table, &create_info, table_def_clone.get());
5296   /*
5297     Note that the table_def_clone is not stored into the DD,
5298     necessary changes to the table_def should already have
5299     been done in ha_discover/import_serialized_meta_data.
5300   */
5301   (void)closefrm(&table, true);
5302 
5303   return error != 0;
5304 }
5305 
5306 /**
5307   Try to find a table in a storage engine.
5308 
5309   @param thd  Thread handle
5310   @param db   Normalized table schema name
5311   @param name Normalized table name.
5312   @param[out] exists Only valid if the function succeeded.
5313 
5314   @retval true   An error is found
5315   @retval false  Success, check *exists
5316 */
5317 
5318 bool ha_check_if_table_exists(THD *thd, const char *db, const char *name,
5319                               bool *exists) {
5320   uchar *frmblob = nullptr;
5321   size_t frmlen;
5322   DBUG_TRACE;
5323 
5324   *exists = !ha_discover(thd, db, name, &frmblob, &frmlen);
5325   if (*exists) my_free(frmblob);
5326 
5327   return false;
5328 }
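
/*
  Illustrative sketch (not part of the original source): checking for a table
  that may only exist inside a discovering storage engine, and re-creating
  its definition if it does. 'db' and 'table_name' are hypothetical.

  @code
    bool exists = false;
    if (ha_check_if_table_exists(thd, db, table_name, &exists))
      return true;  // discovery error
    if (exists && ha_create_table_from_engine(thd, db, table_name) > 0)
      return true;  // table exists in the SE but could not be re-created
  @endcode
*/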
5329 
5330 /**
5331   Check if a table specified by name is a system table.
5332 
5333   @param       db                         Database name for the table.
5334   @param       table_name                 Table name to be checked.
5335   @param[out]  is_sql_layer_system_table  True if a system table belongs to
5336                                           sql_layer.
5337 
5338   @return Operation status
5339     @retval    true              If the table name is a system table.
5340     @retval    false             If the table name is a user-level table.
5341 */
5342 
5343 static bool check_if_system_table(const char *db, const char *table_name,
5344                                   bool *is_sql_layer_system_table) {
5345   // Check if we have the system database name in the command.
5346   if (!dd::get_dictionary()->is_dd_schema_name(db)) return false;
5347 
5348   // Check if this is SQL layer system tables.
5349   if (dd::get_dictionary()->is_system_table_name(db, table_name))
5350     *is_sql_layer_system_table = true;
5351 
5352   return true;
5353 }
5354 
5355 /**
5356   @brief Check if a given table is a system table.
5357 
5358   @details The primary purpose of introducing this function is to stop system
5359   tables from being created in or moved to undesired storage engines.
5360 
5361   @todo There is another function called is_system_table_name() used by
5362         get_table_category(), which is used to set TABLE_SHARE table_category.
5363         It checks only a subset of table names like proc, event and time*.
5364         We cannot use below function in get_table_category(),
5365         as that affects locking mechanism. If we need to
5366         unify these functions, we need to fix locking issues generated.
5367 
5368   @param   hton                  Handlerton of new engine.
5369   @param   db                    Database name.
5370   @param   table_name            Table name to be checked.
5371 
5372   @return Operation status
5373     @retval  true                If the table name is a valid system table
5374                                  or if its a valid user table.
5375 
5376     @retval  false               If the table name is a system table name
5377                                  and does not belong to engine specified
5378                                  in the command.
5379 */
5380 
5381 bool ha_check_if_supported_system_table(handlerton *hton, const char *db,
5382                                         const char *table_name) {
5383   DBUG_TRACE;
5384   st_sys_tbl_chk_params check_params;
5385 
5386   check_params.is_sql_layer_system_table = false;
5387   if (!check_if_system_table(db, table_name,
5388                              &check_params.is_sql_layer_system_table))
5389     return true;  // It's a user table name
5390 
5391   // Check if this is a system table and if some engine supports it.
5392   check_params.status = check_params.is_sql_layer_system_table
5393                             ? st_sys_tbl_chk_params::KNOWN_SYSTEM_TABLE
5394                             : st_sys_tbl_chk_params::NOT_KNOWN_SYSTEM_TABLE;
5395   check_params.db_type = hton->db_type;
5396   check_params.table_name = table_name;
5397   check_params.db = db;
5398   plugin_foreach(nullptr, check_engine_system_table_handlerton,
5399                  MYSQL_STORAGE_ENGINE_PLUGIN, &check_params);
5400 
5401   // SE does not support this system table.
5402   if (check_params.status == st_sys_tbl_chk_params::KNOWN_SYSTEM_TABLE)
5403     return false;
5404 
5405   // It's a system table or a valid user table.
5406   return true;
5407 }
5408 
5409 /**
5410   @brief Called for each SE to check if given db, tablename is a system table.
5411 
5412   @details The primary purpose of introducing this function is to stop system
5413   tables from being created in or moved to undesired storage engines.
5414 
5415   @param   plugin  Points to specific SE.
5416   @param   arg     Is of type struct st_sys_tbl_chk_params.
5417 
5418   @note
5419     args->status   Indicates OUT param,
5420                    see struct st_sys_tbl_chk_params definition for more info.
5421 
5422   @return Operation status
5423     @retval  true  There was a match found.
5424                    This will stop doing checks with other SE's.
5425 
5426     @retval  false There was no match found.
5427                    Other SE's will be checked to find a match.
5428 */
5429 static bool check_engine_system_table_handlerton(THD *, plugin_ref plugin,
5430                                                  void *arg) {
5431   st_sys_tbl_chk_params *check_params = (st_sys_tbl_chk_params *)arg;
5432   handlerton *hton = plugin_data<handlerton *>(plugin);
5433 
5434   // Do we already know that the table is a system table?
5435   if (check_params->status == st_sys_tbl_chk_params::KNOWN_SYSTEM_TABLE) {
5436     /*
5437       If this is the same SE as specified in the command, we can
5438       simply ask the SE if it supports it and stop the search regardless.
5439     */
5440     if (hton->db_type == check_params->db_type) {
5441       if (hton->is_supported_system_table &&
5442           hton->is_supported_system_table(
5443               check_params->db, check_params->table_name,
5444               check_params->is_sql_layer_system_table))
5445         check_params->status = st_sys_tbl_chk_params::SUPPORTED_SYSTEM_TABLE;
5446       return true;
5447     }
5448     /*
5449       If this is a different SE, there is no point in asking the SE
5450       since we already know it's a system table and we don't care
5451       if it is supported or not.
5452     */
5453     return false;
5454   }
5455 
5456   /*
5457     We don't yet know if the table is a system table or not.
5458     We therefore must always ask the SE.
5459   */
5460   if (hton->is_supported_system_table &&
5461       hton->is_supported_system_table(
5462           check_params->db, check_params->table_name,
5463           check_params->is_sql_layer_system_table)) {
5464     /*
5465       If this is the same SE specified in the command, we know it's a
5466       supported system table and can stop the search.
5467     */
5468     if (hton->db_type == check_params->db_type) {
5469       check_params->status = st_sys_tbl_chk_params::SUPPORTED_SYSTEM_TABLE;
5470       return true;
5471     } else
5472       check_params->status = st_sys_tbl_chk_params::KNOWN_SYSTEM_TABLE;
5473   }
5474 
5475   return false;
5476 }
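
/*
  Illustrative sketch (not part of the original source): the handlerton hook
  consulted above. An engine declares which system tables it may store by
  implementing is_supported_system_table; the table list below is
  hypothetical.

  @code
    static bool example_is_supported_system_table(
        const char *db, const char *table_name,
        bool is_sql_layer_system_table) {
      static const char *supported[] = {"plugin", "servers", nullptr};
      if (!is_sql_layer_system_table) return false;
      for (const char **name = supported; *name; name++)
        if (strcmp(table_name, *name) == 0) return true;
      return false;
    }
  @endcode
*/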
5477 
5478 static bool rm_tmp_tables_handlerton(THD *thd, plugin_ref plugin, void *files) {
5479   handlerton *hton = plugin_data<handlerton *>(plugin);
5480 
5481   if (hton->state == SHOW_OPTION_YES && hton->rm_tmp_tables &&
5482       hton->rm_tmp_tables(hton, thd, (List<LEX_STRING> *)files))
5483     return true;
5484 
5485   return false;
5486 }
5487 
5488 /**
5489   Ask all SEs to drop all temporary tables which have been left from
5490   previous server run. Used on server start-up.
5491 
5492   @param[in]      thd    Thread context.
5493   @param[in,out]  files  List of files in directories for temporary files
5494                          which match tmp_file_prefix and thus can belong to
5495                          temporary tables. If any SE recognizes some file as
5496                          temporary tables. If any SE recognizes some file as
5497                          belonging to a temporary table in this SE and deletes
5498                          the file, it is also supposed to remove the file from
5499                          this list.
5500 
5501 bool ha_rm_tmp_tables(THD *thd, List<LEX_STRING> *files) {
5502   return plugin_foreach(thd, rm_tmp_tables_handlerton,
5503                         MYSQL_STORAGE_ENGINE_PLUGIN, files);
5504 }
5505 
5506 /**
5507   Default implementation for the handlerton::rm_tmp_tables() method, which
5508   simply removes all files from the "files" list that have one of the SE's
5509   extensions. This implementation corresponds to the default implementation
5510   of the handler::delete_table() method.
5511 */
5512 
5513 bool default_rm_tmp_tables(handlerton *hton, THD *, List<LEX_STRING> *files) {
5514   List_iterator<LEX_STRING> files_it(*files);
5515   LEX_STRING *file_path;
5516 
5517   if (!hton->file_extensions) return false;
5518 
5519   while ((file_path = files_it++)) {
5520     const char *file_ext = fn_ext(file_path->str);
5521 
5522     for (const char **ext = hton->file_extensions; *ext; ext++) {
5523       if (strcmp(file_ext, *ext) == 0) {
5524         if (my_is_symlink(file_path->str, nullptr) &&
5525             test_if_data_home_dir(file_path->str)) {
5526           /*
5527             For safety reasons, if the temporary table file is a symlink pointing
5528             to a file in the data directory, don't delete the file, delete
5529             only the symlink. It would be nicer to not delete symlinked files
5530             at all but MyISAM supports temporary tables with DATA
5531             DIRECTORY/INDEX DIRECTORY options.
5532           */
5533           (void)mysql_file_delete(key_file_misc, file_path->str, MYF(0));
5534         } else
5535           (void)mysql_file_delete_with_symlink(key_file_misc, file_path->str,
5536                                                MYF(0));
5537         files_it.remove();
5538         break;
5539       }
5540     }
5541   }
5542   return false;
5543 }
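
/*
  Illustration of default_rm_tmp_tables() above (a sketch, not part of the
  implementation): for an SE whose hton->file_extensions array is, say,
  {".MYD", ".MYI", nullptr}, a leftover temporary file such as
  "#sql1a2b_3_4.MYD" matches the first extension, is deleted (following the
  symlink rules above) and is removed from the "files" list, while a file
  like "#sql1a2b_3_4.ibd" stays in the list for another SE to claim.
*/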
5544 
5545 /*****************************************************************************
5546   Key cache handling.
5547 
5548   This code is only relevant for ISAM/MyISAM tables
5549 
5550   key_cache->cache may be 0 only in the case where a key cache is not
5551   initialized or when we were not able to initialize the key cache in a
5552   previous call to ha_init_key_cache() (probably out of memory).
5553 *****************************************************************************/
5554 
5555 /**
5556   Initialize a key cache if it has not been initialized before.
5557 */
5558 int ha_init_key_cache(const char *, KEY_CACHE *key_cache) {
5559   DBUG_TRACE;
5560 
5561   if (!key_cache->key_cache_inited) {
5562     mysql_mutex_lock(&LOCK_global_system_variables);
5563     size_t tmp_buff_size = (size_t)key_cache->param_buff_size;
5564     ulonglong tmp_block_size = key_cache->param_block_size;
5565     ulonglong division_limit = key_cache->param_division_limit;
5566     ulonglong age_threshold = key_cache->param_age_threshold;
5567     mysql_mutex_unlock(&LOCK_global_system_variables);
5568     return !init_key_cache(key_cache, tmp_block_size, tmp_buff_size,
5569                            division_limit, age_threshold);
5570   }
5571   return 0;
5572 }
5573 
5574 /**
5575   Resize key cache.
5576 */
5577 int ha_resize_key_cache(KEY_CACHE *key_cache) {
5578   DBUG_TRACE;
5579 
5580   if (key_cache->key_cache_inited) {
5581     mysql_mutex_lock(&LOCK_global_system_variables);
5582     size_t tmp_buff_size = (size_t)key_cache->param_buff_size;
5583     ulonglong tmp_block_size = key_cache->param_block_size;
5584     ulonglong division_limit = key_cache->param_division_limit;
5585     ulonglong age_threshold = key_cache->param_age_threshold;
5586     mysql_mutex_unlock(&LOCK_global_system_variables);
5587     const int retval =
5588         resize_key_cache(key_cache, keycache_thread_var(), tmp_block_size,
5589                          tmp_buff_size, division_limit, age_threshold);
5590     return !retval;
5591   }
5592   return 0;
5593 }
5594 
5595 /**
5596   Move all tables from one key cache to another one.
5597 */
5598 int ha_change_key_cache(KEY_CACHE *old_key_cache, KEY_CACHE *new_key_cache) {
5599   mi_change_key_cache(old_key_cache, new_key_cache);
5600   return 0;
5601 }
5602 
5603 struct st_discover_args {
5604   const char *db;
5605   const char *name;
5606   uchar **frmblob;
5607   size_t *frmlen;
5608 };
5609 
5610 static bool discover_handlerton(THD *thd, plugin_ref plugin, void *arg) {
5611   st_discover_args *vargs = (st_discover_args *)arg;
5612   handlerton *hton = plugin_data<handlerton *>(plugin);
5613   if (hton->state == SHOW_OPTION_YES && hton->discover &&
5614       (!(hton->discover(hton, thd, vargs->db, vargs->name, vargs->frmblob,
5615                         vargs->frmlen))))
5616     return true;
5617 
5618   return false;
5619 }
5620 
5621 /**
5622   Try to discover one table from handler(s).
5623 
5624   @param[in]      thd     Thread context.
5625   @param[in]      db      Schema of table
5626   @param[in]      name    Name of table
5627   @param[out]     frmblob Pointer to blob with table definition.
5628   @param[out]     frmlen  Length of the returned table definition blob
5629 
5630   @retval
5631     -1   Table does not exist
5632   @retval
5633     0   OK. Table could be discovered from SE.
5634         The *frmblob and *frmlen may be set if returning a blob
5635         which should be installed into data dictionary
5636         by the caller.
5637 
5638   @retval
5639     >0   error.  frmblob and frmlen may not be set
5640 
5641 */
5642 static int ha_discover(THD *thd, const char *db, const char *name,
5643                        uchar **frmblob, size_t *frmlen) {
5644   int error = -1;  // Table does not exist in any handler
5645   DBUG_TRACE;
5646   DBUG_PRINT("enter", ("db: %s, name: %s", db, name));
5647   st_discover_args args = {db, name, frmblob, frmlen};
5648 
5649   if (is_prefix(name, tmp_file_prefix)) /* skip temporary tables */
5650     return error;
5651 
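  /*
    discover_handlerton() returns true as soon as one SE successfully
    discovers the table, which makes plugin_foreach() return true. That is
    why a "true" result from plugin_foreach() maps to error = 0 (success)
    below.
  */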
5652   if (plugin_foreach(thd, discover_handlerton, MYSQL_STORAGE_ENGINE_PLUGIN,
5653                      &args))
5654     error = 0;
5655 
5656   if (!error) {
5657     DBUG_ASSERT(!thd->status_var_aggregated);
5658     thd->status_var.ha_discover_count++;
5659   }
5660   return error;
5661 }
5662 
5663 /**
5664   Call this function in order to give the handler the possibility
5665   to ask the engine if there are any new tables that should be written to
5666   disk or any dropped tables that need to be removed from disk.
5667 */
5668 struct st_find_files_args {
5669   const char *db;
5670   const char *path;
5671   const char *wild;
5672   bool dir;
5673   List<LEX_STRING> *files;
5674 };
5675 
5676 static bool find_files_handlerton(THD *thd, plugin_ref plugin, void *arg) {
5677   st_find_files_args *vargs = (st_find_files_args *)arg;
5678   handlerton *hton = plugin_data<handlerton *>(plugin);
5679 
5680   if (hton->state == SHOW_OPTION_YES && hton->find_files)
5681     if (hton->find_files(hton, thd, vargs->db, vargs->path, vargs->wild,
5682                          vargs->dir, vargs->files))
5683       return true;
5684 
5685   return false;
5686 }
5687 
5688 int ha_find_files(THD *thd, const char *db, const char *path, const char *wild,
5689                   bool dir, List<LEX_STRING> *files) {
5690   int error = 0;
5691   DBUG_TRACE;
5692   DBUG_PRINT("enter", ("db: '%s'  path: '%s'  wild: '%s'  dir: %d", db, path,
5693                        wild ? wild : "NULL", dir));
5694   st_find_files_args args = {db, path, wild, dir, files};
5695 
5696   plugin_foreach(thd, find_files_handlerton, MYSQL_STORAGE_ENGINE_PLUGIN,
5697                  &args);
5698   /* The return value is not currently used */
5699   return error;
5700 }
5701 
5702 /**
5703   Ask handler if the table exists in engine.
5704   @retval
5705     HA_ERR_NO_SUCH_TABLE     Table does not exist
5706   @retval
5707     HA_ERR_TABLE_EXIST       Table exists
5708 */
5709 struct st_table_exists_in_engine_args {
5710   const char *db;
5711   const char *name;
5712   int err;
5713 };
5714 
5715 static bool table_exists_in_engine_handlerton(THD *thd, plugin_ref plugin,
5716                                               void *arg) {
5717   st_table_exists_in_engine_args *vargs = (st_table_exists_in_engine_args *)arg;
5718   handlerton *hton = plugin_data<handlerton *>(plugin);
5719 
5720   int err = HA_ERR_NO_SUCH_TABLE;
5721 
5722   if (hton->state == SHOW_OPTION_YES && hton->table_exists_in_engine)
5723     err = hton->table_exists_in_engine(hton, thd, vargs->db, vargs->name);
5724 
5725   vargs->err = err;
5726   if (vargs->err == HA_ERR_TABLE_EXIST) return true;
5727 
5728   return false;
5729 }
5730 
5731 int ha_table_exists_in_engine(THD *thd, const char *db, const char *name) {
5732   DBUG_TRACE;
5733   DBUG_PRINT("enter", ("db: %s, name: %s", db, name));
5734   st_table_exists_in_engine_args args = {db, name, HA_ERR_NO_SUCH_TABLE};
5735   plugin_foreach(thd, table_exists_in_engine_handlerton,
5736                  MYSQL_STORAGE_ENGINE_PLUGIN, &args);
5737   DBUG_PRINT("exit", ("error: %d", args.err));
5738   return args.err;
5739 }
5740 
5741 /*
5742   TODO: change this into a dynamic struct
5743   List<handlerton> does not work as
5744   1. binlog_end is called when MEM_ROOT is gone
5745   2. cannot work with thd MEM_ROOT as memory should be freed
5746 */
5747 #define MAX_HTON_LIST_ST 63
5748 struct hton_list_st {
5749   handlerton *hton[MAX_HTON_LIST_ST];
5750   uint sz;
5751 };
5752 
5753 struct binlog_func_st {
5754   enum_binlog_func fn;
5755   void *arg;
5756 };
5757 
5758 /** @brief
5759   Listing handlertons first to avoid recursive calls and deadlock
5760 */
5761 static bool binlog_func_list(THD *, plugin_ref plugin, void *arg) {
5762   hton_list_st *hton_list = (hton_list_st *)arg;
5763   handlerton *hton = plugin_data<handlerton *>(plugin);
5764   if (hton->state == SHOW_OPTION_YES && hton->binlog_func) {
5765     uint sz = hton_list->sz;
5766     if (sz == MAX_HTON_LIST_ST - 1) {
5767       /* list full */
5768       return false;
5769     }
5770     hton_list->hton[sz] = hton;
5771     hton_list->sz = sz + 1;
5772   }
5773   return false;
5774 }
5775 
5776 static bool binlog_func_foreach(THD *thd, binlog_func_st *bfn) {
5777   hton_list_st hton_list;
5778   uint i, sz;
5779 
5780   hton_list.sz = 0;
5781   plugin_foreach(thd, binlog_func_list, MYSQL_STORAGE_ENGINE_PLUGIN,
5782                  &hton_list);
5783 
5784   for (i = 0, sz = hton_list.sz; i < sz; i++)
5785     hton_list.hton[i]->binlog_func(hton_list.hton[i], thd, bfn->fn, bfn->arg);
5786   return false;
5787 }
5788 
5789 int ha_reset_logs(THD *thd) {
5790   binlog_func_st bfn = {BFN_RESET_LOGS, nullptr};
5791   binlog_func_foreach(thd, &bfn);
5792   return 0;
5793 }
5794 
5795 void ha_reset_slave(THD *thd) {
5796   binlog_func_st bfn = {BFN_RESET_SLAVE, nullptr};
5797   binlog_func_foreach(thd, &bfn);
5798 }
5799 
5800 void ha_binlog_wait(THD *thd) {
5801   binlog_func_st bfn = {BFN_BINLOG_WAIT, nullptr};
5802   binlog_func_foreach(thd, &bfn);
5803 }
5804 
5805 int ha_binlog_index_purge_file(THD *thd, const char *file) {
5806   binlog_func_st bfn = {BFN_BINLOG_PURGE_FILE, const_cast<char *>(file)};
5807   binlog_func_foreach(thd, &bfn);
5808   return 0;
5809 }
5810 
5811 struct binlog_log_query_st {
5812   enum_binlog_command binlog_command;
5813   const char *query;
5814   size_t query_length;
5815   const char *db;
5816   const char *table_name;
5817 };
5818 
5819 static bool binlog_log_query_handlerton2(THD *thd, handlerton *hton,
5820                                          void *args) {
5821   struct binlog_log_query_st *b = (struct binlog_log_query_st *)args;
5822   if (hton->state == SHOW_OPTION_YES && hton->binlog_log_query)
5823     hton->binlog_log_query(hton, thd, b->binlog_command, b->query,
5824                            b->query_length, b->db, b->table_name);
5825   return false;
5826 }
5827 
5828 static bool binlog_log_query_handlerton(THD *thd, plugin_ref plugin,
5829                                         void *args) {
5830   return binlog_log_query_handlerton2(thd, plugin_data<handlerton *>(plugin),
5831                                       args);
5832 }
5833 
5834 void ha_binlog_log_query(THD *thd, handlerton *hton,
5835                          enum_binlog_command binlog_command, const char *query,
5836                          size_t query_length, const char *db,
5837                          const char *table_name) {
5838   struct binlog_log_query_st b;
5839   b.binlog_command = binlog_command;
5840   b.query = query;
5841   b.query_length = query_length;
5842   b.db = db;
5843   b.table_name = table_name;
5844   if (hton == nullptr)
5845     plugin_foreach(thd, binlog_log_query_handlerton,
5846                    MYSQL_STORAGE_ENGINE_PLUGIN, &b);
5847   else
5848     binlog_log_query_handlerton2(thd, hton, &b);
5849 }
5850 
5851 int ha_binlog_end(THD *thd) {
5852   binlog_func_st bfn = {BFN_BINLOG_END, nullptr};
5853   binlog_func_foreach(thd, &bfn);
5854   return 0;
5855 }
5856 
5857 static bool acl_notify_handlerton(THD *thd, plugin_ref plugin, void *data) {
5858   handlerton *hton = plugin_data<handlerton *>(plugin);
5859   if (hton->state == SHOW_OPTION_YES && hton->acl_notify)
5860     hton->acl_notify(thd,
5861                      static_cast<const class Acl_change_notification *>(data));
5862   return false;
5863 }
5864 
5865 void ha_acl_notify(THD *thd, class Acl_change_notification *data) {
5866   plugin_foreach(thd, acl_notify_handlerton, MYSQL_STORAGE_ENGINE_PLUGIN, data);
5867 }
5868 
5869 /**
5870   Calculate cost of 'index only' scan for given index and number of records
5871 
5872   @param keynr    Index number
5873   @param records  Estimated number of records to be retrieved
5874 
5875   @note
5876     It is assumed that we will read through the whole key range and that all
5877     key blocks are half full (normally things are much better). It is also
5878     assumed that each time we read the next key from the index, the handler
5879     performs a random seek, thus the cost is proportional to the number of
5880     blocks read.
5881 
5882   @return
5883     Estimated cost of 'index only' scan
5884 */
5885 
5886 double handler::index_only_read_time(uint keynr, double records) {
5887   double read_time;
5888   uint keys_per_block =
5889       (stats.block_size / 2 /
5890            (table_share->key_info[keynr].key_length + ref_length) +
5891        1);
5892   read_time = ((double)(records + keys_per_block - 1) / (double)keys_per_block);
5893   return read_time;
5894 }
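
/*
  Worked example for handler::index_only_read_time() above (illustrative
  numbers only): with stats.block_size = 16384, key_length + ref_length of
  100 bytes and records = 10000, keys_per_block = 16384 / 2 / 100 + 1 = 82,
  so read_time = (10000 + 82 - 1) / 82, i.e. roughly 123 block reads.
*/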
5895 
5896 double handler::table_in_memory_estimate() const {
5897   DBUG_ASSERT(stats.table_in_mem_estimate == IN_MEMORY_ESTIMATE_UNKNOWN ||
5898               (stats.table_in_mem_estimate >= 0.0 &&
5899                stats.table_in_mem_estimate <= 1.0));
5900 
5901   /*
5902     If the storage engine has supplied information about how much of the
5903     table is currently in a memory buffer, then use this estimate.
5904   */
5905   if (stats.table_in_mem_estimate != IN_MEMORY_ESTIMATE_UNKNOWN)
5906     return stats.table_in_mem_estimate;
5907 
5908   /*
5909     The storage engine has not provided any information about how much of
5910     this table is in memory, so use a heuristic to produce an estimate.
5911   */
5912   return estimate_in_memory_buffer(stats.data_file_length);
5913 }
5914 
5915 double handler::index_in_memory_estimate(uint keyno) const {
5916   const KEY *key = &table->key_info[keyno];
5917 
5918   /*
5919     If the storage engine has supplied information about how much of the
5920     index is currently in a memory buffer, then use this estimate.
5921   */
5922   const double est = key->in_memory_estimate();
5923   if (est != IN_MEMORY_ESTIMATE_UNKNOWN) return est;
5924 
5925   /*
5926     The storage engine has not provided any information about how much of
5927     this index is in memory, so use a heuristic to produce an estimate.
5928   */
5929   ulonglong file_length;
5930 
5931   /*
5932     If the index is a clustered primary index, then use the data file
5933     size as estimate for how large the index is.
5934   */
5935   if (keyno == table->s->primary_key && primary_key_is_clustered())
5936     file_length = stats.data_file_length;
5937   else
5938     file_length = stats.index_file_length;
5939 
5940   return estimate_in_memory_buffer(file_length);
5941 }
5942 
5943 double handler::estimate_in_memory_buffer(ulonglong table_index_size) const {
5944   /*
5945     The storage engine has not provided any information about how much of
5946     the table/index is in memory. In this case we use a heuristic:
5947 
5948     - if the size of the table/index is less than 20 percent (pick any
5949       number) of the memory buffer, then the entire table/index is likely in
5950       memory.
5951     - if the size of the table/index is larger than the memory buffer, then
5952       assume nothing of the table/index is in memory.
5953     - if the size of the table/index is larger than 20 percent but less than
5954       the memory buffer size, then use a linear function of the table/index
5955       size that goes from 1.0 to 0.0.
5956   */
5957 
5958   /*
5959     If the storage engine has information about the size of its
5960     memory buffer, then use this. Otherwise, assume that at least 100 MB
5961     of data can be cached in memory.
5962   */
5963   longlong memory_buf_size = get_memory_buffer_size();
5964   if (memory_buf_size <= 0) memory_buf_size = 100 * 1024 * 1024;  // 100 MB
5965 
5966   /*
5967     Upper limit for the relative size of a table to be considered
5968     entirely available in a memory buffer. If the actual table size is
5969     less than this, we assume it is completely cached in a memory buffer.
5970   */
5971   const double table_index_in_memory_limit = 0.2;
5972 
5973   /*
5974     Estimate for how much of the total memory buffer this table/index
5975     can occupy.
5976   */
5977   const double percent_of_mem =
5978       static_cast<double>(table_index_size) / memory_buf_size;
5979 
5980   double in_mem_est;
5981 
5982   if (percent_of_mem < table_index_in_memory_limit)  // Less than 20 percent
5983     in_mem_est = 1.0;
5984   else if (percent_of_mem > 1.0)  // Larger than buffer
5985     in_mem_est = 0.0;
5986   else {
5987     /*
5988       The size of the table/index is larger than
5989       "table_index_in_memory_limit" * "memory_buf_size" but less than
5990       the total size of the memory buffer.
5991     */
5992     in_mem_est = 1.0 - (percent_of_mem - table_index_in_memory_limit) /
5993                            (1.0 - table_index_in_memory_limit);
5994   }
5995   DBUG_ASSERT(in_mem_est >= 0.0 && in_mem_est <= 1.0);
5996 
5997   return in_mem_est;
5998 }
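
/*
  Worked example for handler::estimate_in_memory_buffer() above (illustrative
  numbers only): if the SE reports no buffer size, memory_buf_size defaults
  to 100 MB. A 10 MB table gives percent_of_mem = 0.1 < 0.2, so
  in_mem_est = 1.0. A 60 MB table gives percent_of_mem = 0.6, so
  in_mem_est = 1.0 - (0.6 - 0.2) / (1.0 - 0.2) = 0.5. A 150 MB table gives
  percent_of_mem = 1.5 > 1.0, so in_mem_est = 0.0.
*/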
5999 
6000 Cost_estimate handler::table_scan_cost() {
6001   /*
6002     This function returns a Cost_estimate object. The function should be
6003     implemented in a way that allows the compiler to use "return value
6004     optimization" to avoid creating the temporary object for the return value
6005     and use of the copy constructor.
6006   */
6007 
6008   const double io_cost = scan_time() * table->cost_model()->page_read_cost(1.0);
6009   Cost_estimate cost;
6010   cost.add_io(io_cost);
6011   return cost;
6012 }
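
/*
  Illustration of handler::table_scan_cost() above (a sketch with assumed
  values): if scan_time() estimates 200 blocks and the cost model's
  page_read_cost(1.0) evaluates to 1.0 for this table, the returned
  Cost_estimate carries an I/O cost of 200 and no CPU cost.
*/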
6013 
6014 Cost_estimate handler::index_scan_cost(uint index,
6015                                        double ranges MY_ATTRIBUTE((unused)),
6016                                        double rows) {
6017   /*
6018     This function returns a Cost_estimate object. The function should be
6019     implemented in a way that allows the compiler to use "return value
6020     optimization" to avoid creating the temporary object for the return value
6021     and use of the copy constructor.
6022   */
6023 
6024   DBUG_ASSERT(ranges >= 0.0);
6025   DBUG_ASSERT(rows >= 0.0);
6026 
6027   const double io_cost = index_only_read_time(index, rows) *
6028                          table->cost_model()->page_read_cost_index(index, 1.0);
6029   Cost_estimate cost;
6030   cost.add_io(io_cost);
6031   return cost;
6032 }
6033 
6034 Cost_estimate handler::read_cost(uint index, double ranges, double rows) {
6035   /*
6036     This function returns a Cost_estimate object. The function should be
6037     implemented in a way that allows the compiler to use "return value
6038     optimization" to avoid creating the temporary object for the return value
6039     and use of the copy constructor.
6040   */
6041 
6042   DBUG_ASSERT(ranges >= 0.0);
6043   DBUG_ASSERT(rows >= 0.0);
6044 
6045   const double io_cost =
6046       read_time(index, static_cast<uint>(ranges), static_cast<ha_rows>(rows)) *
6047       table->cost_model()->page_read_cost(1.0);
6048   Cost_estimate cost;
6049   cost.add_io(io_cost);
6050   return cost;
6051 }
6052 
6053 /**
6054   Check if key has partially-covered columns
6055 
6056   We can't use DS-MRR to perform range scans when the ranges are over
6057   partially-covered keys, because we'll not have full key part values
6058   (we'll have their prefixes from the index) and will not be able to check
6059   if we've reached the end of the range.
6060 
6061   @param table  Table to check keys for
6062   @param keyno  Key to check
6063 
6064   @todo
6065     Allow use of DS-MRR in cases where the index has partially-covered
6066     components but they are not used for scanning.
6067 
6068   @retval true   Yes
6069   @retval false  No
6070 */
6071 
6072 static bool key_uses_partial_cols(TABLE *table, uint keyno) {
6073   KEY_PART_INFO *kp = table->key_info[keyno].key_part;
6074   KEY_PART_INFO *kp_end = kp + table->key_info[keyno].user_defined_key_parts;
6075   for (; kp != kp_end; kp++) {
6076     if (!kp->field->part_of_key.is_set(keyno)) return true;
6077   }
6078   return false;
6079 }
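
/*
  Example for key_uses_partial_cols() above (a sketch): for a table defined
  as CREATE TABLE t (a VARCHAR(100), b INT, KEY k (a(10), b)), the prefix
  part a(10) does not cover the full column, so the field's part_of_key map
  does not include key k and the function returns true; for a key defined as
  KEY k2 (b) it returns false.
*/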
6080 
6081 /****************************************************************************
6082  * Default MRR implementation (MRR to non-MRR converter)
6083  ***************************************************************************/
6084 
6085 /**
6086   Get cost and other information about MRR scan over a known list of ranges
6087 
6088   Calculate estimated cost and other information about an MRR scan for given
6089   sequence of ranges.
6090 
6091   @param keyno           Index number
6092   @param seq             Range sequence to be traversed
6093   @param seq_init_param  First parameter for seq->init()
6094   @param n_ranges_arg    Number of ranges in the sequence, or 0 if the caller
6095                          can't efficiently determine it
6096   @param [in,out] bufsz  IN:  Size of the buffer available for use
6097                          OUT: Size of the buffer that is expected to be actually
6098                               used, or 0 if buffer is not needed.
6099   @param [in,out] flags  A combination of HA_MRR_* flags
6100   @param [out] cost      Estimated cost of MRR access
6101 
6102   @note
6103     This method (or an overriding one in a derived class) must check for
6104     \c thd->killed and return HA_POS_ERROR if it is not zero. This is required
6105     for a user to be able to interrupt the calculation by killing the
6106     connection/query.
6107 
6108   @retval
6109     HA_POS_ERROR  Error or the engine is unable to perform the requested
6110                   scan. Values of OUT parameters are undefined.
6111   @retval
6112     other         OK, *cost contains cost of the scan, *bufsz and *flags
6113                   contain scan parameters.
6114 */
6115 
6116 ha_rows handler::multi_range_read_info_const(
6117     uint keyno, RANGE_SEQ_IF *seq, void *seq_init_param,
6118     uint n_ranges_arg MY_ATTRIBUTE((unused)), uint *bufsz, uint *flags,
6119     Cost_estimate *cost) {
6120   KEY_MULTI_RANGE range;
6121   range_seq_t seq_it;
6122   ha_rows rows, total_rows = 0;
6123   uint n_ranges = 0;
6124   THD *thd = current_thd;
6125 
6126   /* Default MRR implementation doesn't need buffer */
6127   *bufsz = 0;
6128 
6129   DBUG_EXECUTE_IF("bug13822652_2", thd->killed = THD::KILL_QUERY;);
6130 
6131   seq_it = seq->init(seq_init_param, n_ranges, *flags);
6132   while (!seq->next(seq_it, &range)) {
6133     if (unlikely(thd->killed != 0)) return HA_POS_ERROR;
6134 
6135     n_ranges++;
6136     key_range *min_endp, *max_endp;
6137     if (range.range_flag & GEOM_FLAG) {
6138       min_endp = &range.start_key;
6139       max_endp = nullptr;
6140     } else {
6141       min_endp = range.start_key.length ? &range.start_key : nullptr;
6142       max_endp = range.end_key.length ? &range.end_key : nullptr;
6143     }
6144     /*
6145       Get the number of rows in the range. This is done by calling
6146       records_in_range() unless:
6147 
6148         1) The index is unique.
6149            There cannot be more than one matching row, so 1 is
6150            assumed. Note that it is possible that the correct number
6151            is actually 0, so the row estimate may be too high in this
6152            case. Also note: ranges of the form "x IS NULL" may have more
6153            than 1 matching row, so records_in_range() is called for these.
6154         2) SKIP_RECORDS_IN_RANGE will be set when skip_records_in_range or
6155            use_index_statistics are true.
6156            Ranges of the form "x IS NULL" will not use index statistics
6157            because the number of rows with this value is likely to be
6158            very different from the values in the index statistics.
6159 
6160       Note: With SKIP_RECORDS_IN_RANGE, use index statistics if:
6161             a) Index statistics are available.
6162             b) The range is an equality range but the index is either not
6163                unique or not all of the key parts are used.
6164     */
6165     int keyparts_used = 0;
6166     if ((range.range_flag & UNIQUE_RANGE) &&  // 1)
6167         !(range.range_flag & NULL_RANGE))
6168       rows = 1; /* there can be at most one row */
6169     else if (range.range_flag & SKIP_RECORDS_IN_RANGE &&  // 2)
6170              !(range.range_flag & NULL_RANGE)) {
6171       if ((range.range_flag & EQ_RANGE) &&
6172           (keyparts_used = my_count_bits(range.start_key.keypart_map)) &&
6173           table->key_info[keyno].has_records_per_key(keyparts_used - 1)) {
6174         rows = static_cast<ha_rows>(
6175             table->key_info[keyno].records_per_key(keyparts_used - 1));
6176       } else {
6177         /*
6178           Return HA_POS_ERROR if the range does not use all key parts and
6179           the key cannot use partial key searches.
6180         */
6181         if ((index_flags(keyno, 0, false) & HA_ONLY_WHOLE_INDEX)) {
6182           DBUG_ASSERT(
6183               (range.range_flag & EQ_RANGE) &&
6184               !table->key_info[keyno].has_records_per_key(keyparts_used - 1));
6185           total_rows = HA_POS_ERROR;
6186           break;
6187         }
6188         /*
6189           Since records_in_range() has not been called, set the rows to 1.
6190           FORCE INDEX has been used, so cost model values will be ignored anyway.
6191         */
6192         rows = 1;
6193       }
6194     } else {
6195       DBUG_EXECUTE_IF("crash_records_in_range", DBUG_SUICIDE(););
6196       DBUG_ASSERT(min_endp || max_endp);
6197       if (HA_POS_ERROR ==
6198           (rows = this->records_in_range(keyno, min_endp, max_endp))) {
6199         /* Can't scan one range => can't do MRR scan at all */
6200         total_rows = HA_POS_ERROR;
6201         break;
6202       }
6203     }
6204     total_rows += rows;
6205   }
6206 
6207   if (total_rows != HA_POS_ERROR) {
6208     const Cost_model_table *const cost_model = table->cost_model();
6209 
6210     /* The following calculation is the same as in multi_range_read_info(): */
6211     *flags |= (HA_MRR_USE_DEFAULT_IMPL | HA_MRR_SUPPORT_SORTED);
6212 
6213     DBUG_ASSERT(cost->is_zero());
6214     if (*flags & HA_MRR_INDEX_ONLY)
6215       *cost = index_scan_cost(keyno, static_cast<double>(n_ranges),
6216                               static_cast<double>(total_rows));
6217     else
6218       *cost = read_cost(keyno, static_cast<double>(n_ranges),
6219                         static_cast<double>(total_rows));
6220     cost->add_cpu(
6221         cost_model->row_evaluate_cost(static_cast<double>(total_rows)) + 0.01);
6222   }
6223   return total_rows;
6224 }
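
/*
  Illustration of the cost returned by multi_range_read_info_const() above
  (a sketch with assumed values): for 3 non-unique ranges estimated at
  total_rows = 300 and a scan that is not index-only, the estimate is
  read_cost(keyno, 3.0, 300.0) as I/O plus row_evaluate_cost(300.0) + 0.01
  as CPU, and HA_MRR_USE_DEFAULT_IMPL | HA_MRR_SUPPORT_SORTED is set in
  *flags.
*/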
6225 
6226 /**
6227   Get cost and other information about MRR scan over some sequence of ranges
6228 
6229   Calculate estimated cost and other information about an MRR scan for some
6230   sequence of ranges.
6231 
6232   The ranges themselves will be known only at execution phase. When this
6233   function is called we only know number of ranges and a (rough) E(#records)
6234   within those ranges.
6235 
6236   Currently this function is only called for "n-keypart singlepoint" ranges,
6237   i.e. each range is "keypart1=someconst1 AND ... AND keypartN=someconstN"
6238 
6239   The flags parameter is a combination of those flags: HA_MRR_SORTED,
6240   HA_MRR_INDEX_ONLY, HA_MRR_NO_ASSOCIATION, HA_MRR_LIMITS.
6241 
6242   @param keyno           Index number
6243   @param n_ranges        Estimated number of ranges (i.e. intervals) in the
6244                          range sequence.
6245   @param n_rows          Estimated total number of records contained within all
6246                          of the ranges
6247   @param [in,out] bufsz  IN:  Size of the buffer available for use
6248                          OUT: Size of the buffer that will be actually used, or
6249                               0 if buffer is not needed.
6250   @param [in,out] flags  A combination of HA_MRR_* flags
6251   @param [out] cost      Estimated cost of MRR access
6252 
6253   @retval
6254     0     OK, *cost contains cost of the scan, *bufsz and *flags contain scan
6255           parameters.
6256   @retval
6257     other Error or can't perform the requested scan
6258 */
6259 
6260 ha_rows handler::multi_range_read_info(uint keyno, uint n_ranges, uint n_rows,
6261                                        uint *bufsz, uint *flags,
6262                                        Cost_estimate *cost) {
6263   *bufsz = 0; /* Default implementation doesn't need a buffer */
6264 
6265   *flags |= HA_MRR_USE_DEFAULT_IMPL;
6266   *flags |= HA_MRR_SUPPORT_SORTED;
6267 
6268   DBUG_ASSERT(cost->is_zero());
6269 
6270   /* Produce the same cost as non-MRR code does */
6271   if (*flags & HA_MRR_INDEX_ONLY)
6272     *cost = index_scan_cost(keyno, n_ranges, n_rows);
6273   else
6274     *cost = read_cost(keyno, n_ranges, n_rows);
6275   return 0;
6276 }
6277 
6278 /**
6279   Initialize the MRR scan.
6280 
6281   This function may do heavyweight scan
6282   initialization like row prefetching/sorting/etc. (NOTE: it is better not to
6283   do it here, as we may not need it, e.g. if we never satisfy the WHERE clause
6284   on previous tables. For many implementations it would be natural to do such
6285   initialization in the first multi_range_read_next() call.)
6286 
6287   mode is a combination of the following flags: HA_MRR_SORTED,
6288   HA_MRR_INDEX_ONLY, HA_MRR_NO_ASSOCIATION
6289 
6290   @param seq_funcs       Range sequence to be traversed
6291   @param seq_init_param  First parameter for seq->init()
6292   @param n_ranges        Number of ranges in the sequence
6293   @param mode            Flags, see the description section for the details
6294   @param buf             INOUT: memory buffer to be used
6295 
6296   @note
6297     One must have called index_init() before calling this function. Several
6298     multi_range_read_init() calls may be made in the course of one query.
6299 
6300     Until WL#2623 is done (see its text, section 3.2), the following will
6301     also hold:
6302     The caller will guarantee that if "seq->init == mrr_ranges_array_init"
6303     then seq_init_param is an array of n_ranges KEY_MULTI_RANGE structures.
6304     This property will only be used by NDB handler until WL#2623 is done.
6305 
6306     Buffer memory management is done according to the following scenario:
6307     The caller allocates the buffer and provides it to the callee by filling
6308     the members of HANDLER_BUFFER structure.
6309     The callee consumes all or some fraction of the provided buffer space, and
6310     sets the HANDLER_BUFFER members accordingly.
6311     The callee may use the buffer memory until the next multi_range_read_init()
6312     call is made, all records have been read, or until index_end() call is
6313     made, whichever comes first.
6314 
6315   @retval 0  OK
6316   @retval 1  Error
6317 */
6318 
6319 int handler::multi_range_read_init(RANGE_SEQ_IF *seq_funcs,
6320                                    void *seq_init_param, uint n_ranges,
6321                                    uint mode,
6322                                    HANDLER_BUFFER *buf MY_ATTRIBUTE((unused))) {
6323   DBUG_TRACE;
6324   mrr_iter = seq_funcs->init(seq_init_param, n_ranges, mode);
6325   mrr_funcs = *seq_funcs;
6326   mrr_is_output_sorted = mode & HA_MRR_SORTED;
6327   mrr_have_range = false;
6328   return 0;
6329 }
6330 
6331 int handler::ha_multi_range_read_next(char **range_info) {
6332   int result;
6333   DBUG_TRACE;
6334 
6335   // Set status for the need to update generated fields
6336   m_update_generated_read_fields = table->has_gcol();
6337 
6338   result = multi_range_read_next(range_info);
6339   if (!result && m_update_generated_read_fields) {
6340     result =
6341         update_generated_read_fields(table->record[0], table, active_index);
6342     m_update_generated_read_fields = false;
6343   }
6344   table->set_row_status_from_handler(result);
6345   return result;
6346 }
6347 
6348 /**
6349   Get next record in MRR scan
6350 
6351   Default MRR implementation: read the next record
6352 
6353   @param range_info  OUT  Undefined if HA_MRR_NO_ASSOCIATION flag is in effect
6354                           Otherwise, the opaque value associated with the range
6355                           that contains the returned record.
6356 
6357   @retval 0      OK
6358   @retval other  Error code
6359 */
6360 
6361 int handler::multi_range_read_next(char **range_info) {
6362   int result = HA_ERR_END_OF_FILE;
6363   int range_res = 0;
6364   bool dup_found = false;
6365   DBUG_TRACE;
6366   // For a multi-valued index the unique filter has to be used to get a
6367   // correct result
6368   DBUG_ASSERT(!(table->key_info[active_index].flags & HA_MULTI_VALUED_KEY) ||
6369               m_unique);
6370 
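  /*
    On the very first call there is no current range yet, so skip the "read
    next row in the current range" step below and jump straight to fetching
    the first range from the sequence.
  */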
6371   if (!mrr_have_range) {
6372     mrr_have_range = true;
6373     goto start;
6374   }
6375 
6376   do {
6377     /*
6378       Do not call read_range_next() if this is an equality range on a
6379       unique index.
6380     */
6381     if (!((mrr_cur_range.range_flag & UNIQUE_RANGE) &&
6382           (mrr_cur_range.range_flag & EQ_RANGE))) {
6383       DBUG_ASSERT(!result || result == HA_ERR_END_OF_FILE);
6384       result = read_range_next();
6385       DBUG_EXECUTE_IF("bug20162055_DEADLOCK", result = HA_ERR_LOCK_DEADLOCK;);
6386       /*
6387         On success check loop condition to filter duplicates, if needed.
6388         Exit on non-EOF error. Use next range on EOF error.
6389       */
6390       if (!result) continue;
6391       if (result != HA_ERR_END_OF_FILE) break;
6392     } else {
6393       if (was_semi_consistent_read()) goto scan_it_again;
6394     }
6395 
6396   start:
6397     /* Try the next range(s) until one matches a record. */
6398     while (!(range_res = mrr_funcs.next(mrr_iter, &mrr_cur_range))) {
6399     scan_it_again:
6400       result = read_range_first(
6401           mrr_cur_range.start_key.keypart_map ? &mrr_cur_range.start_key
6402                                               : nullptr,
6403           mrr_cur_range.end_key.keypart_map ? &mrr_cur_range.end_key : nullptr,
6404           mrr_cur_range.range_flag & EQ_RANGE, mrr_is_output_sorted);
6405       if (result != HA_ERR_END_OF_FILE) break;
6406     }
6407   } while (((result == HA_ERR_END_OF_FILE) ||
6408             (m_unique && (dup_found = filter_dup_records()))) &&
6409            !range_res);
6410 
6411   *range_info = mrr_cur_range.ptr;
6412   /*
6413     Last found record was a duplicate and we retrieved records from all
6414     ranges, so no more records can be returned.
6415   */
6416   if (dup_found && range_res) result = HA_ERR_END_OF_FILE;
6417 
6418   DBUG_PRINT("exit", ("handler::multi_range_read_next result %d", result));
6419   return result;
6420 }
6421 
6422 /****************************************************************************
6423  * DS-MRR implementation
6424  ***************************************************************************/
6425 
6426 /**
6427   DS-MRR: Initialize and start MRR scan
6428 
6429   Initialize and start the MRR scan. Depending on the mode parameter, this
6430   may use default or DS-MRR implementation.
6431 
6432   The DS-MRR implementation will use a second handler object (h2) for
6433   doing scan on the index:
6434   - on the first call to this function the h2 handler will be created
6435     and h2 will be opened using the same index as the main handler
6436     is set to use. The index scan on the main index will be closed
6437     and it will be re-opened to read records from the table using either
6438     no key or the primary key. The h2 handler will be deleted when
6439     reset() is called (which should happen on the end of the statement).
6440   - when dsmrr_close() is called the index scan on h2 is closed.
6441   - on following calls to this function one of the following must be valid:
6442     a. if dsmrr_close has been called:
6443        the main handler (h) must be open on an index, h2 will be opened
6444        using this index, and the index on h will be closed and
6445        h will be re-opened to read records from the table using either
6446        no key or the primary key.
6447     b. dsmrr_close has not been called:
6448        h2 will already be open, the main handler h must be set up
6449        to read records from the table (handler->inited is RND) either
6450        using the primary index or using no index at all.
6451 
6452   @param         seq_funcs       Interval sequence enumeration functions
6453   @param         seq_init_param  Interval sequence enumeration parameter
6454   @param         n_ranges        Number of ranges in the sequence.
6455   @param         mode            HA_MRR_* modes to use
6456   @param[in,out] buf             Buffer to use
6457 
6458   @retval 0     Ok, Scan started.
6459   @retval other Error
6460 */
6461 
6462 int DsMrr_impl::dsmrr_init(RANGE_SEQ_IF *seq_funcs, void *seq_init_param,
6463                            uint n_ranges, uint mode, HANDLER_BUFFER *buf) {
6464   DBUG_ASSERT(table != nullptr);  // Verify init() called
6465 
6466   uint elem_size;
6467   int retval = 0;
6468   DBUG_TRACE;
6469   THD *const thd = table->in_use;  // current THD
6470 
6471   if (!hint_key_state(thd, table->pos_in_table_list, h->active_index,
6472                       MRR_HINT_ENUM, OPTIMIZER_SWITCH_MRR) ||
6473       mode & (HA_MRR_USE_DEFAULT_IMPL | HA_MRR_SORTED))  // DS-MRR doesn't sort
6474   {
6475     use_default_impl = true;
6476     retval = h->handler::multi_range_read_init(seq_funcs, seq_init_param,
6477                                                n_ranges, mode, buf);
6478     return retval;
6479   }
6480 
6481   /*
6482     This assert will hit if we have pushed an index condition to the
6483     primary key index and then "change our mind" and use a different
6484     index for retrieving data with MRR. One of the following conditions
6485     must be true:
6486       1. We have not pushed an index condition on this handler.
6487       2. We have pushed an index condition and this is on the currently used
6488          index.
6489       3. We have pushed an index condition but this is not for the primary key.
6490       4. We have pushed an index condition and this has been transferred to
6491          the clone (h2) of the handler object.
6492   */
6493   DBUG_ASSERT(!h->pushed_idx_cond ||
6494               h->pushed_idx_cond_keyno == h->active_index ||
6495               h->pushed_idx_cond_keyno != table->s->primary_key ||
6496               (h2 && h->pushed_idx_cond_keyno == h2->active_index));
6497 
6498   rowids_buf = buf->buffer;
6499 
6500   is_mrr_assoc = !(mode & HA_MRR_NO_ASSOCIATION);
6501 
6502   if (is_mrr_assoc) {
6503     DBUG_ASSERT(!thd->status_var_aggregated);
6504     table->in_use->status_var.ha_multi_range_read_init_count++;
6505   }
6506 
6507   rowids_buf_end = buf->buffer_end;
6508   elem_size = h->ref_length + (int)is_mrr_assoc * sizeof(void *);
6509   rowids_buf_last =
6510       rowids_buf + ((rowids_buf_end - rowids_buf) / elem_size) * elem_size;
6511   rowids_buf_end = rowids_buf_last;
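  /*
    Each buffer element is a rowid optionally followed by a range_id pointer.
    For illustration (assumed sizes): with ref_length = 8 and is_mrr_assoc
    true on a 64-bit build, elem_size = 8 + sizeof(void *) = 16, so a
    1000-byte buffer is trimmed to 62 whole elements (992 bytes); the
    remaining 8 bytes are never written.
  */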
6512 
6513   /*
6514     The DS-MRR scan uses a second handler object (h2) for doing the
6515     index scan. Create this by cloning the primary handler
6516     object. The h2 handler object is deleted when DsMrr_impl::reset()
6517     is called.
6518   */
6519   if (!h2) {
6520     handler *new_h2;
6521     /*
6522       ::clone() takes up a lot of stack, especially on 64 bit platforms.
6523       The constant 5 is an empirical result.
6524       @todo Is this still the case? Leave it as it is for now, but it could
6525             likely be removed.
6526     */
6527     if (check_stack_overrun(thd, 5 * STACK_MIN_SIZE, (uchar *)&new_h2))
6528       return 1;
6529 
6530     if (!(new_h2 = h->clone(table->s->normalized_path.str, thd->mem_root)))
6531       return 1;
6532     h2 = new_h2; /* Ok, now can put it into h2 */
6533     table->prepare_for_position();
6534   }
6535 
6536   /*
6537     Open the index scan on h2 using the key from the primary handler.
6538   */
6539   if (h2->active_index == MAX_KEY) {
6540     DBUG_ASSERT(h->active_index != MAX_KEY);
6541     const uint mrr_keyno = h->active_index;
6542 
6543     if ((retval = h2->ha_external_lock(thd, h->get_lock_type()))) goto error;
6544 
6545     if ((retval = h2->extra(HA_EXTRA_KEYREAD))) goto error;
6546 
6547     if ((retval = h2->ha_index_init(mrr_keyno, false))) goto error;
6548 
6549     if ((table->key_info[mrr_keyno].flags & HA_MULTI_VALUED_KEY) &&
6550         (retval = h2->ha_extra(HA_EXTRA_ENABLE_UNIQUE_RECORD_FILTER)))
6551       goto error; /* purecov: inspected */
6552 
6553     // Transfer ICP from h to h2
6554     if (mrr_keyno == h->pushed_idx_cond_keyno) {
6555       if (h2->idx_cond_push(mrr_keyno, h->pushed_idx_cond)) {
6556         retval = 1;
6557         goto error;
6558       }
6559     } else {
6560       // Cancel any potentially previously pushed index conditions
6561       h2->cancel_pushed_idx_cond();
6562     }
6563   } else {
6564     /*
6565       h2 already has an open index. This happens when the DS-MRR scan
6566       is re-started without closing it first. In this case the primary
6567       handler must be used for reading records from the table, i.e. it
6568       must not be opened for doing a new range scan. In this case
6569       the active_index must either not be set or be the primary key.
6570     */
6571     DBUG_ASSERT(h->inited == handler::RND);
6572     DBUG_ASSERT(h->active_index == MAX_KEY ||
6573                 h->active_index == table->s->primary_key);
6574   }
6575 
6576   /*
6577     The index scan is now transferred to h2 and we can close the open
6578     index scan on the primary handler.
6579   */
6580   if (h->inited == handler::INDEX) {
6581     /*
6582       Calling h->ha_index_end() will invoke dsmrr_close() for this object,
6583       which will close the index scan on h2. We need to keep it open, so
6584       temporarily move h2 out of the DsMrr object.
6585     */
6586     handler *save_h2 = h2;
6587     h2 = nullptr;
6588     retval = h->ha_index_end();
6589     h2 = save_h2;
6590     if (retval) goto error;
6591   }
6592 
6593   /*
6594     Verify consistency between h and h2.
6595   */
6596   DBUG_ASSERT(h->inited != handler::INDEX);
6597   DBUG_ASSERT(h->active_index == MAX_KEY ||
6598               h->active_index == table->s->primary_key);
6599   DBUG_ASSERT(h2->inited == handler::INDEX);
6600   DBUG_ASSERT(h2->active_index != MAX_KEY);
6601   DBUG_ASSERT(h->get_lock_type() == h2->get_lock_type());
6602 
6603   if ((retval = h2->handler::multi_range_read_init(seq_funcs, seq_init_param,
6604                                                    n_ranges, mode, buf)))
6605     goto error;
6606 
6607   if ((retval = dsmrr_fill_buffer())) goto error;
6608 
6609   /*
6610     If the above call has scanned through all intervals in *seq, then
6611     adjust *buf to indicate that the remaining buffer space will not be used.
6612   */
6613   if (dsmrr_eof) buf->end_of_used_area = rowids_buf_last;
6614 
6615   /*
6616      h->inited == INDEX may occur when 'range checked for each record' is
6617      used.
6618   */
6619   if ((h->inited != handler::RND) &&
6620       ((h->inited == handler::INDEX ? h->ha_index_end() : false) ||
6621        (h->ha_rnd_init(false)))) {
6622     retval = 1;
6623     goto error;
6624   }
6625 
6626   use_default_impl = false;
6627   h->mrr_funcs = *seq_funcs;
6628 
6629   return 0;
6630 error:
6631   h2->ha_index_or_rnd_end();
6632   h2->ha_external_lock(thd, F_UNLCK);
6633   h2->ha_close();
6634   destroy(h2);
6635   h2 = nullptr;
6636   DBUG_ASSERT(retval != 0);
6637   return retval;
6638 }
6639 
6640 void DsMrr_impl::dsmrr_close() {
6641   DBUG_TRACE;
6642 
6643   // If there is an open index on h2, then close it
6644   if (h2 && h2->active_index != MAX_KEY) {
6645     h2->ha_index_or_rnd_end();
6646     h2->ha_external_lock(current_thd, F_UNLCK);
6647   }
6648   use_default_impl = true;
6649 }
6650 
6651 void DsMrr_impl::reset() {
6652   DBUG_TRACE;
6653 
6654   if (h2) {
6655     // Close any ongoing DS-MRR scan
6656     dsmrr_close();
6657 
6658     // Close and delete the h2 handler
6659     h2->ha_close();
6660     destroy(h2);
6661     h2 = nullptr;
6662   }
6663 }
6664 
6665 /**
6666   DS-MRR: Fill the buffer with rowids and sort it by rowid
6667 
6668   {This is an internal function of DiskSweep MRR implementation}
6669   Scan the MRR ranges and collect ROWIDs (or {ROWID, range_id} pairs) into
6670   buffer. When the buffer is full or scan is completed, sort the buffer by
6671   rowid and return.
6672 
6673   The function assumes that rowids buffer is empty when it is invoked.
6674 
6675   @retval 0      OK, the next portion of rowids is in the buffer,
6676                  properly ordered
6677   @retval other  Error
6678 */
6679 
6680 int DsMrr_impl::dsmrr_fill_buffer() {
6681   char *range_info;
6682   int res = 0;
6683   DBUG_TRACE;
6684   DBUG_ASSERT(rowids_buf < rowids_buf_end);
6685 
6686   /*
6687     Set key_read to true since we only read fields from the index.
6688     This ensures that any virtual columns are read from the index and that
6689     no attempt is made to evaluate them from base columns.
6690     (Do not use TABLE::set_keyread() since the MRR implementation operates
6691     with two handler objects, and set_keyread() would manipulate the keyread
6692     property of the wrong handler. MRR sets the handlers' keyread properties
6693     when initializing the MRR operation, independent of this call).
6694   */
6695   DBUG_ASSERT(table->key_read == false);
6696   table->key_read = true;
6697 
6698   rowids_buf_cur = rowids_buf;
6699   /*
6700     Do not use ha_multi_range_read_next() as it would call the engine's
6701     overridden multi_range_read_next(); the default implementation is wanted here.
6702   */
6703   while ((rowids_buf_cur < rowids_buf_end) &&
6704          !(res = h2->handler::multi_range_read_next(&range_info))) {
6705     KEY_MULTI_RANGE *curr_range = &h2->handler::mrr_cur_range;
6706     if (h2->mrr_funcs.skip_index_tuple &&
6707         h2->mrr_funcs.skip_index_tuple(h2->mrr_iter, curr_range->ptr))
6708       continue;
6709 
6710     /* Put rowid, or {rowid, range_id} pair into the buffer */
6711     h2->position(table->record[0]);
6712     memcpy(rowids_buf_cur, h2->ref, h2->ref_length);
6713     rowids_buf_cur += h2->ref_length;
6714 
6715     if (is_mrr_assoc) {
6716       memcpy(rowids_buf_cur, &range_info, sizeof(void *));
6717       rowids_buf_cur += sizeof(void *);
6718     }
6719   }
6720 
6721   // Restore key_read since the next read operation will read complete rows
6722   table->key_read = false;
6723 
6724   if (res && res != HA_ERR_END_OF_FILE) return res;
6725   dsmrr_eof = (res == HA_ERR_END_OF_FILE);
6726 
6727   /* Sort the buffer contents by rowid */
6728   uint elem_size = h->ref_length + (int)is_mrr_assoc * sizeof(void *);
6729   DBUG_ASSERT((rowids_buf_cur - rowids_buf) % elem_size == 0);
6730 
6731   varlen_sort(
6732       rowids_buf, rowids_buf_cur, elem_size,
6733       [this](const uchar *a, const uchar *b) { return h->cmp_ref(a, b) < 0; });
6734   rowids_buf_last = rowids_buf_cur;
6735   rowids_buf_cur = rowids_buf;
6736   return 0;
6737 }
6738 
6739 /*
6740   DS-MRR implementation: multi_range_read_next() function
6741 */
6742 
6743 int DsMrr_impl::dsmrr_next(char **range_info) {
6744   int res;
6745   uchar *cur_range_info = nullptr;
6746   uchar *rowid;
6747 
6748   if (use_default_impl) return h->handler::multi_range_read_next(range_info);
6749 
6750   do {
6751     if (rowids_buf_cur == rowids_buf_last) {
6752       if (dsmrr_eof) {
6753         res = HA_ERR_END_OF_FILE;
6754         goto end;
6755       }
6756 
6757       res = dsmrr_fill_buffer();
6758       if (res) goto end;
6759     }
6760 
6761     /* Return EOF if there are no rowids in the buffer after the re-fill attempt. */
6762     if (rowids_buf_cur == rowids_buf_last) {
6763       res = HA_ERR_END_OF_FILE;
6764       goto end;
6765     }
6766     rowid = rowids_buf_cur;
6767 
6768     if (is_mrr_assoc)
6769       memcpy(&cur_range_info, rowids_buf_cur + h->ref_length, sizeof(uchar *));
6770 
6771     rowids_buf_cur += h->ref_length + sizeof(void *) * is_mrr_assoc;
6772     if (h2->mrr_funcs.skip_record &&
6773         h2->mrr_funcs.skip_record(h2->mrr_iter, (char *)cur_range_info, rowid))
6774       continue;
6775     res = h->ha_rnd_pos(table->record[0], rowid);
6776     break;
6777   } while (true);
6778 
6779   if (is_mrr_assoc) {
6780     memcpy(range_info, rowid + h->ref_length, sizeof(void *));
6781   }
6782 end:
6783   return res;
6784 }
6785 
6786 /*
6787   DS-MRR implementation: multi_range_read_info() function
6788 */
6789 ha_rows DsMrr_impl::dsmrr_info(uint keyno, uint n_ranges, uint rows,
6790                                uint *bufsz, uint *flags, Cost_estimate *cost) {
6791   ha_rows res MY_ATTRIBUTE((unused));
6792   uint def_flags = *flags;
6793   uint def_bufsz = *bufsz;
6794 
6795   /* Get cost/flags/mem_usage of default MRR implementation */
6796   res = h->handler::multi_range_read_info(keyno, n_ranges, rows, &def_bufsz,
6797                                           &def_flags, cost);
6798   DBUG_ASSERT(!res);
6799 
6800   if ((*flags & HA_MRR_USE_DEFAULT_IMPL) ||
6801       choose_mrr_impl(keyno, rows, flags, bufsz, cost)) {
6802     /* Default implementation is chosen */
6803     DBUG_PRINT("info", ("Default MRR implementation chosen"));
6804     *flags = def_flags;
6805     *bufsz = def_bufsz;
6806     DBUG_ASSERT(*flags & HA_MRR_USE_DEFAULT_IMPL);
6807   } else {
6808     /* *flags and *bufsz were set by choose_mrr_impl */
6809     DBUG_PRINT("info", ("DS-MRR implementation chosen"));
6810   }
6811   return 0;
6812 }
6813 
6814 /*
6815   DS-MRR Implementation: multi_range_read_info_const() function
6816 */
6817 
6818 ha_rows DsMrr_impl::dsmrr_info_const(uint keyno, RANGE_SEQ_IF *seq,
6819                                      void *seq_init_param, uint n_ranges,
6820                                      uint *bufsz, uint *flags,
6821                                      Cost_estimate *cost) {
6822   ha_rows rows;
6823   uint def_flags = *flags;
6824   uint def_bufsz = *bufsz;
6825   /* Get cost/flags/mem_usage of default MRR implementation */
6826   rows = h->handler::multi_range_read_info_const(
6827       keyno, seq, seq_init_param, n_ranges, &def_bufsz, &def_flags, cost);
6828   if (rows == HA_POS_ERROR) {
6829     /* Default implementation can't perform MRR scan => we can't either */
6830     return rows;
6831   }
6832 
6833   /*
6834     If HA_MRR_USE_DEFAULT_IMPL has been passed to us, that is an order to
6835     use the default MRR implementation (we need it for UPDATE/DELETE).
6836     Otherwise, make a choice based on cost and mrr* flags of
6837     @@optimizer_switch.
6838   */
6839   if ((*flags & HA_MRR_USE_DEFAULT_IMPL) ||
6840       choose_mrr_impl(keyno, rows, flags, bufsz, cost)) {
6841     DBUG_PRINT("info", ("Default MRR implementation chosen"));
6842     *flags = def_flags;
6843     *bufsz = def_bufsz;
6844     DBUG_ASSERT(*flags & HA_MRR_USE_DEFAULT_IMPL);
6845   } else {
6846     /* *flags and *bufsz were set by choose_mrr_impl */
6847     DBUG_PRINT("info", ("DS-MRR implementation chosen"));
6848   }
6849   return rows;
6850 }
6851 
6852 /**
6853   DS-MRR Internals: Choose between Default MRR implementation and DS-MRR
6854 
6855   Make the choice between using Default MRR implementation and DS-MRR.
6856   This function contains common functionality factored out of dsmrr_info()
6857   and dsmrr_info_const(). The function assumes that the default MRR
6858   implementation's applicability requirements are satisfied.
6859 
6860   @param keyno       Index number
6861   @param rows        E(full rows to be retrieved)
6862   @param flags  IN   MRR flags provided by the MRR user
6863                 OUT  If DS-MRR is chosen, flags of the DS-MRR implementation,
6864                      else the value is not modified
6865   @param bufsz  IN   If DS-MRR is chosen, buffer use of the DS-MRR implementation,
6866                      else the value is not modified
6867   @param cost   IN   Cost of the default MRR implementation
6868                 OUT  If DS-MRR is chosen, cost of the DS-MRR scan,
6869                      else the value is not modified
6870 
6871   @retval true   Default MRR implementation should be used
6872   @retval false  DS-MRR implementation should be used
6873 */
6874 
6875 bool DsMrr_impl::choose_mrr_impl(uint keyno, ha_rows rows, uint *flags,
6876                                  uint *bufsz, Cost_estimate *cost) {
6877   bool res;
6878   THD *thd = current_thd;
6879   TABLE_LIST *tl = table->pos_in_table_list;
6880   const bool mrr_on =
6881       hint_key_state(thd, tl, keyno, MRR_HINT_ENUM, OPTIMIZER_SWITCH_MRR);
6882   const bool force_dsmrr_by_hints =
6883       hint_key_state(thd, tl, keyno, MRR_HINT_ENUM, 0) ||
6884       hint_table_state(thd, tl, BKA_HINT_ENUM, 0);
6885 
6886   if (!(mrr_on || force_dsmrr_by_hints) ||
6887       *flags & (HA_MRR_INDEX_ONLY | HA_MRR_SORTED) ||  // Unsupported by DS-MRR
6888       (keyno == table->s->primary_key && h->primary_key_is_clustered()) ||
6889       key_uses_partial_cols(table, keyno) ||
6890       table->s->tmp_table != NO_TMP_TABLE) {
6891     /* Use the default implementation, don't modify args: See comments  */
6892     return true;
6893   }
6894 
6895   /*
6896     If @@optimizer_switch has "mrr_cost_based" on, we should avoid
6897     using DS-MRR for queries where it is likely that the records are
6898     stored in memory. Since there is currently no way to determine
6899     this, we use a heuristic:
6900     a) if the storage engine has a memory buffer, DS-MRR is only
6901        considered if the table size is bigger than the buffer.
6902     b) if the storage engine does not have a memory buffer, DS-MRR is
6903        only considered if the table size is bigger than 100MB.
6904     c) Since there is an initial setup cost for DS-MRR, it is only
6905        considered if at least 50 records will be read.
6906   */
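  /*
    Illustrative example (the concrete numbers here are hypothetical, not
    taken from this file): if get_memory_buffer_size() reports a 128 MB
    buffer, the check below considers DS-MRR only for tables whose
    data_file_length exceeds 128 MB and for which more than 50 rows are
    expected to be read; a 64 MB table, or a 10-row lookup in a 1 GB table,
    falls back to the default implementation.
  */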
6907   if (thd->optimizer_switch_flag(OPTIMIZER_SWITCH_MRR_COST_BASED) &&
6908       !force_dsmrr_by_hints) {
6909     /*
6910       If the storage engine has a database buffer we use this as the
6911       minimum size the table should have before considering DS-MRR.
6912     */
6913     longlong min_file_size = table->file->get_memory_buffer_size();
6914     if (min_file_size == -1) {
6915       // No estimate for database buffer
6916       min_file_size = 100 * 1024 * 1024;  // 100 MB
6917     }
6918 
6919     if (table->file->stats.data_file_length <
6920             static_cast<ulonglong>(min_file_size) ||
6921         rows <= 50)
6922       return true;  // Use the default implementation
6923   }
6924 
6925   Cost_estimate dsmrr_cost;
6926   if (get_disk_sweep_mrr_cost(keyno, rows, *flags, bufsz, &dsmrr_cost))
6927     return true;
6928 
6929   /*
6930     If @@optimizer_switch has "mrr" on and "mrr_cost_based" off, then set the
6931     cost of DS-MRR to the minimum of the DS-MRR and default implementation
6932     costs. This allows one to force the use of DS-MRR whenever it is
6933     applicable without affecting other cost-based choices. Note that if the
6934     MRR or BKA hint is specified, DS-MRR will be used regardless of cost.
6935   */
6936   const bool force_dsmrr =
6937       (force_dsmrr_by_hints ||
6938        !thd->optimizer_switch_flag(OPTIMIZER_SWITCH_MRR_COST_BASED));
6939 
6940   if (force_dsmrr && dsmrr_cost.total_cost() > cost->total_cost())
6941     dsmrr_cost = *cost;
6942 
6943   if (force_dsmrr || (dsmrr_cost.total_cost() <= cost->total_cost())) {
6944     *flags &= ~HA_MRR_USE_DEFAULT_IMPL; /* Use the DS-MRR implementation */
6945     *flags &= ~HA_MRR_SUPPORT_SORTED;   /* We can't provide ordered output */
6946     *cost = dsmrr_cost;
6947     res = false;
6948   } else {
6949     /* Use the default MRR implementation */
6950     res = true;
6951   }
6952   return res;
6953 }
6954 
6955 static void get_sort_and_sweep_cost(TABLE *table, ha_rows nrows,
6956                                     Cost_estimate *cost);
6957 
6958 /**
6959   Get cost of DS-MRR scan
6960 
6961   @param keynr              Index to be used
6962   @param rows               E(Number of rows to be scanned)
6963   @param flags              Scan parameters (HA_MRR_* flags)
6964   @param buffer_size INOUT  Buffer size
6965   @param cost        OUT    The cost
6966 
6967   @retval false  OK
6968   @retval true   Error, DS-MRR cannot be used (the buffer is too small
6969                  for even 1 rowid)
6970 */
6971 
6972 bool DsMrr_impl::get_disk_sweep_mrr_cost(uint keynr, ha_rows rows, uint flags,
6973                                          uint *buffer_size,
6974                                          Cost_estimate *cost) {
6975   ha_rows rows_in_last_step;
6976   uint n_full_steps;
6977 
6978   const uint elem_size =
6979       h->ref_length + sizeof(void *) * !(flags & HA_MRR_NO_ASSOCIATION);
6980   const ha_rows max_buff_entries = *buffer_size / elem_size;
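  /*
    Hypothetical figures for illustration: with h->ref_length == 6 and an
    8-byte association pointer (HA_MRR_NO_ASSOCIATION not set, 64-bit build),
    elem_size == 14, so a 1024-byte buffer holds max_buff_entries == 73
    rowid entries.
  */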
6981 
6982   if (!max_buff_entries)
6983     return true; /* Buffer has not enough space for even 1 rowid */
6984 
6985   /* Number of iterations we'll make with full buffer */
6986   n_full_steps = (uint)floor(rows2double(rows) / max_buff_entries);
6987 
6988   /*
6989     Get numbers of rows we'll be processing in last iteration, with
6990     non-full buffer
6991   */
6992   rows_in_last_step = rows % max_buff_entries;
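  /*
    Worked example with hypothetical numbers: for rows == 1000 and
    max_buff_entries == 300, n_full_steps == 3 full-buffer iterations and
    rows_in_last_step == 100 rows remain for the final, partially filled
    buffer.
  */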
6993 
6994   DBUG_ASSERT(cost->is_zero());
6995 
6996   if (n_full_steps) {
6997     get_sort_and_sweep_cost(table, max_buff_entries, cost);
6998     cost->multiply(n_full_steps);
6999   } else {
7000     /*
7001       Adjust buffer size since only parts of the buffer will be used:
7002       1. Adjust record estimate for the last scan to reduce likelihood
7003          of needing more than one scan by adding 20 percent to the
7004          record estimate and by ensuring this is at least 100 records.
7005       2. If the estimated needed buffer size is lower than suggested by
7006          the caller then set it to the estimated buffer size.
7007     */
7008     const ha_rows keys_in_buffer =
7009         max<ha_rows>(static_cast<ha_rows>(1.2 * rows_in_last_step), 100);
7010     *buffer_size = min<ulong>(*buffer_size,
7011                               static_cast<ulong>(keys_in_buffer) * elem_size);
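    /*
      Hypothetical example: with rows_in_last_step == 10 and elem_size == 14,
      keys_in_buffer == max(12, 100) == 100, so *buffer_size is capped at
      min(*buffer_size, 1400) bytes.
    */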
7012   }
7013 
7014   Cost_estimate last_step_cost;
7015   get_sort_and_sweep_cost(table, rows_in_last_step, &last_step_cost);
7016   (*cost) += last_step_cost;
7017 
7018   /*
7019     Cost of memory is not included in the total_cost() function and
7020     thus will not be considered when comparing costs. Still, we
7021     record it in the cost estimate object for future use.
7022   */
7023   cost->add_mem(*buffer_size);
7024 
7025   /* Total cost of all index accesses */
7026   (*cost) += h->index_scan_cost(keynr, 1, static_cast<double>(rows));
7027 
7028   /*
7029     Add CPU cost for processing records (see
7030     @handler::multi_range_read_info_const()).
7031   */
7032   cost->add_cpu(
7033       table->cost_model()->row_evaluate_cost(static_cast<double>(rows)));
7034   return false;
7035 }
7036 
7037 /*
7038   Get cost of one sort-and-sweep step
7039 
7040   SYNOPSIS
7041     get_sort_and_sweep_cost()
7042       table       Table being accessed
7043       nrows       Number of rows to be sorted and retrieved
7044       cost   OUT  The cost
7045 
7046   DESCRIPTION
7047     Get cost of these operations:
7048      - sort an array of #nrows ROWIDs using qsort
7049      - read #nrows records from table in a sweep.
7050 */
7051 
7052 static void get_sort_and_sweep_cost(TABLE *table, ha_rows nrows,
7053                                     Cost_estimate *cost) {
7054   DBUG_ASSERT(cost->is_zero());
7055   if (nrows) {
7056     get_sweep_read_cost(table, nrows, false, cost);
7057 
7058     /*
7059       @todo CostModel: For the old version of the cost model the
7060       following code should be used. For the new version of the cost
7061       model Cost_model::key_compare_cost() should be used.  When
7062       removing support for the old cost model this code should be
7063       removed. The reason for this is that we should get rid of the
7064       ROWID_COMPARE_SORT_COST and use key_compare_cost() instead. For
7065       the current value returned by key_compare_cost() this would
7066       overestimate the cost for sorting.
7067     */
7068 
7069     /*
7070       Constant for the cost of doing one key compare operation in the
7071       sort operation. We should have used the value returned by
7072       key_compare_cost() here but this would make the cost
7073       estimate of sorting very high for queries accessing many
7074       records. Until this constant is adjusted we introduce a constant
7075       that is more realistic. @todo: Replace this with
7076       key_compare_cost() when this has been given a realistic value.
7077     */
7078     const double ROWID_COMPARE_SORT_COST =
7079         table->cost_model()->key_compare_cost(1.0) / 10;
7080 
7081     /* Add cost of qsort call: n * log2(n) * cost(rowid_comparison) */
7082 
7083     // For the old version of the cost model this cost calculations should
7084     // be used....
7085     const double cpu_sort = nrows * log2(nrows) * ROWID_COMPARE_SORT_COST;
7086     // .... For the new cost model something like this should be used...
7087     // cpu_sort= nrows * log2(nrows) *
7088     //           table->cost_model()->rowid_compare_cost();
7089     cost->add_cpu(cpu_sort);
7090   }
7091 }
7092 
7093 /**
7094   Get cost of reading nrows table records in a "disk sweep"
7095 
7096   A disk sweep read is a sequence of handler->rnd_pos(rowid) calls made for
7097   an ordered sequence of rowids.
7098 
7099   We take into account that some of the records might be in a memory
7100   buffer while others need to be read from a secondary storage
7101   device. The model for this assumes hard disk IO. A disk read is
7102   performed as follows:
7103 
7104    1. The disk head is moved to the needed cylinder
7105    2. The controller waits for the plate to rotate
7106    3. The data is transferred
7107 
7108   Time to do #3 is insignificant compared to #2+#1.
7109 
7110   Time to move the disk head is proportional to head travel distance.
7111 
7112   Time to wait for the plate to rotate depends on whether the disk head
7113   was moved or not.
7114 
7115   If disk head wasn't moved, the wait time is proportional to distance
7116   between the previous block and the block we're reading.
7117 
7118   If the head was moved, we don't know how much we'll need to wait for the
7119   plate to rotate. We assume the wait time to be a random variable with a
7120   mean of 0.5 of the full rotation time.
7121 
7122   Our cost units are "random disk seeks". The cost of random disk seek is
7123   actually not a constant; it depends on the range of cylinders we're going
7124   to access. We make it constant by introducing a fuzzy concept of "typical
7125   datafile length" (it's fuzzy as it's hard to tell whether it should
7126   include index file, temp.tables etc). Then random seek cost is:
7127 
7128     1 = half_rotation_cost + move_cost * 1/3 * typical_data_file_length
7129 
7130   We define half_rotation_cost as disk_seek_base_cost() (see
7131   Cost_model_server::disk_seek_base_cost()).
7132 
7133   @param      table        Table to be accessed
7134   @param      nrows        Number of rows to retrieve
7135   @param      interrupted  true <=> Assume that the disk sweep will be
7136                            interrupted by other disk IO. false - otherwise.
7137   @param[out] cost         the cost
7138 */
7139 
7140 void get_sweep_read_cost(TABLE *table, ha_rows nrows, bool interrupted,
7141                          Cost_estimate *cost) {
7142   DBUG_TRACE;
7143 
7144   DBUG_ASSERT(cost->is_zero());
7145   if (nrows > 0) {
7146     const Cost_model_table *const cost_model = table->cost_model();
7147 
7148     // The total number of blocks used by this table
7149     double n_blocks =
7150         ceil(ulonglong2double(table->file->stats.data_file_length) / IO_SIZE);
7151     if (n_blocks < 1.0)  // When data_file_length is 0
7152       n_blocks = 1.0;
7153 
7154     /*
7155       The number of blocks that on average need to be read, given that
7156       the records are uniformly distributed over the table.
7157     */
7158     double busy_blocks =
7159         n_blocks * (1.0 - pow(1.0 - 1.0 / n_blocks, rows2double(nrows)));
7160     if (busy_blocks < 1.0) busy_blocks = 1.0;
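    /*
      Worked example with hypothetical numbers: for a table of
      n_blocks == 1000 pages from which nrows == 100 uniformly distributed
      rows are fetched, busy_blocks == 1000 * (1 - 0.999^100) ~= 95.2 pages,
      i.e. a few of the requested rows are expected to share a page.
    */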
7161 
7162     DBUG_PRINT("info",
7163                ("sweep: nblocks=%g, busy_blocks=%g", n_blocks, busy_blocks));
7164     /*
7165       The random access cost for reading the data pages will be the upper
7166       limit for the sweep_cost.
7167     */
7168     cost->add_io(cost_model->page_read_cost(busy_blocks));
7169     if (!interrupted) {
7170       Cost_estimate sweep_cost;
7171       /*
7172         Assume reading pages from disk is done in one 'sweep'.
7173 
7174         The cost model and cost estimate for pages already in a memory
7175         buffer will be different from pages that needed to be read from
7176         disk. Calculate the number of blocks that likely already are
7177         in memory and the number of blocks that need to be read from
7178         disk.
7179       */
7180       const double busy_blocks_mem =
7181           busy_blocks * table->file->table_in_memory_estimate();
7182       const double busy_blocks_disk = busy_blocks - busy_blocks_mem;
7183       DBUG_ASSERT(busy_blocks_disk >= 0.0);
7184 
7185       // Cost of accessing blocks in main memory buffer
7186       sweep_cost.add_io(cost_model->buffer_block_read_cost(busy_blocks_mem));
7187 
7188       // Cost of reading blocks from disk in a 'sweep'
7189       const double seek_distance =
7190           (busy_blocks_disk > 1.0) ? n_blocks / busy_blocks_disk : n_blocks;
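      /*
        Hypothetical figures: with n_blocks == 1000 and
        busy_blocks_disk == 80, seek_distance == 12.5, i.e. each of the 80
        disk reads is charged a seek across 1/80th of the data file.
      */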
7191 
7192       const double disk_cost =
7193           busy_blocks_disk * cost_model->disk_seek_cost(seek_distance);
7194       sweep_cost.add_io(disk_cost);
7195 
7196       /*
7197         For some cases, ex: when only few blocks need to be read and the
7198         seek distance becomes very large, the sweep cost model can produce
7199         a cost estimate that is larger than the cost of random access.
7200         To handle this case, we use the sweep cost only when it is less
7201         than the random access cost.
7202       */
7203       if (sweep_cost < *cost) *cost = sweep_cost;
7204     }
7205   }
7206   DBUG_PRINT("info", ("returning cost=%g", cost->total_cost()));
7207 }
7208 
7209 /****************************************************************************
7210  * DS-MRR implementation ends
7211  ***************************************************************************/
7212 
7213 /** @brief
7214   Read first row between two ranges.
7215   Store ranges for future calls to read_range_next.
7216 
7217   @param start_key		Start key. Is 0 if no min range
7218   @param end_key		End key.  Is 0 if no max range
7219   @param eq_range_arg	        Set to 1 if start_key == end_key
7220   @param sorted		Set to 1 if result should be sorted per key
7221 
7222   @note
7223     Record is read into table->record[0]
7224 
7225   @retval
7226     0			Found row
7227   @retval
7228     HA_ERR_END_OF_FILE	No rows in range
7229 */
7230 int handler::read_range_first(const key_range *start_key,
7231                               const key_range *end_key, bool eq_range_arg,
7232                               bool sorted MY_ATTRIBUTE((unused))) {
7233   int result;
7234   DBUG_TRACE;
7235 
7236   eq_range = eq_range_arg;
7237   set_end_range(end_key, RANGE_SCAN_ASC);
7238 
7239   range_key_part = table->key_info[active_index].key_part;
7240 
7241   if (!start_key)  // Read first record
7242     result = ha_index_first(table->record[0]);
7243   else
7244     result = ha_index_read_map(table->record[0], start_key->key,
7245                                start_key->keypart_map, start_key->flag);
7246   if (result)
7247     return (result == HA_ERR_KEY_NOT_FOUND) ? HA_ERR_END_OF_FILE : result;
7248 
7249   if (compare_key(end_range) > 0) {
7250     /*
7251       The last read row does not fall in the range. So request
7252       storage engine to release row lock if possible.
7253     */
7254     unlock_row();
7255     result = HA_ERR_END_OF_FILE;
7256   }
7257   return result;
7258 }
7259 
7260 int handler::ha_read_range_first(const key_range *start_key,
7261                                  const key_range *end_key, bool eq_range,
7262                                  bool sorted) {
7263   int result;
7264   DBUG_TRACE;
7265 
7266   // Set status for the need to update generated fields
7267   m_update_generated_read_fields = table->has_gcol();
7268 
7269   result = read_range_first(start_key, end_key, eq_range, sorted);
7270   if (!result && m_update_generated_read_fields) {
7271     result =
7272         update_generated_read_fields(table->record[0], table, active_index);
7273     m_update_generated_read_fields = false;
7274   }
7275   table->set_row_status_from_handler(result);
7276   return result;
7277 }
7278 
7279 int handler::ha_read_range_next() {
7280   int result;
7281   DBUG_TRACE;
7282 
7283   // Set status for the need to update generated fields
7284   m_update_generated_read_fields = table->has_gcol();
7285 
7286   result = read_range_next();
7287   if (!result && m_update_generated_read_fields) {
7288     result =
7289         update_generated_read_fields(table->record[0], table, active_index);
7290     m_update_generated_read_fields = false;
7291   }
7292   table->set_row_status_from_handler(result);
7293   return result;
7294 }
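
/*
  Hedged usage sketch (illustrative only, not code from this file): a caller
  typically drives a range scan through these wrappers roughly as follows,
  where process() stands for a hypothetical row consumer:

    int err = h->ha_read_range_first(&min_key, &max_key, false, true);
    while (err == 0) {
      process(table->record[0]);
      err = h->ha_read_range_next();
    }
    // err == HA_ERR_END_OF_FILE simply means the range has been exhausted.
*/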
7295 
7296 /** @brief
7297   Read next row between two endpoints.
7298 
7299   @note
7300     Record is read into table->record[0]
7301 
7302   @retval
7303     0			Found row
7304   @retval
7305     HA_ERR_END_OF_FILE	No rows in range
7306 */
7307 int handler::read_range_next() {
7308   DBUG_TRACE;
7309 
7310   int result;
7311   if (eq_range) {
7312     /* We trust that index_next_same always gives a row in range */
7313     result =
7314         ha_index_next_same(table->record[0], end_range->key, end_range->length);
7315   } else {
7316     result = ha_index_next(table->record[0]);
7317     if (result) return result;
7318 
7319     if (compare_key(end_range) > 0) {
7320       /*
7321         The last read row does not fall in the range. So request
7322         storage engine to release row lock if possible.
7323       */
7324       unlock_row();
7325       result = HA_ERR_END_OF_FILE;
7326     }
7327   }
7328   return result;
7329 }
7330 
7331 /**
7332   Check if one of the columns in a key is a virtual generated column.
7333 
7334   @param part    the first part of the key to check
7335   @param length  the length of the key
7336   @retval true   if the key contains a virtual generated column
7337   @retval false  if the key does not contain a virtual generated column
7338 */
7339 static bool key_has_vcol(const KEY_PART_INFO *part, uint length) {
7340   for (uint len = 0; len < length; len += part->store_length, ++part)
7341     if (part->field->is_virtual_gcol()) return true;
7342   return false;
7343 }
7344 
7345 void handler::set_end_range(const key_range *range,
7346                             enum_range_scan_direction direction) {
7347   if (range) {
7348     save_end_range = *range;
7349     end_range = &save_end_range;
7350     range_key_part = table->key_info[active_index].key_part;
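    /*
      For an end range read with HA_READ_BEFORE_KEY the range is exclusive,
      so a key equal to the end key must compare as "past the range" (+1);
      with HA_READ_AFTER_KEY the range is inclusive and an equal key is
      still inside it (-1); otherwise equality is reported as 0.
    */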
7351     key_compare_result_on_equal =
7352         ((range->flag == HA_READ_BEFORE_KEY)
7353              ? 1
7354              : (range->flag == HA_READ_AFTER_KEY) ? -1 : 0);
7355     m_virt_gcol_in_end_range = key_has_vcol(range_key_part, range->length);
7356   } else
7357     end_range = nullptr;
7358 
7359   /*
7360     Clear the out-of-range flag in the record buffer when a new range is
7361     started. Also set the in_range_check_pushed_down flag, since the
7362     storage engine needs to do the evaluation of the end-range to avoid
7363     filling the record buffer with out-of-range records.
7364   */
7365   if (m_record_buffer != nullptr) {
7366     m_record_buffer->set_out_of_range(false);
7367     in_range_check_pushed_down = true;
7368   }
7369 
7370   range_scan_direction = direction;
7371 }
7372 
7373 /**
7374   Compare if found key (in row) is over max-value.
7375 
7376   @param range		range to compare to row. May be 0 for no range
7377 
7378   @sa
7379     key.cc::key_cmp()
7380 
7381   @return
7382     The return value is SIGN(key_in_row - range_key):
7383 
7384     - 0   : Key is equal to range or 'range' == 0 (no range)
7385     - -1  : Key is less than range
7386     - 1   : Key is larger than range
7387 */
7388 int handler::compare_key(key_range *range) {
7389   int cmp;
7390   if (!range || in_range_check_pushed_down) return 0;  // No max range
7391   cmp = key_cmp(range_key_part, range->key, range->length);
7392   if (!cmp) cmp = key_compare_result_on_equal;
7393   return cmp;
7394 }
7395 
7396 /*
7397   Compare if a found key (in row) is within the range.
7398 
7399   This function is similar to compare_key() but checks the range scan
7400   direction to determine if this is a descending scan. This function
7401   is used by the index condition pushdown implementation to determine
7402   if the read record is within the range scan.
7403 
7404   @param range Range to compare to row. May be NULL for no range.
7405 
7406   @seealso
7407     handler::compare_key()
7408 
7409   @return Returns whether the key is within the range
7410 
7411     - 0   : Key is equal to range or 'range' == 0 (no range)
7412     - -1  : Key is within the current range
7413     - 1   : Key is outside the current range
7414 */
7415 
7416 int handler::compare_key_icp(const key_range *range) const {
7417   int cmp;
7418   if (!range) return 0;  // no max range
7419   cmp = key_cmp(range_key_part, range->key, range->length);
7420   if (!cmp) cmp = key_compare_result_on_equal;
7421   if (range_scan_direction == RANGE_SCAN_DESC) cmp = -cmp;
7422   return cmp;
7423 }
7424 
7425 /**
7426   Change the offsets of all the fields in a key range.
7427 
7428   @param range     the key range
7429   @param key_part  the first key part
7430   @param diff      how much to change the offsets with
7431 */
7432 static inline void move_key_field_offsets(const key_range *range,
7433                                           const KEY_PART_INFO *key_part,
7434                                           ptrdiff_t diff) {
7435   for (size_t len = 0; len < range->length;
7436        len += key_part->store_length, ++key_part)
7437     key_part->field->move_field_offset(diff);
7438 }
7439 
7440 /**
7441   Check if the key in the given buffer (which is not necessarily
7442   TABLE::record[0]) is within range. Called by the storage engine to
7443   avoid reading too many rows.
7444 
7445   @param buf  the buffer that holds the key
7446   @retval -1 if the key is within the range
7447   @retval  0 if the key is equal to the end_range key, and
7448              key_compare_result_on_equal is 0
7449   @retval  1 if the key is outside the range
7450 */
7451 int handler::compare_key_in_buffer(const uchar *buf) const {
7452   DBUG_ASSERT(end_range != nullptr && (m_record_buffer == nullptr ||
7453                                        !m_record_buffer->is_out_of_range()));
7454 
7455   /*
7456     End range on descending scans is only checked with ICP for now, and then we
7457     check it with compare_key_icp() instead of this function.
7458   */
7459   DBUG_ASSERT(range_scan_direction == RANGE_SCAN_ASC);
7460 
7461   // Make the fields in the key point into the buffer instead of record[0].
7462   const ptrdiff_t diff = buf - table->record[0];
7463   if (diff != 0) move_key_field_offsets(end_range, range_key_part, diff);
7464 
7465   // Compare the key in buf against end_range.
7466   int cmp = key_cmp(range_key_part, end_range->key, end_range->length);
7467   if (cmp == 0) cmp = key_compare_result_on_equal;
7468 
7469   // Reset the field offsets.
7470   if (diff != 0) move_key_field_offsets(end_range, range_key_part, -diff);
7471 
7472   return cmp;
7473 }
7474 
7475 int handler::index_read_idx_map(uchar *buf, uint index, const uchar *key,
7476                                 key_part_map keypart_map,
7477                                 enum ha_rkey_function find_flag) {
7478   int error, error1 = 0;
7479   error = index_init(index, false);
7480   if (!error) {
7481     error = index_read_map(buf, key, keypart_map, find_flag);
7482     error1 = index_end();
7483   }
7484   return error ? error : error1;
7485 }
7486 
7487 uint calculate_key_len(TABLE *table, uint key, key_part_map keypart_map) {
7488   /* works only with key prefixes */
7489   DBUG_ASSERT(((keypart_map + 1) & keypart_map) == 0);
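  /*
    The assertion holds exactly when keypart_map describes a contiguous
    prefix of key parts, i.e. has the bit pattern 0b0...011...1. For
    example, 0b0111 passes (0b1000 & 0b0111 == 0) while the gapped map
    0b0101 fails (0b0110 & 0b0101 == 0b0100).
  */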
7490 
7491   KEY *key_info = table->key_info + key;
7492   KEY_PART_INFO *key_part = key_info->key_part;
7493   KEY_PART_INFO *end_key_part = key_part + actual_key_parts(key_info);
7494   uint length = 0;
7495 
7496   while (key_part < end_key_part && keypart_map) {
7497     length += key_part->store_length;
7498     keypart_map >>= 1;
7499     key_part++;
7500   }
7501   return length;
7502 }
7503 
7504 /**
7505   Returns a list of all known extensions.
7506 
7507     No mutexes, worst case race is a minor surplus memory allocation.
7508     We have to recreate the extension map if mysqld is restarted (for example
7509     within libmysqld).
7510 
7511   @retval
7512     pointer		pointer to TYPELIB structure
7513 */
7514 static bool exts_handlerton(THD *, plugin_ref plugin, void *arg) {
7515   List<const char> *found_exts = static_cast<List<const char> *>(arg);
7516   handlerton *hton = plugin_data<handlerton *>(plugin);
7517   if (hton->state == SHOW_OPTION_YES && hton->file_extensions) {
7518     List_iterator_fast<const char> it(*found_exts);
7519     const char **ext, *old_ext;
7520 
7521     for (ext = hton->file_extensions; *ext; ext++) {
7522       while ((old_ext = it++)) {
7523         if (!strcmp(old_ext, *ext)) break;
7524       }
7525       if (!old_ext) found_exts->push_back(*ext);
7526 
7527       it.rewind();
7528     }
7529   }
7530   return false;
7531 }
7532 
7533 TYPELIB *ha_known_exts() {
7534   TYPELIB *known_extensions = (TYPELIB *)(*THR_MALLOC)->Alloc(sizeof(TYPELIB));
7535   known_extensions->name = "known_exts";
7536   known_extensions->type_lengths = nullptr;
7537 
7538   List<const char> found_exts;
7539   const char **ext, *old_ext;
7540 
7541   plugin_foreach(nullptr, exts_handlerton, MYSQL_STORAGE_ENGINE_PLUGIN,
7542                  &found_exts);
7543 
7544   size_t arr_length = sizeof(char *) * (found_exts.elements + 1);
7545   ext = (const char **)(*THR_MALLOC)->Alloc(arr_length);
7546 
7547   DBUG_ASSERT(nullptr != ext);
7548   known_extensions->count = found_exts.elements;
7549   known_extensions->type_names = ext;
7550 
7551   List_iterator_fast<const char> it(found_exts);
7552   while ((old_ext = it++)) *ext++ = old_ext;
7553   *ext = nullptr;
7554   return known_extensions;
7555 }
7556 
7557 static bool stat_print(THD *thd, const char *type, size_t type_len,
7558                        const char *file, size_t file_len, const char *status,
7559                        size_t status_len) {
7560   Protocol *protocol = thd->get_protocol();
7561   protocol->start_row();
7562   protocol->store_string(type, type_len, system_charset_info);
7563   protocol->store_string(file, file_len, system_charset_info);
7564   protocol->store_string(status, status_len, system_charset_info);
7565   if (protocol->end_row()) return true;
7566   return false;
7567 }
7568 
7569 static bool showstat_handlerton(THD *thd, plugin_ref plugin, void *arg) {
7570   enum ha_stat_type stat = *(enum ha_stat_type *)arg;
7571   handlerton *hton = plugin_data<handlerton *>(plugin);
7572   if (hton->state == SHOW_OPTION_YES && hton->show_status &&
7573       hton->show_status(hton, thd, stat_print, stat))
7574     return true;
7575   return false;
7576 }
7577 
7578 bool ha_show_status(THD *thd, handlerton *db_type, enum ha_stat_type stat) {
7579   List<Item> field_list;
7580   bool result;
7581 
7582   field_list.push_back(new Item_empty_string("Type", 10));
7583   field_list.push_back(new Item_empty_string("Name", FN_REFLEN));
7584   field_list.push_back(new Item_empty_string("Status", 10));
7585 
7586   if (thd->send_result_metadata(&field_list,
7587                                 Protocol::SEND_NUM_ROWS | Protocol::SEND_EOF))
7588     return true;
7589 
7590   if (db_type == nullptr) {
7591     result = plugin_foreach(thd, showstat_handlerton,
7592                             MYSQL_STORAGE_ENGINE_PLUGIN, &stat);
7593   } else {
7594     if (db_type->state != SHOW_OPTION_YES) {
7595       const LEX_CSTRING *name = &se_plugin_array[db_type->slot]->name;
7596       result = stat_print(thd, name->str, name->length, "", 0, "DISABLED", 8)
7597                    ? true
7598                    : false;
7599     } else {
7600       DBUG_EXECUTE_IF("simulate_show_status_failure",
7601                       DBUG_SET("+d,simulate_net_write_failure"););
7602       result = db_type->show_status &&
7603                        db_type->show_status(db_type, thd, stat_print, stat)
7604                    ? true
7605                    : false;
7606       DBUG_EXECUTE_IF("simulate_show_status_failure",
7607                       DBUG_SET("-d,simulate_net_write_failure"););
7608     }
7609   }
7610 
7611   if (!result) my_eof(thd);
7612   return result;
7613 }
7614 
7615 /*
7616   Function to check whether the conditions for row-based binlogging are
7617   correct for the table.
7618 
7619   A row in the given table should be replicated if:
7620   - Row-based replication is enabled in the current thread
7621   - The binlog is enabled
7622   - It is not a temporary table
7623   - The binary log is open
7624   - The database the table resides in shall be binlogged (binlog_*_db rules)
7625   - table is not mysql.event
7626 */
7627 
7628 static bool check_table_binlog_row_based(THD *thd, TABLE *table) {
7629   if (table->s->cached_row_logging_check == -1) {
7630     int const check(table->s->tmp_table == NO_TMP_TABLE &&
7631                     !table->no_replicate &&
7632                     binlog_filter->db_ok(table->s->db.str));
7633     table->s->cached_row_logging_check = check;
7634   }
7635 
7636   DBUG_ASSERT(table->s->cached_row_logging_check == 0 ||
7637               table->s->cached_row_logging_check == 1);
7638 
7639   return (thd->is_current_stmt_binlog_format_row() &&
7640           table->s->cached_row_logging_check &&
7641           (thd->variables.option_bits & OPTION_BIN_LOG) &&
7642           mysql_bin_log.is_open());
7643 }
7644 
7645 /** @brief
7646    Write table maps for all (manually or automatically) locked tables
7647    to the binary log.
7648 
7649    SYNOPSIS
7650      write_locked_table_maps()
7651        thd     Pointer to THD structure
7652 
7653    DESCRIPTION
7654        This function will generate and write table maps for all tables
7655        that are locked by the thread 'thd'.
7656 
7657    RETURN VALUE
7658        0   All OK
7659        1   Failed to write all table maps
7660 
7661    SEE ALSO
7662        THD::lock
7663 */
7664 
7665 static int write_locked_table_maps(THD *thd) {
7666   DBUG_TRACE;
7667   DBUG_PRINT("enter", ("thd: %p  thd->lock: %p "
7668                        "thd->extra_lock: %p",
7669                        thd, thd->lock, thd->extra_lock));
7670 
7671   DBUG_PRINT("debug",
7672              ("get_binlog_table_maps(): %d", thd->get_binlog_table_maps()));
7673 
7674   if (thd->get_binlog_table_maps() == 0) {
7675     for (MYSQL_LOCK *lock : {thd->extra_lock, thd->lock}) {
7676       if (lock == nullptr) continue;
7677 
7678       bool need_binlog_rows_query = thd->variables.binlog_rows_query_log_events;
7679       TABLE **const end_ptr = lock->table + lock->table_count;
7680       for (TABLE **table_ptr = lock->table; table_ptr != end_ptr; ++table_ptr) {
7681         TABLE *const table = *table_ptr;
7682         DBUG_PRINT("info", ("Checking table %s", table->s->table_name.str));
7683         if (table->current_lock == F_WRLCK &&
7684             check_table_binlog_row_based(thd, table)) {
7685           /*
7686             We need to have a transactional behavior for SQLCOM_CREATE_TABLE
7687             (e.g. CREATE TABLE... SELECT * FROM TABLE) in order to keep a
7688             compatible behavior with the STMT based replication even when
7689             the table is not transactional. In other words, if the operation
7690             fails while executing the insert phase nothing is written to the
7691             binlog.
7692 
7693             Note that at this point, we check the type of a set of tables to
7694             create the table map events. In the function binlog_log_row(),
7695             which calls the current function, we check the type of the table
7696             of the current row.
7697           */
7698           bool const has_trans = thd->lex->sql_command == SQLCOM_CREATE_TABLE ||
7699                                  table->file->has_transactions();
7700           int const error = thd->binlog_write_table_map(table, has_trans,
7701                                                         need_binlog_rows_query);
7702           /* Binlog Rows_query log event once for one statement which updates
7703              two or more tables.*/
7704           if (need_binlog_rows_query) need_binlog_rows_query = false;
7705           /*
7706             If an error occurs, it is the responsibility of the caller to
7707             roll back the transaction.
7708           */
7709           if (unlikely(error)) return 1;
7710         }
7711       }
7712     }
7713   }
7714   return 0;
7715 }
7716 
7717 int binlog_log_row(TABLE *table, const uchar *before_record,
7718                    const uchar *after_record, Log_func *log_func) {
7719   bool error = false;
7720   THD *const thd = table->in_use;
7721 
7722   if (check_table_binlog_row_based(thd, table)) {
7723     if (thd->variables.transaction_write_set_extraction != HASH_ALGORITHM_OFF) {
7724       if (before_record && after_record) {
7725         /* capture both images pke */
7726         add_pke(table, thd, table->record[0]);
7727         add_pke(table, thd, table->record[1]);
7728       } else {
7729         add_pke(table, thd, table->record[0]);
7730       }
7731     }
7732     if (table->in_use->is_error()) return error ? HA_ERR_RBR_LOGGING_FAILED : 0;
7733 
7734     DBUG_DUMP("read_set 10", (uchar *)table->read_set->bitmap,
7735               (table->s->fields + 7) / 8);
7736 
7737     /*
7738       If there are no table maps written to the binary log, this is
7739       the first row handled in this statement. In that case, we need
7740       to write table maps for all locked tables to the binary log.
7741     */
7742     if (likely(!(error = write_locked_table_maps(thd)))) {
7743       /*
7744         We need to have a transactional behavior for SQLCOM_CREATE_TABLE
7745         (i.e. CREATE TABLE... SELECT * FROM TABLE) in order to keep a
7746         compatible behavior with the STMT based replication even when
7747         the table is not transactional. In other words, if the operation
7748         fails while executing the insert phase nothing is written to the
7749         binlog.
7750       */
7751       bool const has_trans = thd->lex->sql_command == SQLCOM_CREATE_TABLE ||
7752                              table->file->has_transactions();
7753       error = (*log_func)(thd, table, has_trans, before_record, after_record);
7754     }
7755   }
7756 
7757   return error ? HA_ERR_RBR_LOGGING_FAILED : 0;
7758 }
7759 
7760 int handler::ha_external_lock(THD *thd, int lock_type) {
7761   int error;
7762   DBUG_TRACE;
7763   /*
7764     Whether this is lock or unlock, this should be true, and is to verify that
7765     if get_auto_increment() was called (thus may have reserved intervals or
7766     taken a table lock), ha_release_auto_increment() was too.
7767   */
7768   DBUG_ASSERT(next_insert_id == 0);
7769   /* Consecutive calls for lock without unlocking in between is not allowed */
7770   DBUG_ASSERT(table_share->tmp_table != NO_TMP_TABLE ||
7771               ((lock_type != F_UNLCK && m_lock_type == F_UNLCK) ||
7772                lock_type == F_UNLCK));
7773   /* SQL HANDLER call locks/unlock while scanning (RND/INDEX). */
7774   DBUG_ASSERT(inited == NONE || table->open_by_handler);
7775 
7776   ha_statistic_increment(&System_status_var::ha_external_lock_count);
7777 
7778   MYSQL_TABLE_LOCK_WAIT(PSI_TABLE_EXTERNAL_LOCK, lock_type,
7779                         { error = external_lock(thd, lock_type); })
7780 
7781   /*
7782     We cache the table flags if the locking succeeded. Otherwise, we
7783     keep them as they were when they were fetched in ha_open().
7784   */
7785 
7786   if (error == 0) {
7787     /*
7788       The lock type is needed by MRR when creating a clone of this handler
7789       object.
7790     */
7791     m_lock_type = lock_type;
7792     cached_table_flags = table_flags();
7793   }
7794 
7795   return error;
7796 }
7797 
7798 /** @brief
7799   Check handler usage and reset state of file to after 'open'
7800 
7801   @note Can be called regardless of whether the table is locked or not.
7802 */
7803 int handler::ha_reset() {
7804   DBUG_TRACE;
7805   /* Check that we have called all proper deallocation functions */
7806   DBUG_ASSERT((uchar *)table->def_read_set.bitmap +
7807                   table->s->column_bitmap_size ==
7808               (uchar *)table->def_write_set.bitmap);
7809   DBUG_ASSERT(bitmap_is_set_all(&table->s->all_set));
7810   DBUG_ASSERT(table->key_read == 0);
7811   /* ensure that ha_index_end / ha_rnd_end has been called */
7812   DBUG_ASSERT(inited == NONE);
7813   /* Free cache used by filesort */
7814   free_io_cache(table);
7815   /* reset the bitmaps to point to defaults */
7816   table->default_column_bitmaps();
7817   /* Reset the handler flags used for duplicate record handling */
7818   table->file->extra(HA_EXTRA_NO_IGNORE_DUP_KEY);
7819   table->file->extra(HA_EXTRA_WRITE_CANNOT_REPLACE);
7820   /* Reset information about pushed engine conditions */
7821   pushed_cond = nullptr;
7822   /* Reset information about pushed index conditions */
7823   cancel_pushed_idx_cond();
7824   // Forget the record buffer.
7825   m_record_buffer = nullptr;
7826   m_unique = nullptr;
7827 
7828   const int retval = reset();
7829   return retval;
7830 }
7831 
7832 int handler::ha_write_row(uchar *buf) {
7833   int error;
7834   Log_func *log_func = Write_rows_log_event::binlog_row_logging_function;
7835   DBUG_ASSERT(table_share->tmp_table != NO_TMP_TABLE || m_lock_type == F_WRLCK);
7836 
7837   DBUG_TRACE;
7838   DBUG_EXECUTE_IF("inject_error_ha_write_row", return HA_ERR_INTERNAL_ERROR;);
7839   DBUG_EXECUTE_IF("simulate_storage_engine_out_of_memory",
7840                   return HA_ERR_SE_OUT_OF_MEMORY;);
7841   mark_trx_read_write();
7842 
7843   DBUG_EXECUTE_IF(
7844       "handler_crashed_table_on_usage",
7845       my_error(HA_ERR_CRASHED, MYF(ME_ERRORLOG), table_share->table_name.str);
7846       set_my_errno(HA_ERR_CRASHED); return HA_ERR_CRASHED;);
7847 
7848   MYSQL_TABLE_IO_WAIT(PSI_TABLE_WRITE_ROW, MAX_KEY, error,
7849                       { error = write_row(buf); })
7850 
7851   if (unlikely(error)) return error;
7852 
7853   if (unlikely((error = binlog_log_row(table, nullptr, buf, log_func))))
7854     return error; /* purecov: inspected */
7855 
7856   DEBUG_SYNC_C("ha_write_row_end");
7857   return 0;
7858 }
7859 
7860 int handler::ha_update_row(const uchar *old_data, uchar *new_data) {
7861   int error;
7862   DBUG_ASSERT(table_share->tmp_table != NO_TMP_TABLE || m_lock_type == F_WRLCK);
7863   Log_func *log_func = Update_rows_log_event::binlog_row_logging_function;
7864 
7865   /*
7866     Some storage engines require that the new record is in record[0]
7867     (and the old record is in record[1]).
7868    */
7869   DBUG_ASSERT(new_data == table->record[0]);
7870   DBUG_ASSERT(old_data == table->record[1]);
7871 
7872   mark_trx_read_write();
7873 
7874   DBUG_EXECUTE_IF(
7875       "handler_crashed_table_on_usage",
7876       my_error(HA_ERR_CRASHED, MYF(ME_ERRORLOG), table_share->table_name.str);
7877       set_my_errno(HA_ERR_CRASHED); return (HA_ERR_CRASHED););
7878 
7879   MYSQL_TABLE_IO_WAIT(PSI_TABLE_UPDATE_ROW, active_index, error,
7880                       { error = update_row(old_data, new_data); })
7881 
7882   if (unlikely(error)) return error;
7883   if (unlikely((error = binlog_log_row(table, old_data, new_data, log_func))))
7884     return error;
7885   return 0;
7886 }
7887 
7888 int handler::ha_delete_row(const uchar *buf) {
7889   int error;
7890   DBUG_ASSERT(table_share->tmp_table != NO_TMP_TABLE || m_lock_type == F_WRLCK);
7891   Log_func *log_func = Delete_rows_log_event::binlog_row_logging_function;
7892   /*
7893     Normally table->record[0] is used, but sometimes table->record[1] is used.
7894   */
7895   DBUG_ASSERT(buf == table->record[0] || buf == table->record[1]);
7896   DBUG_EXECUTE_IF("inject_error_ha_delete_row", return HA_ERR_INTERNAL_ERROR;);
7897 
7898   DBUG_EXECUTE_IF(
7899       "handler_crashed_table_on_usage",
7900       my_error(HA_ERR_CRASHED, MYF(ME_ERRORLOG), table_share->table_name.str);
7901       set_my_errno(HA_ERR_CRASHED); return (HA_ERR_CRASHED););
7902 
7903   mark_trx_read_write();
7904 
7905   MYSQL_TABLE_IO_WAIT(PSI_TABLE_DELETE_ROW, active_index, error,
7906                       { error = delete_row(buf); })
7907 
7908   if (unlikely(error)) return error;
7909   if (unlikely((error = binlog_log_row(table, buf, nullptr, log_func))))
7910     return error;
7911   return 0;
7912 }
7913 
7914 /** @brief
7915   use_hidden_primary_key() is called in case of an update/delete when
7916   (table_flags() and HA_PRIMARY_KEY_REQUIRED_FOR_DELETE) is defined
7917   but we don't have a primary key
7918 */
7919 void handler::use_hidden_primary_key() {
7920   /* fallback to use all columns in the table to identify row */
7921   table->use_all_columns();
7922 }
7923 
7924 /**
7925   Get an initialized ha_share.
7926 
7927   @return Initialized ha_share
7928     @retval NULL    ha_share is not yet initialized.
7929     @retval != NULL previously initialized ha_share.
7930 
7931   @note
7932   If not a temp table, then LOCK_ha_data must be held.
7933 */
7934 
7935 Handler_share *handler::get_ha_share_ptr() {
7936   DBUG_TRACE;
7937   DBUG_ASSERT(ha_share && table_share);
7938 
7939 #ifndef DBUG_OFF
7940   if (table_share->tmp_table == NO_TMP_TABLE)
7941     mysql_mutex_assert_owner(&table_share->LOCK_ha_data);
7942 #endif
7943 
7944   return *ha_share;
7945 }
7946 
7947 /**
7948   Set ha_share to be used by all instances of the same table/partition.
7949 
7950   @param arg_ha_share    Handler_share to be shared.
7951 
7952   @note
7953   If not a temp table, then LOCK_ha_data must be held.
7954 */
7955 
7956 void handler::set_ha_share_ptr(Handler_share *arg_ha_share) {
7957   DBUG_TRACE;
7958   DBUG_ASSERT(ha_share);
7959 #ifndef DBUG_OFF
7960   if (table_share->tmp_table == NO_TMP_TABLE)
7961     mysql_mutex_assert_owner(&table_share->LOCK_ha_data);
7962 #endif
7963 
7964   *ha_share = arg_ha_share;
7965 }
7966 
7967 /**
7968   Take a lock for protecting shared handler data.
7969 */
7970 
7971 void handler::lock_shared_ha_data() {
7972   DBUG_ASSERT(table_share);
7973   if (table_share->tmp_table == NO_TMP_TABLE)
7974     mysql_mutex_lock(&table_share->LOCK_ha_data);
7975 }
7976 
7977 /**
7978   Release lock for protecting ha_share.
7979 */
7980 
7981 void handler::unlock_shared_ha_data() {
7982   DBUG_ASSERT(table_share);
7983   if (table_share->tmp_table == NO_TMP_TABLE)
7984     mysql_mutex_unlock(&table_share->LOCK_ha_data);
7985 }
7986 
7987 /**
7988   This structure is a helper structure for passing the length and pointer of
7989   blob space allocated by storage engine.
7990 */
7991 struct blob_len_ptr {
7992   uint length;  // length of the blob
7993   uchar *ptr;   // pointer of the value
7994 };
7995 
7996 /**
7997   Get the blob length and pointer of allocated space from the record buffer.
7998 
7999   While evaluating the blob virtual generated columns, the blob space is
8000   allocated by the server. In order to keep the blob data after the table is
8001   closed, we need to write the data into a specific space allocated by the
8002   storage engine. Here, we have to extract the space pointer and length from
8003   the record buffer.
8004   After we get the values of the virtual generated columns, we copy the data
8005   into the specified space and store it in the record buffer (@see copy_blob_data()).
8006 
8007   @param table                    the pointer of table
8008   @param fields                   bitmap of field index of evaluated
8009                                   generated column
8010   @param[out] blob_len_ptr_array  an array to record the length and pointer
8011                                   of allocated space by storage engine.
8012   @note The caller should provide the blob_len_ptr_array with a size of
8013         MAX_FIELDS.
8014 */
8015 
8016 static void extract_blob_space_and_length_from_record_buff(
8017     const TABLE *table, const MY_BITMAP *const fields,
8018     blob_len_ptr *blob_len_ptr_array) {
8019   int num = 0;
8020   for (Field **vfield = table->vfield; *vfield; vfield++) {
8021     // Check if this field should be included
8022     if (bitmap_is_set(fields, (*vfield)->field_index()) &&
8023         (*vfield)->is_virtual_gcol() && (*vfield)->type() == MYSQL_TYPE_BLOB) {
8024       auto field = down_cast<Field_blob *>(*vfield);
8025       blob_len_ptr_array[num].length = field->data_length();
8026       // TODO: The following check is only for Innodb.
8027       DBUG_ASSERT(blob_len_ptr_array[num].length == 255 ||
8028                   blob_len_ptr_array[num].length == 768 ||
8029                   blob_len_ptr_array[num].length == 3073);
8030 
8031       blob_len_ptr_array[num].ptr = field->get_blob_data();
8032 
8033       // Let server allocate the space for BLOB virtual generated columns
8034       field->reset();
8035 
8036       num++;
8037       DBUG_ASSERT(num <= MAX_FIELDS);
8038     }
8039   }
8040 }
8041 
8042 /**
8043   Copy the value of BLOB virtual generated columns into the space allocated
8044   by storage engine.
8045 
8046   This is because the table is closed after evaluating the value. In order to
8047   keep the BLOB value after the table is closed, we have to copy the value into
8048   the place that the storage engine has prepared for it.
8049 
8050   @param table              pointer of the table to be operated on
8051   @param fields             bitmap of field index of evaluated generated column
8052   @param blob_len_ptr_array array of length and pointer of allocated space by
8053                             storage engine.
8054 */
8055 
8056 static void copy_blob_data(const TABLE *table, const MY_BITMAP *const fields,
8057                            blob_len_ptr *blob_len_ptr_array) {
8058   uint num = 0;
8059   for (Field **vfield = table->vfield; *vfield; vfield++) {
8060     // Check if this field should be included
8061     if (bitmap_is_set(fields, (*vfield)->field_index()) &&
8062         (*vfield)->is_virtual_gcol() && (*vfield)->type() == MYSQL_TYPE_BLOB) {
8063       DBUG_ASSERT(blob_len_ptr_array[num].length > 0);
8064       DBUG_ASSERT(blob_len_ptr_array[num].ptr != nullptr);
8065 
8066       /*
8067         Only copy as much of the blob as the storage engine has
8068         allocated space for. This is sufficient since the only use of the
8069         blob in the storage engine is for using a prefix of it in a
8070         secondary index.
8071       */
8072       uint length = (*vfield)->data_length();
8073       const uint alloc_len = blob_len_ptr_array[num].length;
8074       length = length > alloc_len ? alloc_len : length;
8075 
8076       Field_blob *blob_field = down_cast<Field_blob *>(*vfield);
8077       memcpy(blob_len_ptr_array[num].ptr, blob_field->get_blob_data(), length);
8078       blob_field->store_in_allocated_space(
8079           pointer_cast<char *>(blob_len_ptr_array[num].ptr), length);
8080       num++;
8081       DBUG_ASSERT(num <= MAX_FIELDS);
8082     }
8083   }
8084 }
8085 
8086 /*
8087   Evaluate generated column's value. This is an internal helper reserved for
8088   handler::my_eval_gcolumn_expr().
8089 
8090   @param thd        pointer of THD
8091   @param table      pointer to the table the evaluated generated
8092                     columns belong to
8093   @param fields     bitmap of field index of evaluated generated column
8094   @param[in,out] record record buffer of the base columns the generated
8095                         columns depend on. After calling this function, it is
8096                         used to return the values of the generated columns.
8097   @param in_purge   whether the function is called by purge thread
8098 
8099   @return true in case of error, false otherwise.
8100 */
8101 
8102 static bool my_eval_gcolumn_expr_helper(THD *thd, TABLE *table,
8103                                         const MY_BITMAP *const fields,
8104                                         uchar *record, bool in_purge,
8105                                         const char **mv_data_ptr,
8106                                         ulong *mv_length) {
8107   DBUG_TRACE;
8108   DBUG_ASSERT(table && table->vfield);
8109   DBUG_ASSERT(!thd->is_error());
8110 
8111   uchar *old_buf = table->record[0];
8112   repoint_field_to_record(table, old_buf, record);
8113 
8114   blob_len_ptr blob_len_ptr_array[MAX_FIELDS];
8115 
8116   /*
8117     If it's the purge thread, we need to get the space allocated by the
8118     storage engine for the blobs.
8119   */
8120   if (in_purge)
8121     extract_blob_space_and_length_from_record_buff(table, fields,
8122                                                    blob_len_ptr_array);
8123 
8124   bool res = false;
8125   Field *mv_field = nullptr;
8126   MY_BITMAP fields_to_evaluate;
8127   my_bitmap_map bitbuf[bitmap_buffer_size(MAX_FIELDS) / sizeof(my_bitmap_map)];
8128   bitmap_init(&fields_to_evaluate, bitbuf, table->s->fields);
8129   bitmap_set_all(&fields_to_evaluate);
8130   bitmap_intersect(&fields_to_evaluate, fields);
8131   /*
8132     In addition to evaluating the value for the columns requested by
8133     the caller we also need to evaluate any virtual columns that these
8134     depend on.
8135     This loop goes through the columns that should be evaluated and
8136     adds all the base columns. If the base column is virtual, it has
8137     to be evaluated.
8138   */
8139   for (Field **vfield_ptr = table->vfield; *vfield_ptr; vfield_ptr++) {
8140     Field *field = *vfield_ptr;
8141     // Validate that the field number is less than the bit map size
8142     DBUG_ASSERT(field->field_index() < fields->n_bits);
8143 
8144     if (bitmap_is_set(fields, field->field_index())) {
8145       bitmap_union(&fields_to_evaluate, &field->gcol_info->base_columns_map);
8146       if (field->is_array()) {
8147         mv_field = field;
8148         // Backup current value and use dedicated temporary buffer
8149         if ((down_cast<Field_blob *>(field))->backup_blob_field()) return true;
8150       }
8151     }
8152   }
8153 
8154   /*
8155     Evaluate all requested columns and all virtual base columns that they
8156     depend on.
8157 
8158     This function is called by the storage engine, which may request to
8159     evaluate more generated columns than read_set/write_set says.
8160     For example, InnoDB's row_sel_sec_rec_is_for_clust_rec() reads the full
8161     record from the clustered index and asks us to compute generated columns
8162     that match key fields in the used secondary index. So we trust that the
8163     engine has filled all base columns necessary to requested computations,
8164     and we ignore read_set/write_set.
8165  */
8166 
8167   my_bitmap_map *old_maps[2];
8168   dbug_tmp_use_all_columns(table, old_maps, table->read_set, table->write_set);
8169 
8170   for (Field **vfield_ptr = table->vfield; *vfield_ptr; vfield_ptr++) {
8171     Field *field = *vfield_ptr;
8172 
8173     // Check if we should evaluate this field
8174     if (bitmap_is_set(&fields_to_evaluate, field->field_index()) &&
8175         field->is_virtual_gcol()) {
8176       DBUG_ASSERT(field->gcol_info && field->gcol_info->expr_item->fixed);
8177 
8178       const type_conversion_status save_in_field_status =
8179           field->gcol_info->expr_item->save_in_field(field, false);
8180       DBUG_ASSERT(!thd->is_error() || save_in_field_status != TYPE_OK);
8181 
8182       /*
8183         save_in_field() may return non-zero even if there was no
8184         error. This happens if a warning is raised, such as an
8185         out-of-range warning when converting the result to the target
8186         type of the virtual column. We should stop only if the
8187         non-zero return value was caused by an actual error.
8188       */
8189       if (save_in_field_status != TYPE_OK && thd->is_error()) {
8190         res = true;
8191         break;
8192       }
8193     }
8194   }
8195 
8196   dbug_tmp_restore_column_maps(table->read_set, table->write_set, old_maps);
8197 
8198   /*
8199     If it's a purge thread, we need to copy the blob data into the specified
8200     place allocated by the storage engine so that the blob data can still be
8201     accessed after the table is closed.
8202   */
8203   if (in_purge) copy_blob_data(table, fields, blob_len_ptr_array);
8204 
8205   if (mv_field) {
8206     DBUG_ASSERT(mv_data_ptr);
8207     Field_json *fld = down_cast<Field_json *>(mv_field);
8208     // Save calculated value
8209     *mv_data_ptr = fld->get_binary();
8210     *mv_length = fld->data_length();
8211     // Restore original value
8212     (fld)->restore_blob_backup();
8213   }
8214 
8215   repoint_field_to_record(table, record, old_buf);
8216   return res;
8217 }
8218 
8219 // Set se_private_id and se_private_data during upgrade
8220 bool handler::ha_upgrade_table(THD *thd, const char *dbname,
8221                                const char *table_name, dd::Table *dd_table,
8222                                TABLE *table_arg) {
8223   table = table_arg;
8224   return upgrade_table(thd, dbname, table_name, dd_table);
8225 }
8226 
8227 /**
8228    Callback to allow InnoDB to prepare a template for generated
8229    column processing. This function will open the table without
8230    opening it in the engine and call the provided function with
8231    the TABLE object created. The function will then close the TABLE.
8232 
8233    @param thd            Thread handle
8234    @param db_name        Name of database containing the table
8235    @param table_name     Name of table to open
8236    @param myc            InnoDB function to call for processing TABLE
8237    @param ib_table       Argument for InnoDB function
8238 
8239    @return true in case of error, false otherwise.
8240 */
8241 
8242 bool handler::my_prepare_gcolumn_template(THD *thd, const char *db_name,
8243                                           const char *table_name,
8244                                           my_gcolumn_template_callback_t myc,
8245                                           void *ib_table) {
8246   char path[FN_REFLEN + 1];
8247   bool was_truncated;
8248   build_table_filename(path, sizeof(path) - 1 - reg_ext_length, db_name,
8249                        table_name, "", 0, &was_truncated);
8250   DBUG_ASSERT(!was_truncated);
8251   bool rc = true;
8252 
8253   MDL_ticket *mdl_ticket = nullptr;
8254   if (dd::acquire_shared_table_mdl(thd, db_name, table_name, false,
8255                                    &mdl_ticket))
8256     return true;
8257 
8258   TABLE *table = nullptr;
8259   {
8260     dd::cache::Dictionary_client::Auto_releaser releaser(thd->dd_client());
8261     const dd::Table *tab_obj = nullptr;
8262     if (thd->dd_client()->acquire(db_name, table_name, &tab_obj)) return true;
8263     DBUG_ASSERT(tab_obj);
8264 
8265     // Note! The second-to-last argument to open_table_uncached() must be false,
8266     // since the table already exists in the TDC. Allowing the table to
8267     // be opened in the SE in this case is dangerous as the two shares
8268     // could get conflicting SE private data.
8269     table = open_table_uncached(thd, path, db_name, table_name, false, false,
8270                                 *tab_obj);
8271   }
8272 
8273   dd::release_mdl(thd, mdl_ticket);
8274 
8275   if (table) {
8276     myc(table, ib_table);
8277     intern_close_table(table);
8278     rc = false;
8279   }
8280   return rc;
8281 }
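
/*
  Illustrative sketch (not part of the server): how a storage engine might
  use my_prepare_gcolumn_template(). The callback receives the server-only
  TABLE object plus the engine-supplied argument. The names below
  (prepare_template_cb, my_engine_template_t, handler_obj, engine_template)
  are hypothetical, and the callback parameters are assumed to match the
  my_gcolumn_template_callback_t typedef in handler.h.

    static void prepare_template_cb(const TABLE *table, void *arg) {
      // Engine-private state; purely for illustration.
      auto *tmpl = static_cast<my_engine_template_t *>(arg);
      // ... walk table->vfield to build the generated-column template ...
    }

    // handler_obj is assumed to be a handler belonging to the engine.
    bool failed = handler_obj->my_prepare_gcolumn_template(
        thd, "test", "t1", prepare_template_cb, &engine_template);
*/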

/**
  Callback for generated columns processing. Will open the table, in the
  server *only*, and call my_eval_gcolumn_expr_helper() to do the actual
  processing. This function is a variant of the other
  handler::my_eval_gcolumn_expr() but is intended for use when no TABLE
  object already exists - e.g. from purge threads.

  Note! The call to open_table_uncached() must be made with the second-to-last
  argument (open_in_engine) set to false. Failing to do so will cause
  deadlocks and incorrect behavior.

  @param thd         thread handle
  @param db_name     database containing the table to open
  @param table_name  name of table to open
  @param fields      bitmap of the field indexes of the generated columns
                     to evaluate
  @param record      record buffer
  @param[out] mv_data_ptr  For a typed array field, the pointer to its value
                           is returned in this argument
  @param[out] mv_length    Length of the value above

  @return true in case of error, false otherwise.
*/

bool handler::my_eval_gcolumn_expr_with_open(THD *thd, const char *db_name,
                                             const char *table_name,
                                             const MY_BITMAP *const fields,
                                             uchar *record,
                                             const char **mv_data_ptr,
                                             ulong *mv_length) {
  bool retval = true;

  char path[FN_REFLEN + 1];
  bool was_truncated;
  build_table_filename(path, sizeof(path) - 1 - reg_ext_length, db_name,
                       table_name, "", 0, &was_truncated);
  DBUG_ASSERT(!was_truncated);

  MDL_ticket *mdl_ticket = nullptr;
  if (dd::acquire_shared_table_mdl(thd, db_name, table_name, false,
                                   &mdl_ticket))
    return true;

  TABLE *table = nullptr;
  {
    dd::cache::Dictionary_client::Auto_releaser releaser(thd->dd_client());
    const dd::Table *tab_obj = nullptr;
    if (thd->dd_client()->acquire(db_name, table_name, &tab_obj)) return true;
    DBUG_ASSERT(tab_obj);

    table = open_table_uncached(thd, path, db_name, table_name, false, false,
                                *tab_obj);
  }

  dd::release_mdl(thd, mdl_ticket);

  if (table) {
    retval = my_eval_gcolumn_expr_helper(thd, table, fields, record, true,
                                         mv_data_ptr, mv_length);
    intern_close_table(table);
  }

  return retval;
}
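
/*
  Illustrative sketch (not part of the server): how an engine-internal
  thread that has no TABLE object (e.g. purge) might evaluate a virtual
  generated column. handler_obj, gcol_bitmap and record are hypothetical;
  gcol_bitmap is assumed to be an initialized MY_BITMAP with the bit for
  the generated column's field_index set, and record must be a buffer in
  table->record[0] format with the base columns already filled in.

    const char *mv_data = nullptr;
    ulong mv_len = 0;
    if (handler_obj->my_eval_gcolumn_expr_with_open(
            thd, "test", "t1", &gcol_bitmap, record, &mv_data, &mv_len))
      return true;  // error has already been reported
*/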

bool handler::my_eval_gcolumn_expr(THD *thd, TABLE *table,
                                   const MY_BITMAP *const fields, uchar *record,
                                   const char **mv_data_ptr, ulong *mv_length) {
  DBUG_TRACE;

  const bool res = my_eval_gcolumn_expr_helper(thd, table, fields, record,
                                               false, mv_data_ptr, mv_length);
  return res;
}

bool handler::filter_dup_records() {
  DBUG_ASSERT(inited == INDEX && m_unique);
  position(table->record[0]);
  return m_unique->unique_add(ref);
}

int handler::ha_extra(enum ha_extra_function operation) {
  if (operation == HA_EXTRA_ENABLE_UNIQUE_RECORD_FILTER) {
    // This operation should be called only for an active multi-valued index
    DBUG_ASSERT(inited == INDEX &&
                (table->key_info[active_index].flags & HA_MULTI_VALUED_KEY));
    // The unique filter uses only the row id to weed out duplicates, so it
    // works with any active index.
    if (!m_unique &&
        (!(m_unique = new (*THR_MALLOC) Unique_on_insert(ref_length)) ||
         m_unique->init())) {
      /* purecov: begin inspected */
      destroy(m_unique);
      return HA_ERR_OUT_OF_MEM;
      /* purecov: end */
    }
    m_unique->reset(true);
    return 0;
  } else if (operation == HA_EXTRA_DISABLE_UNIQUE_RECORD_FILTER) {
    if (m_unique) {
      m_unique->cleanup();
      destroy(m_unique);
      m_unique = nullptr;
    }
  }
  return extra(operation);
}
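
/*
  Illustrative sketch (not part of the server): the calling sequence the
  unique record filter is designed for when scanning a multi-valued index.
  h and read_next_row() are hypothetical, and the exact duplicate-handling
  at the call sites may differ; an index scan is assumed to be initialized
  on the multi-valued index.

    if (h->ha_extra(HA_EXTRA_ENABLE_UNIQUE_RECORD_FILTER)) return true;
    int err;
    while (!(err = read_next_row(h))) {
      // Skip rows whose row id was already returned via another key entry.
      if (h->filter_dup_records()) continue;
      // ... process the row ...
    }
    h->ha_extra(HA_EXTRA_DISABLE_UNIQUE_RECORD_FILTER);
*/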

/**
  Auxiliary structure for passing information to notify_*_helper()
  functions.
*/

struct HTON_NOTIFY_PARAMS {
  HTON_NOTIFY_PARAMS(const MDL_key *mdl_key, ha_notification_type mdl_type)
      : key(mdl_key),
        notification_type(mdl_type),
        some_htons_were_notified(false),
        victimized(false) {}

  const MDL_key *key;
  const ha_notification_type notification_type;
  bool some_htons_were_notified;
  bool victimized;
};

static bool notify_exclusive_mdl_helper(THD *thd, plugin_ref plugin,
                                        void *arg) {
  handlerton *hton = plugin_data<handlerton *>(plugin);
  if (hton->state == SHOW_OPTION_YES && hton->notify_exclusive_mdl) {
    HTON_NOTIFY_PARAMS *params = reinterpret_cast<HTON_NOTIFY_PARAMS *>(arg);

    if (hton->notify_exclusive_mdl(thd, params->key, params->notification_type,
                                   &params->victimized)) {
      // Ignore failures from post event notification.
      if (params->notification_type == HA_NOTIFY_PRE_EVENT) return true;
    } else
      params->some_htons_were_notified = true;
  }
  return false;
}

/**
  Notify/get permission from all interested storage engines before
  acquisition or after release of exclusive metadata lock on object
  represented by key.

  @param thd                Thread context.
  @param mdl_key            MDL key identifying object on which exclusive
                            lock is to be acquired/was released.
  @param notification_type  Indicates whether this is pre-acquire or
                            post-release notification.
  @param[out] victimized    'true' if locking failed because we were chosen
                            as a victim in order to avoid possible deadlocks.

  See @sa handlerton::notify_exclusive_mdl for details about
  calling convention and error reporting.

  @return False - if notification was successful/lock can be acquired,
          True - if it has failed/lock should not be acquired.
*/

bool ha_notify_exclusive_mdl(THD *thd, const MDL_key *mdl_key,
                             ha_notification_type notification_type,
                             bool *victimized) {
  HTON_NOTIFY_PARAMS params(mdl_key, notification_type);
  *victimized = false;
  if (plugin_foreach(thd, notify_exclusive_mdl_helper,
                     MYSQL_STORAGE_ENGINE_PLUGIN, &params)) {
    *victimized = params.victimized;
    /*
      If some SE hasn't given its permission to acquire the lock while other
      SEs have given their permission, we need to notify the latter group
      about the failed lock acquisition. We do this by calling the
      post-release notification for all interested SEs unconditionally.
    */
    if (notification_type == HA_NOTIFY_PRE_EVENT &&
        params.some_htons_were_notified) {
      HTON_NOTIFY_PARAMS rollback_params(mdl_key, HA_NOTIFY_POST_EVENT);
      (void)plugin_foreach(thd, notify_exclusive_mdl_helper,
                           MYSQL_STORAGE_ENGINE_PLUGIN, &rollback_params);
    }
    return true;
  }
  return false;
}
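
/*
  Illustrative sketch (not part of the server): the intended pairing of
  pre-acquire and post-release notifications as seen from MDL code.
  acquire_lock()/release_lock() stand in for the real MDL machinery and
  are hypothetical here.

    bool victimized;
    if (ha_notify_exclusive_mdl(thd, &mdl_key, HA_NOTIFY_PRE_EVENT,
                                &victimized))
      return true;  // an SE refused, or we were victimized to avoid deadlock

    if (acquire_lock(&mdl_key)) return true;
    // ... DDL work under the exclusive lock ...
    release_lock(&mdl_key);

    // Post-release notification; failures from it are ignored by the helper.
    ha_notify_exclusive_mdl(thd, &mdl_key, HA_NOTIFY_POST_EVENT, &victimized);
*/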

static bool notify_alter_table_helper(THD *thd, plugin_ref plugin, void *arg) {
  handlerton *hton = plugin_data<handlerton *>(plugin);
  if (hton->state == SHOW_OPTION_YES && hton->notify_alter_table) {
    HTON_NOTIFY_PARAMS *params = reinterpret_cast<HTON_NOTIFY_PARAMS *>(arg);

    if (hton->notify_alter_table(thd, params->key, params->notification_type)) {
      // Ignore failures from post event notification.
      if (params->notification_type == HA_NOTIFY_PRE_EVENT) return true;
    } else
      params->some_htons_were_notified = true;
  }
  return false;
}

/**
  Notify/get permission from all interested storage engines before
  or after ALTER TABLE is executed on the table identified by key.

  @param thd                Thread context.
  @param mdl_key            MDL key identifying table.
  @param notification_type  Indicates whether this is pre-ALTER or
                            post-ALTER notification.

  See @sa handlerton::notify_alter_table for rationale,
  details about calling convention and error reporting.

  @return False - if notification was successful/ALTER TABLE can
                  proceed.
          True -  if it has failed/ALTER TABLE should fail.
*/

bool ha_notify_alter_table(THD *thd, const MDL_key *mdl_key,
                           ha_notification_type notification_type) {
  HTON_NOTIFY_PARAMS params(mdl_key, notification_type);

  if (plugin_foreach(thd, notify_alter_table_helper,
                     MYSQL_STORAGE_ENGINE_PLUGIN, &params)) {
    /*
      If some SE hasn't given its permission to do ALTER TABLE while other
      SEs have given their permission, we need to notify the latter group
      about the failed attempt. We do this by calling the post-ALTER TABLE
      notification for all interested SEs unconditionally.
    */
    if (notification_type == HA_NOTIFY_PRE_EVENT &&
        params.some_htons_were_notified) {
      HTON_NOTIFY_PARAMS rollback_params(mdl_key, HA_NOTIFY_POST_EVENT);
      (void)plugin_foreach(thd, notify_alter_table_helper,
                           MYSQL_STORAGE_ENGINE_PLUGIN, &rollback_params);
    }
    return true;
  }
  return false;
}
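
/*
  Illustrative sketch (not part of the server): how a storage engine could
  hook into these notifications from its handlerton initialization. The
  example_* names are hypothetical; the callback parameters are assumed to
  match the call made in notify_alter_table_helper() above.

    static bool example_notify_alter_table(THD *, const MDL_key *key,
                                           ha_notification_type type) {
      if (type == HA_NOTIFY_PRE_EVENT) {
        // Return true to veto the ALTER TABLE, e.g. while the table is
        // busy inside the engine.
        return example_table_is_busy(key);
      }
      return false;  // post-event failures are ignored by the server anyway
    }

    // In the engine's init function:
    //   hton->notify_alter_table = example_notify_alter_table;
*/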

/**
  Set the transaction isolation level for the next transaction and update
  session tracker information about the transaction isolation level.

  @param thd           THD session setting the tx_isolation.
  @param tx_isolation  The isolation level to be set.
  @param one_shot      True if the isolation level should be restored to
                       session default after finishing the transaction.

  @return false on success, true if an invalid isolation level was passed.
*/
bool set_tx_isolation(THD *thd, enum_tx_isolation tx_isolation, bool one_shot) {
  TX_TRACKER_GET(tst);

  if (thd->variables.session_track_transaction_info <= TX_TRACK_NONE)
    tst = nullptr;

  thd->tx_isolation = tx_isolation;

  if (one_shot) {
    DBUG_ASSERT(!thd->in_active_multi_stmt_transaction());
    DBUG_ASSERT(!thd->in_sub_stmt);
    enum enum_tx_isol_level l;
    switch (thd->tx_isolation) {
      case ISO_READ_UNCOMMITTED:
        l = TX_ISOL_UNCOMMITTED;
        break;
      case ISO_READ_COMMITTED:
        l = TX_ISOL_COMMITTED;
        break;
      case ISO_REPEATABLE_READ:
        l = TX_ISOL_REPEATABLE;
        break;
      case ISO_SERIALIZABLE:
        l = TX_ISOL_SERIALIZABLE;
        break;
      default:
        DBUG_ASSERT(0);
        return true;
    }
    if (tst) tst->set_isol_level(thd, l);
  } else if (tst) {
    tst->set_isol_level(thd, TX_ISOL_INHERIT);
  }
  return false;
}
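
/*
  Illustrative sketch (not part of the server): the two flavours of this
  call. A one-shot change roughly corresponds to SET TRANSACTION ISOLATION
  LEVEL (next transaction only), a non-one-shot change to a session-wide
  setting; the surrounding command handling is elided.

    // Next transaction only; tracker reports the concrete level:
    if (set_tx_isolation(thd, ISO_READ_COMMITTED, true)) return true;

    // Session-wide change; tracker reports TX_ISOL_INHERIT:
    if (set_tx_isolation(thd, ISO_REPEATABLE_READ, false)) return true;
*/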

static bool post_recover_handlerton(THD *, plugin_ref plugin, void *) {
  handlerton *hton = plugin_data<handlerton *>(plugin);

  if (hton->state == SHOW_OPTION_YES && hton->post_recover)
    hton->post_recover();

  return false;
}

void ha_post_recover(void) {
  (void)plugin_foreach(nullptr, post_recover_handlerton,
                       MYSQL_STORAGE_ENGINE_PLUGIN, nullptr);
}

void handler::ha_set_primary_handler(handler *primary_handler) {
  DBUG_ASSERT((ht->flags & HTON_IS_SECONDARY_ENGINE) != 0);
  DBUG_ASSERT(primary_handler->table->s->has_secondary_engine());
  m_primary_handler = primary_handler;
}

/**
  Checks if the database name is a reserved word used by the SE, by invoking
  the handlerton method.

  @param  plugin        SE plugin.
  @param  name          Database name.

  @retval true          If the name is a reserved word.
  @retval false         If the name is not a reserved word.
*/
static bool is_reserved_db_name_handlerton(THD *, plugin_ref plugin,
                                           void *name) {
  handlerton *hton = plugin_data<handlerton *>(plugin);
  if (hton->state == SHOW_OPTION_YES && hton->is_reserved_db_name)
    return (hton->is_reserved_db_name(hton, (const char *)name));
  return false;
}

/**
   Check if the database name is a reserved word used by any SE.

   @param  name    Database name.

   @retval true    If the name is a reserved word.
   @retval false   If the name is not a reserved word.
*/
bool ha_check_reserved_db_name(const char *name) {
  return (plugin_foreach(nullptr, is_reserved_db_name_handlerton,
                         MYSQL_STORAGE_ENGINE_PLUGIN,
                         const_cast<char *>(name)));
}
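
/*
  Illustrative sketch (not part of the server): a CREATE DATABASE style
  check. The error reported here is hypothetical; the point is only that
  the name is rejected when any SE claims it as reserved.

    if (ha_check_reserved_db_name(db_name)) {
      my_error(ER_WRONG_DB_NAME, MYF(0), db_name);
      return true;
    }
*/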

/**
   Check whether an error returned by an index read is an index access
   error. Any error other than HA_ERR_END_OF_FILE or HA_ERR_KEY_NOT_FOUND
   stops the next index read.

   @param  error    Handler error code.

   @retval true     if error is different from HA_ERR_END_OF_FILE or
                    HA_ERR_KEY_NOT_FOUND.
   @retval false    if error is HA_ERR_END_OF_FILE or HA_ERR_KEY_NOT_FOUND.
*/
bool is_index_access_error(int error) {
  return (error != HA_ERR_END_OF_FILE && error != HA_ERR_KEY_NOT_FOUND);
}
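
/*
  Illustrative sketch (not part of the server): how a caller typically
  distinguishes "no (more) rows" from a real index access failure.
  h, record, key, key_len and keypart_map are hypothetical.

    int error = h->ha_index_read_map(record, key, keypart_map,
                                     HA_READ_KEY_EXACT);
    while (!error) {
      // ... process the row in record ...
      error = h->ha_index_next_same(record, key, key_len);
    }
    if (is_index_access_error(error))
      return error;  // genuine failure: report it
    return 0;        // HA_ERR_END_OF_FILE / HA_ERR_KEY_NOT_FOUND: just done
*/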