1 /* Copyright (c) 2000, 2020, Oracle and/or its affiliates. All rights reserved.
2
3 This program is free software; you can redistribute it and/or modify
4 it under the terms of the GNU General Public License, version 2.0,
5 as published by the Free Software Foundation.
6
7 This program is also distributed with certain software (including
8 but not limited to OpenSSL) that is licensed under separate terms,
9 as designated in a particular file or component or in included license
10 documentation. The authors of MySQL hereby grant you an additional
11 permission to link the program and your derivative works with the
12 separately licensed software that they have included with MySQL.
13
14 This program is distributed in the hope that it will be useful,
15 but WITHOUT ANY WARRANTY; without even the implied warranty of
16 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 GNU General Public License, version 2.0, for more details.
18
19 You should have received a copy of the GNU General Public License
20 along with this program; if not, write to the Free Software
21 Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */
22
23 /** @file sql/handler.cc
24
25 @brief
26 Implements functions in the handler interface that are shared between all
27 storage engines.
28 */
29
30 #include "sql/handler.h"
31
32 #include <ctype.h>
33 #include <errno.h>
34 #include <limits.h>
35 #include <stdio.h>
36 #include <stdlib.h>
37 #include <algorithm>
38 #include <atomic>
39 #include <cmath>
40 #include <list>
41 #include <random> // std::uniform_real_distribution
42 #include <string>
43 #include <vector>
44
45 #include "keycache.h"
46 #include "libbinlogevents/include/binlog_event.h"
47 #include "m_ctype.h"
48 #include "m_string.h"
49 #include "my_bit.h" // my_count_bits
50 #include "my_bitmap.h" // MY_BITMAP
51 #include "my_check_opt.h"
52 #include "my_dbug.h"
53 #include "my_loglevel.h"
54 #include "my_macros.h"
55 #include "my_pointer_arithmetic.h"
56 #include "my_psi_config.h"
57 #include "my_sqlcommand.h"
58 #include "my_sys.h" // MEM_DEFINED_IF_ADDRESSABLE()
59 #include "myisam.h" // TT_FOR_UPGRADE
60 #include "mysql/components/services/log_builtins.h"
61 #include "mysql/components/services/log_shared.h"
62 #include "mysql/plugin.h"
63 #include "mysql/psi/mysql_file.h"
64 #include "mysql/psi/mysql_mutex.h"
65 #include "mysql/psi/mysql_table.h"
66 #include "mysql/psi/mysql_transaction.h"
67 #include "mysql/psi/psi_base.h"
68 #include "mysql/psi/psi_table.h"
69 #include "mysql/service_mysql_alloc.h"
70 #include "mysql_com.h"
71 #include "mysql_version.h" // MYSQL_VERSION_ID
72 #include "mysqld_error.h"
73 #include "prealloced_array.h"
74 #include "sql/auth/auth_common.h" // check_readonly() and SUPER_ACL
75 #include "sql/binlog.h" // mysql_bin_log
76 #include "sql/check_stack.h"
77 #include "sql/clone_handler.h"
78 #include "sql/current_thd.h"
79 #include "sql/dd/cache/dictionary_client.h" // dd::cache::Dictionary_client
80 #include "sql/dd/dd.h" // dd::get_dictionary
81 #include "sql/dd/dictionary.h" // dd:acquire_shared_table_mdl
82 #include "sql/dd/types/table.h" // dd::Table
83 #include "sql/dd_table_share.h" // open_table_def
84 #include "sql/debug_sync.h" // DEBUG_SYNC
85 #include "sql/derror.h" // ER_DEFAULT
86 #include "sql/error_handler.h" // Internal_error_handler
87 #include "sql/field.h"
88 #include "sql/item.h"
89 #include "sql/lock.h" // MYSQL_LOCK
90 #include "sql/log.h"
91 #include "sql/log_event.h" // Write_rows_log_event
92 #include "sql/mdl.h"
93 #include "sql/mysqld.h" // global_system_variables heap_hton ..
94 #include "sql/opt_costconstantcache.h" // reload_optimizer_cost_constants
95 #include "sql/opt_costmodel.h"
96 #include "sql/opt_hints.h"
97 #include "sql/protocol.h"
98 #include "sql/psi_memory_key.h"
99 #include "sql/query_options.h"
100 #include "sql/record_buffer.h" // Record_buffer
101 #include "sql/rpl_filter.h"
102 #include "sql/rpl_gtid.h"
103 #include "sql/rpl_handler.h" // RUN_HOOK
104 #include "sql/rpl_rli.h" // is_atomic_ddl_commit_on_slave
105 #include "sql/rpl_slave_commit_order_manager.h" // Commit_order_manager
106 #include "sql/rpl_write_set_handler.h" // add_pke
107 #include "sql/sdi_utils.h" // import_serialized_meta_data
108 #include "sql/session_tracker.h"
109 #include "sql/sql_base.h" // free_io_cache
110 #include "sql/sql_bitmap.h"
111 #include "sql/sql_class.h"
112 #include "sql/sql_error.h"
113 #include "sql/sql_lex.h"
114 #include "sql/sql_parse.h" // check_stack_overrun
115 #include "sql/sql_plugin.h" // plugin_foreach
116 #include "sql/sql_select.h" // actual_key_parts
117 #include "sql/sql_table.h" // build_table_filename
118 #include "sql/strfunc.h" // strnncmp_nopads
119 #include "sql/system_variables.h"
120 #include "sql/table.h"
121 #include "sql/tc_log.h"
122 #include "sql/thr_malloc.h"
123 #include "sql/transaction.h" // trans_commit_implicit
124 #include "sql/transaction_info.h"
125 #include "sql/xa.h"
126 #include "sql_string.h"
127 #include "sql_tmp_table.h" // free_tmp_table
128 #include "template_utils.h"
129 #include "uniques.h" // Unique_on_insert
130 #include "varlen_sort.h"
131
132 /**
133 @def MYSQL_TABLE_IO_WAIT
134 Instrumentation helper for table io_waits.
135 Note that this helper is intended to be used from
136 within the handler class only, as it uses members
137 from @c handler
138 Performance schema events are instrumented as follows:
139 - in non batch mode, one event is generated per call
140 - in batch mode, the number of rows affected is saved
141 in @c m_psi_numrows, so that @c end_psi_batch_mode()
142 generates a single event for the batch.
143 @param OP the table operation to be performed
144 @param INDEX the table index used if any, or MAX_KEY.
145 @param RESULT the result of the table operation performed
146 @param PAYLOAD instrumented code to execute
147 @sa handler::end_psi_batch_mode.
148 */
149 #ifdef HAVE_PSI_TABLE_INTERFACE
150 #define MYSQL_TABLE_IO_WAIT(OP, INDEX, RESULT, PAYLOAD) \
151 { \
152 if (m_psi != NULL) { \
153 switch (m_psi_batch_mode) { \
154 case PSI_BATCH_MODE_NONE: { \
155 PSI_table_locker *sub_locker = NULL; \
156 PSI_table_locker_state reentrant_safe_state; \
157 sub_locker = PSI_TABLE_CALL(start_table_io_wait)( \
158 &reentrant_safe_state, m_psi, OP, INDEX, __FILE__, __LINE__); \
159 PAYLOAD \
160 if (sub_locker != NULL) PSI_TABLE_CALL(end_table_io_wait) \
161 (sub_locker, 1); \
162 break; \
163 } \
164 case PSI_BATCH_MODE_STARTING: { \
165 m_psi_locker = PSI_TABLE_CALL(start_table_io_wait)( \
166 &m_psi_locker_state, m_psi, OP, INDEX, __FILE__, __LINE__); \
167 PAYLOAD \
168 if (!RESULT) m_psi_numrows++; \
169 m_psi_batch_mode = PSI_BATCH_MODE_STARTED; \
170 break; \
171 } \
172 case PSI_BATCH_MODE_STARTED: \
173 default: { \
174 DBUG_ASSERT(m_psi_batch_mode == PSI_BATCH_MODE_STARTED); \
175 PAYLOAD \
176 if (!RESULT) m_psi_numrows++; \
177 break; \
178 } \
179 } \
180 } else { \
181 PAYLOAD \
182 } \
183 }
184 #else
185 #define MYSQL_TABLE_IO_WAIT(OP, INDEX, RESULT, PAYLOAD) PAYLOAD
186 #endif
187
188 /**
189 @def MYSQL_TABLE_LOCK_WAIT
Instrumentation helper for table lock waits.
191 @param OP the table operation to be performed
192 @param FLAGS per table operation flags.
193 @param PAYLOAD the code to instrument.
194 @sa MYSQL_END_TABLE_WAIT.
195 */
196 #ifdef HAVE_PSI_TABLE_INTERFACE
197 #define MYSQL_TABLE_LOCK_WAIT(OP, FLAGS, PAYLOAD) \
198 { \
199 if (m_psi != NULL) { \
200 PSI_table_locker *locker; \
201 PSI_table_locker_state state; \
202 locker = PSI_TABLE_CALL(start_table_lock_wait)(&state, m_psi, OP, FLAGS, \
203 __FILE__, __LINE__); \
204 PAYLOAD \
205 if (locker != NULL) PSI_TABLE_CALL(end_table_lock_wait)(locker); \
206 } else { \
207 PAYLOAD \
208 } \
209 }
210 #else
211 #define MYSQL_TABLE_LOCK_WAIT(OP, FLAGS, PAYLOAD) PAYLOAD
212 #endif
213
214 using std::list;
215 using std::log2;
216 using std::max;
217 using std::min;
218
219 /**
220 While we have legacy_db_type, we have this array to
221 check for dups and to find handlerton from legacy_db_type.
222 Remove when legacy_db_type is finally gone
223 */
224 static Prealloced_array<st_plugin_int *, PREALLOC_NUM_HA> se_plugin_array(
225 PSI_NOT_INSTRUMENTED);
226
227 /**
228 Array allowing to check if handlerton is builtin without
229 acquiring LOCK_plugin.
230 */
231 static Prealloced_array<bool, PREALLOC_NUM_HA> builtin_htons(
232 PSI_NOT_INSTRUMENTED);
233
hton2plugin(uint slot)234 st_plugin_int *hton2plugin(uint slot) { return se_plugin_array[slot]; }
235
num_hton2plugins()236 size_t num_hton2plugins() { return se_plugin_array.size(); }
237
insert_hton2plugin(uint slot,st_plugin_int * plugin)238 st_plugin_int *insert_hton2plugin(uint slot, st_plugin_int *plugin) {
239 if (se_plugin_array.assign_at(slot, plugin)) return nullptr;
240 builtin_htons.assign_at(slot, true);
241 return se_plugin_array[slot];
242 }
243
remove_hton2plugin(uint slot)244 st_plugin_int *remove_hton2plugin(uint slot) {
245 st_plugin_int *retval = se_plugin_array[slot];
246 se_plugin_array[slot] = NULL;
247 builtin_htons.assign_at(slot, false);
248 return retval;
249 }
250
ha_resolve_storage_engine_name(const handlerton * db_type)251 const char *ha_resolve_storage_engine_name(const handlerton *db_type) {
252 return db_type == nullptr ? "UNKNOWN" : hton2plugin(db_type->slot)->name.str;
253 }
254
255 static handlerton *installed_htons[128];
256
257 /* number of storage engines (from installed_htons[]) that support 2pc */
258 ulong total_ha_2pc = 0;
259 /* size of savepoint storage area (see ha_init) */
260 ulong savepoint_alloc_size = 0;
261
262 namespace {
263 struct Storage_engine_identifier {
264 const LEX_CSTRING canonical;
265 const LEX_CSTRING legacy;
266 };
267 const Storage_engine_identifier se_names[] = {
268 {{STRING_WITH_LEN("INNODB")}, {STRING_WITH_LEN("INNOBASE")}},
269 {{STRING_WITH_LEN("NDBCLUSTER")}, {STRING_WITH_LEN("NDB")}},
270 {{STRING_WITH_LEN("MEMORY")}, {STRING_WITH_LEN("HEAP")}},
271 {{STRING_WITH_LEN("MRG_MYISAM")}, {STRING_WITH_LEN("MERGE")}}};
272 const auto se_names_end = std::end(se_names);
273 std::vector<std::string> disabled_se_names;
274 } // namespace
275
276 const char *ha_row_type[] = {"",
277 "FIXED",
278 "DYNAMIC",
279 "COMPRESSED",
280 "REDUNDANT",
281 "COMPACT",
282 /* Reserved to be "PAGE" in future versions */ "?",
283 "?",
284 "?",
285 "?"};
286
287 const char *tx_isolation_names[] = {"READ-UNCOMMITTED", "READ-COMMITTED",
288 "REPEATABLE-READ", "SERIALIZABLE", NullS};
289 TYPELIB tx_isolation_typelib = {array_elements(tx_isolation_names) - 1, "",
290 tx_isolation_names, nullptr};
291
292 // Called for each SE to check if given db.table_name is a system table.
293 static bool check_engine_system_table_handlerton(THD *unused, plugin_ref plugin,
294 void *arg);
295
296 static int ha_discover(THD *thd, const char *db, const char *name,
297 uchar **frmblob, size_t *frmlen);
298
299 /**
300 Structure used by SE during check for system table.
301 This structure is passed to each SE handlerton and the status (OUT param)
302 is collected.
303 */
304 struct st_sys_tbl_chk_params {
305 const char *db; // IN param
306 const char *table_name; // IN param
307 bool is_sql_layer_system_table; // IN param
308 legacy_db_type db_type; // IN param
309
310 enum enum_sys_tbl_chk_status {
311 // db.table_name is not a supported system table.
312 NOT_KNOWN_SYSTEM_TABLE,
313 /*
314 db.table_name is a system table,
315 but may not be supported by SE.
316 */
317 KNOWN_SYSTEM_TABLE,
318 /*
319 db.table_name is a system table,
320 and is supported by SE.
321 */
322 SUPPORTED_SYSTEM_TABLE
323 } status; // OUT param
324 };
325
ha_default_plugin(THD * thd)326 static plugin_ref ha_default_plugin(THD *thd) {
327 if (thd->variables.table_plugin) return thd->variables.table_plugin;
328 return my_plugin_lock(thd, &global_system_variables.table_plugin);
329 }
330
331 /** @brief
332 Return the default storage engine handlerton used for non-temp tables
333 for thread
334
335 SYNOPSIS
336 ha_default_handlerton(thd)
337 thd current thread
338
339 RETURN
340 pointer to handlerton
341 */
ha_default_handlerton(THD * thd)342 handlerton *ha_default_handlerton(THD *thd) {
343 plugin_ref plugin = ha_default_plugin(thd);
344 DBUG_ASSERT(plugin);
345 handlerton *hton = plugin_data<handlerton *>(plugin);
346 DBUG_ASSERT(hton);
347 return hton;
348 }
349
ha_default_temp_plugin(THD * thd)350 static plugin_ref ha_default_temp_plugin(THD *thd) {
351 if (thd->variables.temp_table_plugin) return thd->variables.temp_table_plugin;
352 return my_plugin_lock(thd, &global_system_variables.temp_table_plugin);
353 }
354
355 /** @brief
356 Return the default storage engine handlerton used for explicitly
357 created temp tables for a thread
358
359 SYNOPSIS
360 ha_default_temp_handlerton(thd)
361 thd current thread
362
363 RETURN
364 pointer to handlerton
365 */
ha_default_temp_handlerton(THD * thd)366 handlerton *ha_default_temp_handlerton(THD *thd) {
367 plugin_ref plugin = ha_default_temp_plugin(thd);
368 DBUG_ASSERT(plugin);
369 handlerton *hton = plugin_data<handlerton *>(plugin);
370 DBUG_ASSERT(hton);
371 return hton;
372 }
373
374 /**
375 Resolve handlerton plugin by name, without checking for "DEFAULT" or
376 HTON_NOT_USER_SELECTABLE.
377
378 @param thd Thread context.
379 @param name Plugin name.
380
381 @return plugin or NULL if not found.
382 */
ha_resolve_by_name_raw(THD * thd,const LEX_CSTRING & name)383 plugin_ref ha_resolve_by_name_raw(THD *thd, const LEX_CSTRING &name) {
384 return plugin_lock_by_name(thd, name, MYSQL_STORAGE_ENGINE_PLUGIN);
385 }
386
hton_charset()387 static const CHARSET_INFO &hton_charset() { return *system_charset_info; }
388
389 /**
390 Return the storage engine handlerton for the supplied name.
391
392 @param thd Current thread. May be nullptr, (e.g. during initialize).
393 @param name Name of storage engine.
394 @param is_temp_table true if table is a temporary table.
395
396 @return Pointer to storage engine plugin handle.
397 */
ha_resolve_by_name(THD * thd,const LEX_CSTRING * name,bool is_temp_table)398 plugin_ref ha_resolve_by_name(THD *thd, const LEX_CSTRING *name,
399 bool is_temp_table) {
400 if (thd && 0 == strnncmp_nopads(hton_charset(), *name,
401 {STRING_WITH_LEN("DEFAULT")})) {
402 return is_temp_table ? ha_default_plugin(thd) : ha_default_temp_plugin(thd);
403 }
404
405 // Note that thd CAN be nullptr here - it is not actually needed by
406 // ha_resolve_by_name_raw().
407 plugin_ref plugin = ha_resolve_by_name_raw(thd, *name);
408 if (plugin == nullptr) {
409 // If we fail to resolve the name passed in, we try to see if it is a
410 // historical alias.
411 auto match = std::find_if(
412 std::begin(se_names), se_names_end,
413 [&](const Storage_engine_identifier &sei) {
414 return (0 == strnncmp_nopads(hton_charset(), *name, sei.legacy));
415 });
416 if (match != se_names_end) {
417 // if it is, we resolve using the new name
418 plugin = ha_resolve_by_name_raw(thd, match->canonical);
419 }
420 }
421 if (plugin != nullptr) {
422 handlerton *hton = plugin_data<handlerton *>(plugin);
423 if (hton && !(hton->flags & HTON_NOT_USER_SELECTABLE)) return plugin;
424
425 /*
426 unlocking plugin immediately after locking is relatively low cost.
427 */
428 plugin_unlock(thd, plugin);
429 }
430 return nullptr;
431 }
432
433 /**
434 Read a comma-separated list of storage engine names. Look up each in the
435 known list of canonical and legacy names. In case of a match; add both the
436 canonical and the legacy name to disabled_se_names, which is a static vector
437 of disabled storage engine names.
438 If there is no match, the unmodified name is added to the vector.
439 */
set_externally_disabled_storage_engine_names(const char * disabled_list)440 void set_externally_disabled_storage_engine_names(const char *disabled_list) {
441 DBUG_ASSERT(disabled_list != nullptr);
442
443 myu::Split(
444 disabled_list, disabled_list + strlen(disabled_list), myu::IsComma,
445 [](const char *f, const char *l) {
446 auto tr = myu::FindTrimmedRange(f, l, myu::IsSpace);
447 if (tr.first == tr.second) return;
448
449 const LEX_CSTRING dse{tr.first,
450 static_cast<size_t>(tr.second - tr.first)};
451 auto match = std::find_if(
452 std::begin(se_names), se_names_end,
453 [&](const Storage_engine_identifier &seid) {
454 return (
455 (0 == strnncmp_nopads(hton_charset(), dse, seid.canonical)) ||
456 (0 == strnncmp_nopads(hton_charset(), dse, seid.legacy)));
457 });
458 if (match == se_names_end) {
459 disabled_se_names.emplace_back(dse.str, dse.length);
460 return;
461 }
462 disabled_se_names.emplace_back(match->canonical.str,
463 match->canonical.length);
464 disabled_se_names.emplace_back(match->legacy.str, match->legacy.length);
465 });
466 }
467
is_storage_engine_name_externally_disabled(const char * name)468 static bool is_storage_engine_name_externally_disabled(const char *name) {
469 const LEX_CSTRING n{name, strlen(name)};
470 return std::any_of(
471 disabled_se_names.begin(), disabled_se_names.end(),
472 [&](const std::string &dse) {
473 return (0 == strnncmp_nopads(hton_charset(), n,
474 {dse.c_str(), dse.length()}));
475 });
476 }
477
478 /**
479 Returns true if the storage engine of the handlerton argument has
480 been listed in the disabled_storage_engines system variable. @note
481 that the SE may still be internally enabled, that is
482 HaIsInternallyEnabled may return true.
483 */
ha_is_externally_disabled(const handlerton & htnr)484 bool ha_is_externally_disabled(const handlerton &htnr) {
485 const char *se_name = ha_resolve_storage_engine_name(&htnr);
486 DBUG_ASSERT(se_name != nullptr);
487 return is_storage_engine_name_externally_disabled(se_name);
488 }
489
490 // Check if storage engine is disabled for table/tablespace creation.
ha_is_storage_engine_disabled(handlerton * se_handle)491 bool ha_is_storage_engine_disabled(handlerton *se_handle) {
492 DBUG_ASSERT(se_handle != nullptr);
493 return ha_is_externally_disabled(*se_handle);
494 }
495
ha_lock_engine(THD * thd,const handlerton * hton)496 plugin_ref ha_lock_engine(THD *thd, const handlerton *hton) {
497 if (hton) {
498 st_plugin_int **plugin = &se_plugin_array[hton->slot];
499
500 #ifdef DBUG_OFF
501 /*
502 Take a shortcut for builtin engines -- return pointer to plugin
503 without acquiring LOCK_plugin mutex. This is safe safe since such
504 plugins are not deleted until shutdown and we don't do reference
505 counting in non-debug builds for them.
506
507 Since we have reference to handlerton on our hands, this method
508 can't be called concurrently to non-builtin handlerton initialization/
509 deinitialization. So it is safe to access builtin_htons[] without
510 additional locking.
511 */
512 if (builtin_htons[hton->slot]) return *plugin;
513
514 return my_plugin_lock(thd, plugin);
515 #else
516 /*
517 We can't take shortcut in debug builds.
518 At least assert that builtin_htons[slot] is set correctly.
519 */
520 DBUG_ASSERT(builtin_htons[hton->slot] == (plugin[0]->plugin_dl == nullptr));
521 return my_plugin_lock(thd, &plugin);
522 #endif
523 }
524 return nullptr;
525 }
526
ha_resolve_by_legacy_type(THD * thd,enum legacy_db_type db_type)527 handlerton *ha_resolve_by_legacy_type(THD *thd, enum legacy_db_type db_type) {
528 plugin_ref plugin;
529 switch (db_type) {
530 case DB_TYPE_DEFAULT:
531 return ha_default_handlerton(thd);
532 default:
533 if (db_type > DB_TYPE_UNKNOWN && db_type < DB_TYPE_DEFAULT &&
534 (plugin = ha_lock_engine(thd, installed_htons[db_type])))
535 return plugin_data<handlerton *>(plugin);
536 /* fall through */
537 case DB_TYPE_UNKNOWN:
538 return nullptr;
539 }
540 }
541
542 /**
543 Use other database handler if databasehandler is not compiled in.
544 */
ha_checktype(THD * thd,enum legacy_db_type database_type,bool no_substitute,bool report_error)545 handlerton *ha_checktype(THD *thd, enum legacy_db_type database_type,
546 bool no_substitute, bool report_error) {
547 DBUG_TRACE;
548 handlerton *hton = ha_resolve_by_legacy_type(thd, database_type);
549 if (ha_storage_engine_is_enabled(hton)) return hton;
550
551 if (no_substitute) {
552 if (report_error) {
553 const char *engine_name = ha_resolve_storage_engine_name(hton);
554 my_error(ER_FEATURE_DISABLED, MYF(0), engine_name, engine_name);
555 }
556 return nullptr;
557 }
558
559 (void)RUN_HOOK(transaction, after_rollback, (thd, false));
560
561 switch (database_type) {
562 case DB_TYPE_MRG_ISAM:
563 return ha_resolve_by_legacy_type(thd, DB_TYPE_MRG_MYISAM);
564 default:
565 break;
566 }
567
568 return ha_default_handlerton(thd);
569 } /* ha_checktype */
570
571 /**
572 Create handler object for the table in the storage engine.
573
574 @param share TABLE_SHARE for the table, can be NULL if caller
575 didn't perform full-blown open of table definition.
576 @param partitioned Indicates whether table is partitioned.
577 @param alloc Memory root to be used for allocating handler object.
578 @param db_type Table's storage engine.
579
580 @note This function will try to use default storage engine if one which
581 was specified through db_type parameter is not available.
582 */
get_new_handler(TABLE_SHARE * share,bool partitioned,MEM_ROOT * alloc,handlerton * db_type)583 handler *get_new_handler(TABLE_SHARE *share, bool partitioned, MEM_ROOT *alloc,
584 handlerton *db_type) {
585 handler *file;
586 DBUG_TRACE;
587 DBUG_PRINT("enter", ("alloc: %p", alloc));
588
589 if (db_type && db_type->state == SHOW_OPTION_YES && db_type->create) {
590 if ((file = db_type->create(db_type, share, partitioned, alloc)))
591 file->init();
592 return file;
593 }
594 /*
595 Try the default table type
596 Here the call to current_thd() is ok as we call this function a lot of
597 times but we enter this branch very seldom.
598 */
599 return get_new_handler(share, partitioned, alloc,
600 ha_default_handlerton(current_thd));
601 }
602
603 static const char **handler_errmsgs;
604
get_handler_errmsg(int nr)605 static const char *get_handler_errmsg(int nr) {
606 return handler_errmsgs[nr - HA_ERR_FIRST];
607 }
608
609 /**
610 Register handler error messages for use with my_error().
611
612 @retval
613 0 OK
614 @retval
615 !=0 Error
616 */
617
ha_init_errors(void)618 int ha_init_errors(void) {
619 #define SETMSG(nr, msg) handler_errmsgs[(nr)-HA_ERR_FIRST] = (msg)
620
621 /* Allocate a pointer array for the error message strings. */
622 /* Zerofill it to avoid uninitialized gaps. */
623 if (!(handler_errmsgs = (const char **)my_malloc(
624 key_memory_handler_errmsgs, HA_ERR_ERRORS * sizeof(char *),
625 MYF(MY_WME | MY_ZEROFILL))))
626 return 1;
627
628 /* Set the dedicated error messages. */
629 SETMSG(HA_ERR_KEY_NOT_FOUND, ER_DEFAULT(ER_KEY_NOT_FOUND));
630 SETMSG(HA_ERR_FOUND_DUPP_KEY, ER_DEFAULT(ER_DUP_KEY));
631 SETMSG(HA_ERR_RECORD_CHANGED, "Update wich is recoverable");
632 SETMSG(HA_ERR_WRONG_INDEX, "Wrong index given to function");
633 SETMSG(HA_ERR_CRASHED, ER_DEFAULT(ER_NOT_KEYFILE));
634 SETMSG(HA_ERR_WRONG_IN_RECORD, ER_DEFAULT(ER_CRASHED_ON_USAGE));
635 SETMSG(HA_ERR_OUT_OF_MEM, "Table handler out of memory");
636 SETMSG(HA_ERR_NOT_A_TABLE, "Incorrect file format '%.64s'");
637 SETMSG(HA_ERR_WRONG_COMMAND, "Command not supported");
638 SETMSG(HA_ERR_OLD_FILE, ER_DEFAULT(ER_OLD_KEYFILE));
639 SETMSG(HA_ERR_NO_ACTIVE_RECORD, "No record read in update");
640 SETMSG(HA_ERR_RECORD_DELETED, "Intern record deleted");
641 SETMSG(HA_ERR_RECORD_FILE_FULL, ER_DEFAULT(ER_RECORD_FILE_FULL));
642 SETMSG(HA_ERR_INDEX_FILE_FULL, "No more room in index file '%.64s'");
643 SETMSG(HA_ERR_END_OF_FILE, "End in next/prev/first/last");
644 SETMSG(HA_ERR_UNSUPPORTED, ER_DEFAULT(ER_ILLEGAL_HA));
645 SETMSG(HA_ERR_TOO_BIG_ROW, "Too big row");
646 SETMSG(HA_WRONG_CREATE_OPTION, "Wrong create option");
647 SETMSG(HA_ERR_FOUND_DUPP_UNIQUE, ER_DEFAULT(ER_DUP_UNIQUE));
648 SETMSG(HA_ERR_UNKNOWN_CHARSET, "Can't open charset");
649 SETMSG(HA_ERR_WRONG_MRG_TABLE_DEF, ER_DEFAULT(ER_WRONG_MRG_TABLE));
650 SETMSG(HA_ERR_CRASHED_ON_REPAIR, ER_DEFAULT(ER_CRASHED_ON_REPAIR));
651 SETMSG(HA_ERR_CRASHED_ON_USAGE, ER_DEFAULT(ER_CRASHED_ON_USAGE));
652 SETMSG(HA_ERR_LOCK_WAIT_TIMEOUT, ER_DEFAULT(ER_LOCK_WAIT_TIMEOUT));
653 SETMSG(HA_ERR_LOCK_TABLE_FULL, ER_DEFAULT(ER_LOCK_TABLE_FULL));
654 SETMSG(HA_ERR_READ_ONLY_TRANSACTION, ER_DEFAULT(ER_READ_ONLY_TRANSACTION));
655 SETMSG(HA_ERR_LOCK_DEADLOCK, ER_DEFAULT(ER_LOCK_DEADLOCK));
656 SETMSG(HA_ERR_CANNOT_ADD_FOREIGN, ER_DEFAULT(ER_CANNOT_ADD_FOREIGN));
657 SETMSG(HA_ERR_NO_REFERENCED_ROW, ER_DEFAULT(ER_NO_REFERENCED_ROW_2));
658 SETMSG(HA_ERR_ROW_IS_REFERENCED, ER_DEFAULT(ER_ROW_IS_REFERENCED_2));
659 SETMSG(HA_ERR_NO_SAVEPOINT, "No savepoint with that name");
660 SETMSG(HA_ERR_NON_UNIQUE_BLOCK_SIZE, "Non unique key block size");
661 SETMSG(HA_ERR_NO_SUCH_TABLE, "No such table: '%.64s'");
662 SETMSG(HA_ERR_TABLE_EXIST, ER_DEFAULT(ER_TABLE_EXISTS_ERROR));
663 SETMSG(HA_ERR_NO_CONNECTION, "Could not connect to storage engine");
664 SETMSG(HA_ERR_TABLE_DEF_CHANGED, ER_DEFAULT(ER_TABLE_DEF_CHANGED));
665 SETMSG(HA_ERR_FOREIGN_DUPLICATE_KEY,
666 "FK constraint would lead to duplicate key");
667 SETMSG(HA_ERR_TABLE_NEEDS_UPGRADE, ER_DEFAULT(ER_TABLE_NEEDS_UPGRADE));
668 SETMSG(HA_ERR_TABLE_READONLY, ER_DEFAULT(ER_OPEN_AS_READONLY));
669 SETMSG(HA_ERR_AUTOINC_READ_FAILED, ER_DEFAULT(ER_AUTOINC_READ_FAILED));
670 SETMSG(HA_ERR_AUTOINC_ERANGE, ER_DEFAULT(ER_WARN_DATA_OUT_OF_RANGE));
671 SETMSG(HA_ERR_TOO_MANY_CONCURRENT_TRXS,
672 ER_DEFAULT(ER_TOO_MANY_CONCURRENT_TRXS));
673 SETMSG(HA_ERR_INDEX_COL_TOO_LONG, ER_DEFAULT(ER_INDEX_COLUMN_TOO_LONG));
674 SETMSG(HA_ERR_INDEX_CORRUPT, ER_DEFAULT(ER_INDEX_CORRUPT));
675 SETMSG(HA_FTS_INVALID_DOCID, "Invalid InnoDB FTS Doc ID");
676 SETMSG(HA_ERR_TABLE_IN_FK_CHECK, ER_DEFAULT(ER_TABLE_IN_FK_CHECK));
677 SETMSG(HA_ERR_TABLESPACE_EXISTS, "Tablespace already exists");
678 SETMSG(HA_ERR_TABLESPACE_MISSING, ER_DEFAULT(ER_TABLESPACE_MISSING));
679 SETMSG(HA_ERR_FTS_EXCEED_RESULT_CACHE_LIMIT,
680 "FTS query exceeds result cache limit");
681 SETMSG(HA_ERR_TEMP_FILE_WRITE_FAILURE,
682 ER_DEFAULT(ER_TEMP_FILE_WRITE_FAILURE));
683 SETMSG(HA_ERR_INNODB_FORCED_RECOVERY, ER_DEFAULT(ER_INNODB_FORCED_RECOVERY));
684 SETMSG(HA_ERR_FTS_TOO_MANY_WORDS_IN_PHRASE,
685 "Too many words in a FTS phrase or proximity search");
686 SETMSG(HA_ERR_TABLE_CORRUPT, ER_DEFAULT(ER_TABLE_CORRUPT));
687 SETMSG(HA_ERR_TABLESPACE_MISSING, ER_DEFAULT(ER_TABLESPACE_MISSING));
688 SETMSG(HA_ERR_TABLESPACE_IS_NOT_EMPTY,
689 ER_DEFAULT(ER_TABLESPACE_IS_NOT_EMPTY));
690 SETMSG(HA_ERR_WRONG_FILE_NAME, ER_DEFAULT(ER_WRONG_FILE_NAME));
691 SETMSG(HA_ERR_NOT_ALLOWED_COMMAND, ER_DEFAULT(ER_NOT_ALLOWED_COMMAND));
692 SETMSG(HA_ERR_COMPUTE_FAILED, "Compute virtual column value failed");
693 SETMSG(HA_ERR_DISK_FULL_NOWAIT, ER_DEFAULT(ER_DISK_FULL_NOWAIT));
694 SETMSG(HA_ERR_NO_SESSION_TEMP, ER_DEFAULT(ER_NO_SESSION_TEMP));
695 SETMSG(HA_ERR_WRONG_TABLE_NAME, ER_DEFAULT(ER_WRONG_TABLE_NAME));
696 SETMSG(HA_ERR_TOO_LONG_PATH, ER_DEFAULT(ER_TABLE_NAME_CAUSES_TOO_LONG_PATH));
697 /* Register the error messages for use with my_error(). */
698 return my_error_register(get_handler_errmsg, HA_ERR_FIRST, HA_ERR_LAST);
699 }
700
ha_finalize_handlerton(st_plugin_int * plugin)701 int ha_finalize_handlerton(st_plugin_int *plugin) {
702 handlerton *hton = (handlerton *)plugin->data;
703 DBUG_TRACE;
704
705 /* hton can be NULL here, if ha_initialize_handlerton() failed. */
706 if (!hton) goto end;
707
708 switch (hton->state) {
709 case SHOW_OPTION_NO:
710 case SHOW_OPTION_DISABLED:
711 break;
712 case SHOW_OPTION_YES:
713 if (installed_htons[hton->db_type] == hton)
714 installed_htons[hton->db_type] = nullptr;
715 break;
716 };
717
718 if (hton->panic) hton->panic(hton, HA_PANIC_CLOSE);
719
720 if (plugin->plugin->deinit) {
721 /*
722 Today we have no defined/special behavior for uninstalling
723 engine plugins.
724 */
725 DBUG_PRINT("info", ("Deinitializing plugin: '%s'", plugin->name.str));
726 if (plugin->plugin->deinit(nullptr)) {
727 DBUG_PRINT("warning", ("Plugin '%s' deinit function returned error.",
728 plugin->name.str));
729 }
730 }
731
732 /*
733 In case a plugin is uninstalled and re-installed later, it should
734 reuse an array slot. Otherwise the number of uninstall/install
735 cycles would be limited.
736 */
737 if (hton->slot != HA_SLOT_UNDEF) {
738 /* Make sure we are not unpluging another plugin */
739 DBUG_ASSERT(se_plugin_array[hton->slot] == plugin);
740 DBUG_ASSERT(hton->slot < se_plugin_array.size());
741 se_plugin_array[hton->slot] = NULL;
742 builtin_htons[hton->slot] = false; /* Extra correctness. */
743 }
744
745 my_free(hton);
746 plugin->data = nullptr;
747 end:
748 return 0;
749 }
750
ha_initialize_handlerton(st_plugin_int * plugin)751 int ha_initialize_handlerton(st_plugin_int *plugin) {
752 handlerton *hton;
753 DBUG_TRACE;
754 DBUG_PRINT("plugin", ("initialize plugin: '%s'", plugin->name.str));
755
756 hton = (handlerton *)my_malloc(key_memory_handlerton, sizeof(handlerton),
757 MYF(MY_WME | MY_ZEROFILL));
758
759 if (hton == nullptr) {
760 LogErr(ERROR_LEVEL, ER_HANDLERTON_OOM, plugin->name.str);
761 goto err_no_hton_memory;
762 }
763
764 hton->slot = HA_SLOT_UNDEF;
765 /* Historical Requirement */
766 plugin->data = hton; // shortcut for the future
767 if (plugin->plugin->init && plugin->plugin->init(hton)) {
768 LogErr(ERROR_LEVEL, ER_PLUGIN_INIT_FAILED, plugin->name.str);
769 goto err;
770 }
771
772 /*
773 the switch below and hton->state should be removed when
774 command-line options for plugins will be implemented
775 */
776 DBUG_PRINT("info", ("hton->state=%d", hton->state));
777 switch (hton->state) {
778 case SHOW_OPTION_NO:
779 break;
780 case SHOW_OPTION_YES: {
781 uint tmp;
782 ulong fslot;
783 /* now check the db_type for conflict */
784 if (hton->db_type <= DB_TYPE_UNKNOWN ||
785 hton->db_type >= DB_TYPE_DEFAULT || installed_htons[hton->db_type]) {
786 int idx = (int)DB_TYPE_FIRST_DYNAMIC;
787
788 while (idx < (int)DB_TYPE_DEFAULT && installed_htons[idx]) idx++;
789
790 if (idx == (int)DB_TYPE_DEFAULT) {
791 LogErr(WARNING_LEVEL, ER_TOO_MANY_STORAGE_ENGINES);
792 goto err_deinit;
793 }
794 if (hton->db_type != DB_TYPE_UNKNOWN)
795 LogErr(WARNING_LEVEL, ER_SE_TYPECODE_CONFLICT, plugin->plugin->name,
796 idx);
797 hton->db_type = (enum legacy_db_type)idx;
798 }
799
800 /*
801 In case a plugin is uninstalled and re-installed later, it should
802 reuse an array slot. Otherwise the number of uninstall/install
803 cycles would be limited. So look for a free slot.
804 */
805 DBUG_PRINT("plugin",
806 ("total_ha: %lu", static_cast<ulong>(se_plugin_array.size())));
807 for (fslot = 0; fslot < se_plugin_array.size(); fslot++) {
808 if (!se_plugin_array[fslot]) break;
809 }
810 if (fslot < se_plugin_array.size())
811 hton->slot = fslot;
812 else {
813 hton->slot = se_plugin_array.size();
814 }
815 if (se_plugin_array.assign_at(hton->slot, plugin) ||
816 builtin_htons.assign_at(hton->slot, (plugin->plugin_dl == nullptr)))
817 goto err_deinit;
818
819 installed_htons[hton->db_type] = hton;
820 tmp = hton->savepoint_offset;
821 hton->savepoint_offset = savepoint_alloc_size;
822 savepoint_alloc_size += tmp;
823 if (hton->prepare) total_ha_2pc++;
824 break;
825 }
826 /* fall through */
827 default:
828 hton->state = SHOW_OPTION_DISABLED;
829 break;
830 }
831
832 /*
833 This is entirely for legacy. We will create a new "disk based" hton and a
834 "memory" hton which will be configurable longterm. We should be able to
835 remove partition and myisammrg.
836 */
837 switch (hton->db_type) {
838 case DB_TYPE_HEAP:
839 heap_hton = hton;
840 break;
841 case DB_TYPE_TEMPTABLE:
842 temptable_hton = hton;
843 break;
844 case DB_TYPE_MYISAM:
845 myisam_hton = hton;
846 break;
847 case DB_TYPE_INNODB:
848 innodb_hton = hton;
849 break;
850 default:
851 break;
852 };
853
854 /*
855 Re-load the optimizer cost constants since this storage engine can
856 have non-default cost constants.
857 */
858 reload_optimizer_cost_constants();
859
860 return 0;
861
862 err_deinit:
863 /*
864 Let plugin do its inner deinitialization as plugin->init()
865 was successfully called before.
866 */
867 if (plugin->plugin->deinit) (void)plugin->plugin->deinit(nullptr);
868
869 err:
870 my_free(hton);
871 err_no_hton_memory:
872 plugin->data = nullptr;
873 return 1;
874 }
875
ha_init()876 int ha_init() {
877 int error = 0;
878 DBUG_TRACE;
879
880 /*
881 Check if there is a transaction-capable storage engine besides the
882 binary log.
883 */
884 opt_using_transactions =
885 se_plugin_array.size() > static_cast<ulong>(opt_bin_log);
886 savepoint_alloc_size += sizeof(SAVEPOINT);
887
888 return error;
889 }
890
/**
  Tear down handler-layer error reporting: unregister the handler error
  message range and free the message array.
  NOTE(review): presumably invoked once at server shutdown as the
  counterpart of the error-message registration — confirm call site.
*/
void ha_end() {
  // Unregister handler error messages.
  my_error_unregister(HA_ERR_FIRST, HA_ERR_LAST);
  my_free(handler_errmsgs);
}
896
dropdb_handlerton(THD *,plugin_ref plugin,void * path)897 static bool dropdb_handlerton(THD *, plugin_ref plugin, void *path) {
898 handlerton *hton = plugin_data<handlerton *>(plugin);
899 if (hton->state == SHOW_OPTION_YES && hton->drop_database)
900 hton->drop_database(hton, (char *)path);
901 return false;
902 }
903
/**
  Ask every enabled storage engine to drop the data it holds for the
  database at 'path' (dispatched via dropdb_handlerton()).

  @param path  database directory path
*/
void ha_drop_database(char *path) {
  plugin_foreach(nullptr, dropdb_handlerton, MYSQL_STORAGE_ENGINE_PLUGIN, path);
}
907
closecon_handlerton(THD * thd,plugin_ref plugin,void *)908 static bool closecon_handlerton(THD *thd, plugin_ref plugin, void *) {
909 handlerton *hton = plugin_data<handlerton *>(plugin);
910 /*
911 there's no need to rollback here as all transactions must
912 be rolled back already
913 */
914 if (hton->state == SHOW_OPTION_YES && thd_get_ha_data(thd, hton)) {
915 if (hton->close_connection) hton->close_connection(hton, thd);
916 /* make sure ha_data is reset and ha_data_lock is released */
917 thd_set_ha_data(thd, hton, nullptr);
918 }
919 return false;
920 }
921
/**
  Notify every storage engine that the given session is being closed, so
  engine-private connection state (ha_data) can be released.

  @note
  don't bother to rollback here, it's done already

  @param thd  session being closed
*/
void ha_close_connection(THD *thd) {
  plugin_foreach(thd, closecon_handlerton, MYSQL_STORAGE_ENGINE_PLUGIN,
                 nullptr);
}
930
kill_handlerton(THD * thd,plugin_ref plugin,void *)931 static bool kill_handlerton(THD *thd, plugin_ref plugin, void *) {
932 handlerton *hton = plugin_data<handlerton *>(plugin);
933
934 if (hton->state == SHOW_OPTION_YES && hton->kill_connection) {
935 if (thd_get_ha_data(thd, hton)) hton->kill_connection(hton, thd);
936 }
937
938 return false;
939 }
940
/**
  Forward a kill of the given session to every storage engine
  (dispatched via kill_handlerton()).

  @param thd  session being killed
*/
void ha_kill_connection(THD *thd) {
  plugin_foreach(thd, kill_handlerton, MYSQL_STORAGE_ENGINE_PLUGIN, nullptr);
}
944
945 /** Invoke handlerton::pre_dd_shutdown() on a plugin.
946 @param plugin storage engine plugin
947 @retval false (always) */
pre_dd_shutdown_handlerton(THD *,plugin_ref plugin,void *)948 static bool pre_dd_shutdown_handlerton(THD *, plugin_ref plugin, void *) {
949 handlerton *hton = plugin_data<handlerton *>(plugin);
950 if (hton->state == SHOW_OPTION_YES && hton->pre_dd_shutdown)
951 hton->pre_dd_shutdown(hton);
952 return false;
953 }
954
/** Invoke handlerton::pre_dd_shutdown() on every storage engine plugin,
    giving each engine a chance to finish work that still needs the data
    dictionary before it is shut down. */
void ha_pre_dd_shutdown(void) {
  plugin_foreach(nullptr, pre_dd_shutdown_handlerton,
                 MYSQL_STORAGE_ENGINE_PLUGIN, nullptr);
}
960
961 /* ========================================================================
962 ======================= TRANSACTIONS ===================================*/
963
964 /**
965 Transaction handling in the server
966 ==================================
967
968 In each client connection, MySQL maintains two transactional
969 states:
970 - a statement transaction,
971 - a standard, also called normal transaction.
972
973 Historical note
974 ---------------
975 "Statement transaction" is a non-standard term that comes
976 from the times when MySQL supported BerkeleyDB storage engine.
977
978 First of all, it should be said that in BerkeleyDB auto-commit
979 mode auto-commits operations that are atomic to the storage
980 engine itself, such as a write of a record, and are too
981 high-granular to be atomic from the application perspective
982 (MySQL). One SQL statement could involve many BerkeleyDB
983 auto-committed operations and thus BerkeleyDB auto-commit was of
984 little use to MySQL.
985
986 Secondly, instead of SQL standard savepoints, BerkeleyDB
987 provided the concept of "nested transactions". In a nutshell,
988 transactions could be arbitrarily nested, but when the parent
989 transaction was committed or aborted, all its child (nested)
  transactions were committed or aborted as well.
991 Commit of a nested transaction, in turn, made its changes
992 visible, but not durable: it destroyed the nested transaction,
993 all its changes would become available to the parent and
994 currently active nested transactions of this parent.
995
996 So the mechanism of nested transactions was employed to
997 provide "all or nothing" guarantee of SQL statements
998 required by the standard.
999 A nested transaction would be created at start of each SQL
1000 statement, and destroyed (committed or aborted) at statement
1001 end. Such nested transaction was internally referred to as
1002 a "statement transaction" and gave birth to the term.
1003
1004 (Historical note ends)
1005
1006 Since then a statement transaction is started for each statement
1007 that accesses transactional tables or uses the binary log. If
1008 the statement succeeds, the statement transaction is committed.
1009 If the statement fails, the transaction is rolled back. Commits
1010 of statement transactions are not durable -- each such
1011 transaction is nested in the normal transaction, and if the
1012 normal transaction is rolled back, the effects of all enclosed
1013 statement transactions are undone as well. Technically,
1014 a statement transaction can be viewed as a savepoint which is
1015 maintained automatically in order to make effects of one
1016 statement atomic.
1017
1018 The normal transaction is started by the user and is ended
1019 usually upon a user request as well. The normal transaction
1020 encloses transactions of all statements issued between
1021 its beginning and its end.
1022 In autocommit mode, the normal transaction is equivalent
1023 to the statement transaction.
1024
1025 Since MySQL supports PSEA (pluggable storage engine
1026 architecture), more than one transactional engine can be
1027 active at a time. Hence transactions, from the server
1028 point of view, are always distributed. In particular,
1029 transactional state is maintained independently for each
1030 engine. In order to commit a transaction the two phase
1031 commit protocol is employed.
1032
1033 Not all statements are executed in context of a transaction.
1034 Administrative and status information statements do not modify
1035 engine data, and thus do not start a statement transaction and
1036 also have no effect on the normal transaction. Examples of such
1037 statements are SHOW STATUS and RESET SLAVE.
1038
1039 Similarly DDL statements are not transactional,
1040 and therefore a transaction is [almost] never started for a DDL
1041 statement. The difference between a DDL statement and a purely
1042 administrative statement though is that a DDL statement always
1043 commits the current transaction before proceeding, if there is
1044 any.
1045
1046 At last, SQL statements that work with non-transactional
1047 engines also have no effect on the transaction state of the
1048 connection. Even though they are written to the binary log,
1049 and the binary log is, overall, transactional, the writes
1050 are done in "write-through" mode, directly to the binlog
  file, followed by an OS cache sync, in other words,
1052 bypassing the binlog undo log (translog).
1053 They do not commit the current normal transaction.
1054 A failure of a statement that uses non-transactional tables
1055 would cause a rollback of the statement transaction, but
  if no non-transactional tables are used,
1057 no statement transaction is started.
1058
1059 Data layout
1060 -----------
1061
1062 The server stores its transaction-related data in
1063 thd->transaction. This structure has two members of type
1064 THD_TRANS. These members correspond to the statement and
1065 normal transactions respectively:
1066
1067 - thd->transaction.stmt contains a list of engines
1068 that are participating in the given statement
1069 - thd->transaction.all contains a list of engines that
1070 have participated in any of the statement transactions started
1071 within the context of the normal transaction.
1072 Each element of the list contains a pointer to the storage
1073 engine, engine-specific transactional data, and engine-specific
1074 transaction flags.
1075
1076 In autocommit mode thd->transaction.all is empty.
1077 Instead, data of thd->transaction.stmt is
1078 used to commit/rollback the normal transaction.
1079
1080 The list of registered engines has a few important properties:
1081 - no engine is registered in the list twice
1082 - engines are present in the list a reverse temporal order --
1083 new participants are always added to the beginning of the list.
1084
1085 Transaction life cycle
1086 ----------------------
1087
1088 When a new connection is established, thd->transaction
1089 members are initialized to an empty state.
1090 If a statement uses any tables, all affected engines
1091 are registered in the statement engine list. In
1092 non-autocommit mode, the same engines are registered in
1093 the normal transaction list.
1094 At the end of the statement, the server issues a commit
1095 or a roll back for all engines in the statement list.
1096 At this point transaction flags of an engine, if any, are
1097 propagated from the statement list to the list of the normal
1098 transaction.
1099 When commit/rollback is finished, the statement list is
1100 cleared. It will be filled in again by the next statement,
1101 and emptied again at the next statement's end.
1102
1103 The normal transaction is committed in a similar way
1104 (by going over all engines in thd->transaction.all list)
1105 but at different times:
1106 - upon COMMIT SQL statement is issued by the user
1107 - implicitly, by the server, at the beginning of a DDL statement
1108 or SET AUTOCOMMIT={0|1} statement.
1109
1110 The normal transaction can be rolled back as well:
1111 - if the user has requested so, by issuing ROLLBACK SQL
1112 statement
1113 - if one of the storage engines requested a rollback
1114 by setting thd->transaction_rollback_request. This may
1115 happen in case, e.g., when the transaction in the engine was
1116 chosen a victim of the internal deadlock resolution algorithm
1117 and rolled back internally. When such a situation happens, there
1118 is little the server can do and the only option is to rollback
1119 transactions in all other participating engines. In this case
1120 the rollback is accompanied by an error sent to the user.
1121
1122 As follows from the use cases above, the normal transaction
1123 is never committed when there is an outstanding statement
1124 transaction. In most cases there is no conflict, since
1125 commits of the normal transaction are issued by a stand-alone
1126 administrative or DDL statement, thus no outstanding statement
1127 transaction of the previous statement exists. Besides,
  all statements that manipulate the normal transaction
1129 are prohibited in stored functions and triggers, therefore
1130 no conflicting situation can occur in a sub-statement either.
1131 The remaining rare cases when the server explicitly has
1132 to commit the statement transaction prior to committing the normal
1133 one cover error-handling scenarios (see for example
1134 SQLCOM_LOCK_TABLES).
1135
1136 When committing a statement or a normal transaction, the server
1137 either uses the two-phase commit protocol, or issues a commit
1138 in each engine independently. The two-phase commit protocol
1139 is used only if:
1140 - all participating engines support two-phase commit (provide
1141 handlerton::prepare PSEA API call) and
1142 - transactions in at least two engines modify data (i.e. are
1143 not read-only).
1144
1145 Note that the two phase commit is used for
1146 statement transactions, even though they are not durable anyway.
1147 This is done to ensure logical consistency of data in a multiple-
1148 engine transaction.
1149 For example, imagine that some day MySQL supports unique
1150 constraint checks deferred till the end of statement. In such
1151 case a commit in one of the engines may yield ER_DUP_KEY,
1152 and MySQL should be able to gracefully abort statement
1153 transactions of other participants.
1154
1155 After the normal transaction has been committed,
1156 thd->transaction.all list is cleared.
1157
1158 When a connection is closed, the current normal transaction, if
1159 any, is rolled back.
1160
1161 Roles and responsibilities
1162 --------------------------
1163
1164 The server has no way to know that an engine participates in
1165 the statement and a transaction has been started
1166 in it unless the engine says so. Thus, in order to be
1167 a part of a transaction, the engine must "register" itself.
1168 This is done by invoking trans_register_ha() server call.
1169 Normally the engine registers itself whenever handler::external_lock()
1170 is called. trans_register_ha() can be invoked many times: if
1171 an engine is already registered, the call does nothing.
1172 In case autocommit is not set, the engine must register itself
1173 twice -- both in the statement list and in the normal transaction
1174 list.
1175 In which list to register is a parameter of trans_register_ha().
1176
1177 Note, that although the registration interface in itself is
1178 fairly clear, the current usage practice often leads to undesired
1179 effects. E.g. since a call to trans_register_ha() in most engines
1180 is embedded into implementation of handler::external_lock(), some
1181 DDL statements start a transaction (at least from the server
1182 point of view) even though they are not expected to. E.g.
1183 CREATE TABLE does not start a transaction, since
1184 handler::external_lock() is never called during CREATE TABLE. But
1185 CREATE TABLE ... SELECT does, since handler::external_lock() is
1186 called for the table that is being selected from. This has no
1187 practical effects currently, but must be kept in mind
1188 nevertheless.
1189
1190 Once an engine is registered, the server will do the rest
1191 of the work.
1192
1193 During statement execution, whenever any of data-modifying
1194 PSEA API methods is used, e.g. handler::write_row() or
1195 handler::update_row(), the read-write flag is raised in the
1196 statement transaction for the involved engine.
1197 Currently All PSEA calls are "traced", and the data can not be
1198 changed in a way other than issuing a PSEA call. Important:
1199 unless this invariant is preserved the server will not know that
1200 a transaction in a given engine is read-write and will not
1201 involve the two-phase commit protocol!
1202
1203 At the end of a statement, server call trans_commit_stmt is
1204 invoked. This call in turn invokes handlerton::prepare()
1205 for every involved engine. Prepare is followed by a call
1206 to handlerton::commit_one_phase() If a one-phase commit
1207 will suffice, handlerton::prepare() is not invoked and
1208 the server only calls handlerton::commit_one_phase().
1209 At statement commit, the statement-related read-write
1210 engine flag is propagated to the corresponding flag in the
1211 normal transaction. When the commit is complete, the list
1212 of registered engines is cleared.
1213
1214 Rollback is handled in a similar fashion.
1215
1216 Additional notes on DDL and the normal transaction.
1217 ---------------------------------------------------
1218
1219 DDLs and operations with non-transactional engines
1220 do not "register" in thd->transaction lists, and thus do not
1221 modify the transaction state. Besides, each DDL in
1222 MySQL is prefixed with an implicit normal transaction commit
1223 (a call to trans_commit_implicit()), and thus leaves nothing
1224 to modify.
1225 However, as it has been pointed out with CREATE TABLE .. SELECT,
1226 some DDL statements can start a *new* transaction.
1227
1228 Behaviour of the server in this case is currently badly
1229 defined.
1230 DDL statements use a form of "semantic" logging
1231 to maintain atomicity: if CREATE TABLE .. SELECT failed,
1232 the newly created table is deleted.
1233 In addition, some DDL statements issue interim transaction
1234 commits: e.g. ALTER TABLE issues a commit after data is copied
1235 from the original table to the internal temporary table. Other
1236 statements, e.g. CREATE TABLE ... SELECT do not always commit
  after themselves.
1238 And finally there is a group of DDL statements such as
1239 RENAME/DROP TABLE that doesn't start a new transaction
1240 and doesn't commit.
1241
1242 This diversity makes it hard to say what will happen if
1243 by chance a stored function is invoked during a DDL --
1244 whether any modifications it makes will be committed or not
1245 is not clear. Fortunately, SQL grammar of few DDLs allows
1246 invocation of a stored function.
1247
1248 A consistent behaviour is perhaps to always commit the normal
1249 transaction after all DDLs, just like the statement transaction
1250 is always committed at the end of all statements.
1251 */
1252
/**
  Register a storage engine for a transaction.

  Every storage engine MUST call this function when it starts
  a transaction or a statement (that is it must be called both for the
  "beginning of transaction" and "beginning of statement").
  Only storage engines registered for the transaction/statement
  will know when to commit/rollback it.

  @param thd     session owning the transaction
  @param all     true to register in the session (normal) transaction
                 scope, false for the statement scope
  @param ht_arg  storage engine registering itself
  @param trxid   engine transaction id, used only by the performance
                 schema instrumentation (unused without PSI)

  @note
  trans_register_ha is idempotent - storage engine may register many
  times per transaction.

*/
void trans_register_ha(THD *thd, bool all, handlerton *ht_arg,
                       const ulonglong *trxid MY_ATTRIBUTE((unused))) {
  Ha_trx_info *ha_info;
  Transaction_ctx *trn_ctx = thd->get_transaction();
  Transaction_ctx::enum_trx_scope trx_scope =
      all ? Transaction_ctx::SESSION : Transaction_ctx::STMT;

  DBUG_TRACE;
  DBUG_PRINT("enter", ("%s", all ? "all" : "stmt"));

  if (all) {
    /*
      Ensure no active backup engine data exists, unless the current
      transaction is from replication and in active xa state.
    */
    DBUG_ASSERT(
        thd->get_ha_data(ht_arg->slot)->ha_ptr_backup == nullptr ||
        (thd->get_transaction()->xid_state()->has_state(XID_STATE::XA_ACTIVE)));
    DBUG_ASSERT(thd->get_ha_data(ht_arg->slot)->ha_ptr_backup == nullptr ||
                (thd->is_binlog_applier() || thd->slave_thread));

    thd->server_status |= SERVER_STATUS_IN_TRANS;
    if (thd->tx_read_only)
      thd->server_status |= SERVER_STATUS_IN_TRANS_READONLY;
    DBUG_PRINT("info", ("setting SERVER_STATUS_IN_TRANS"));
  }

  /* ha_info[0] is the statement-scope slot, ha_info[1] the session one. */
  ha_info = thd->get_ha_data(ht_arg->slot)->ha_info + (all ? 1 : 0);

  if (ha_info->is_started()) {
    DBUG_ASSERT(trn_ctx->ha_trx_info(trx_scope));
    return; /* already registered, return */
  }

  trn_ctx->register_ha(trx_scope, ha_info, ht_arg);
  trn_ctx->set_ha_trx_info(trx_scope, ha_info);

  /* An engine without a prepare hook rules out two-phase commit. */
  if (ht_arg->prepare == nullptr) trn_ctx->set_no_2pc(trx_scope, true);

  trn_ctx->xid_state()->set_query_id(thd->query_id);
  /*
    Register transaction start in performance schema if not done already.
    By doing this, we handle cases when the transaction is started implicitly in
    autocommit=0 mode, and cases when we are in normal autocommit=1 mode and the
    executed statement is a single-statement transaction.

    Explicitly started transactions are handled in trans_begin().

    Do not register transactions in which binary log is the only participating
    transactional storage engine.
  */
#ifdef HAVE_PSI_TRANSACTION_INTERFACE
  if (thd->m_transaction_psi == nullptr && ht_arg->db_type != DB_TYPE_BINLOG &&
      !thd->is_attachable_transaction_active()) {
    const XID *xid = trn_ctx->xid_state()->get_xid();
    bool autocommit = !thd->in_multi_stmt_transaction_mode();
    thd->m_transaction_psi = MYSQL_START_TRANSACTION(
        &thd->m_transaction_state, xid, trxid, thd->tx_isolation,
        thd->tx_read_only, autocommit);
    DEBUG_SYNC(thd, "after_set_transaction_psi_before_set_transaction_gtid");
    gtid_set_performance_schema_values(thd);
  }
#endif
}
1331
1332 /** XA Prepare one SE.
1333 @param[in] thd Session THD
1334 @param[in] ht SE handlerton
1335 @return 0 for success, 1 for error - entire transaction is rolled back. */
prepare_one_ht(THD * thd,handlerton * ht)1336 static int prepare_one_ht(THD *thd, handlerton *ht) {
1337 DBUG_TRACE;
1338 DBUG_ASSERT(!thd->status_var_aggregated);
1339 thd->status_var.ha_prepare_count++;
1340 if (ht->prepare) {
1341 DBUG_EXECUTE_IF("simulate_xa_failure_prepare", {
1342 ha_rollback_trans(thd, true);
1343 return 1;
1344 });
1345 if (ht->prepare(ht, thd, true)) {
1346 ha_rollback_trans(thd, true);
1347 return 1;
1348 }
1349 } else {
1350 push_warning_printf(thd, Sql_condition::SL_WARNING, ER_ILLEGAL_HA,
1351 ER_THD(thd, ER_ILLEGAL_HA),
1352 ha_resolve_storage_engine_name(ht));
1353 }
1354 return 0;
1355 }
1356
1357 /**
1358 @retval
1359 0 ok
1360 @retval
1361 1 error, transaction was rolled back
1362 */
ha_xa_prepare(THD * thd)1363 int ha_xa_prepare(THD *thd) {
1364 int error = 0;
1365 Transaction_ctx *trn_ctx = thd->get_transaction();
1366 DBUG_TRACE;
1367
1368 if (trn_ctx->is_active(Transaction_ctx::SESSION)) {
1369 const Ha_trx_info *ha_info = trn_ctx->ha_trx_info(Transaction_ctx::SESSION);
1370 bool gtid_error = false;
1371 bool need_clear_owned_gtid = false;
1372 std::tie(gtid_error, need_clear_owned_gtid) = commit_owned_gtids(thd, true);
1373 if (gtid_error) {
1374 DBUG_ASSERT(need_clear_owned_gtid);
1375
1376 ha_rollback_trans(thd, true);
1377 error = 1;
1378 goto err;
1379 }
1380
1381 /*
1382 Ensure externalization order for applier threads.
1383
1384 Note: the calls to Commit_order_manager::wait/wait_and_finish() will be
1385 no-op for threads other than replication applier threads.
1386 */
1387 if (Commit_order_manager::wait(thd)) {
1388 thd->commit_error = THD::CE_NONE;
1389 ha_rollback_trans(thd, true);
1390 error = 1;
1391 gtid_error = true;
1392 goto err;
1393 }
1394
1395 /* Allow GTID to be read by SE for XA prepare. */
1396 {
1397 Clone_handler::XA_Operation xa_guard(thd);
1398
1399 /* Prepare binlog SE first, if there. */
1400 while (ha_info != nullptr && error == 0) {
1401 auto ht = ha_info->ht();
1402 if (ht->db_type == DB_TYPE_BINLOG) {
1403 error = prepare_one_ht(thd, ht);
1404 break;
1405 }
1406 ha_info = ha_info->next();
1407 }
1408 /* Prepare all SE other than binlog. */
1409 ha_info = trn_ctx->ha_trx_info(Transaction_ctx::SESSION);
1410 while (ha_info != nullptr && error == 0) {
1411 auto ht = ha_info->ht();
1412 error = prepare_one_ht(thd, ht);
1413 if (error != 0) {
1414 break;
1415 }
1416 ha_info = ha_info->next();
1417 }
1418 }
1419
1420 DBUG_ASSERT(error != 0 || thd->get_transaction()->xid_state()->has_state(
1421 XID_STATE::XA_IDLE));
1422
1423 err:
1424 /*
1425 After ensuring externalization order for applier thread, remove it
1426 from waiting (Commit Order Queue) and allow next applier thread to
1427 be ordered.
1428
1429 Note: the calls to Commit_order_manager::wait_and_finish() will be
1430 no-op for threads other than replication applier threads.
1431 */
1432 Commit_order_manager::wait_and_finish(thd, error);
1433 gtid_state_commit_or_rollback(thd, need_clear_owned_gtid, !gtid_error);
1434 }
1435
1436 return error;
1437 }
1438
1439 /**
1440 Check if we can skip the two-phase commit.
1441
1442 A helper function to evaluate if two-phase commit is mandatory.
1443 As a side effect, propagates the read-only/read-write flags
1444 of the statement transaction to its enclosing normal transaction.
1445
1446 If we have at least two engines with read-write changes we must
1447 run a two-phase commit. Otherwise we can run several independent
1448 commits as the only transactional engine has read-write changes
1449 and others are read-only.
1450
1451 @retval 0 All engines are read-only.
1452 @retval 1 We have the only engine with read-write changes.
1453 @retval >1 More than one engine have read-write changes.
1454 Note: return value might NOT be the exact number of
1455 engines with read-write changes.
1456 */
1457
ha_check_and_coalesce_trx_read_only(THD * thd,Ha_trx_info * ha_list,bool all)1458 static uint ha_check_and_coalesce_trx_read_only(THD *thd, Ha_trx_info *ha_list,
1459 bool all) {
1460 /* The number of storage engines that have actual changes. */
1461 unsigned rw_ha_count = 0;
1462 Ha_trx_info *ha_info;
1463
1464 for (ha_info = ha_list; ha_info; ha_info = ha_info->next()) {
1465 if (ha_info->is_trx_read_write()) ++rw_ha_count;
1466
1467 if (!all) {
1468 Ha_trx_info *ha_info_all =
1469 &thd->get_ha_data(ha_info->ht()->slot)->ha_info[1];
1470 DBUG_ASSERT(ha_info != ha_info_all);
1471 /*
1472 Merge read-only/read-write information about statement
1473 transaction to its enclosing normal transaction. Do this
1474 only if in a real transaction -- that is, if we know
1475 that ha_info_all is registered in thd->transaction.all.
1476 Since otherwise we only clutter the normal transaction flags.
1477 */
1478 if (ha_info_all->is_started()) /* false if autocommit. */
1479 ha_info_all->coalesce_trx_with(ha_info);
1480 } else if (rw_ha_count > 1) {
1481 /*
1482 It is a normal transaction, so we don't need to merge read/write
1483 information up, and the need for two-phase commit has been
1484 already established. Break the loop prematurely.
1485 */
1486 break;
1487 }
1488 }
1489 return rw_ha_count;
1490 }
1491
1492 /**
1493 The function computes condition to call gtid persistor wrapper,
1494 and executes it.
1495 It is invoked at committing a statement or transaction, including XA,
1496 and also at XA prepare handling.
1497
1498 @param thd Thread context.
1499 @param all The execution scope, true for the transaction one, false
1500 for the statement one.
1501
1502 @return std::pair containing: Error and Owned GTID release status
1503 Error
1504 @retval 0 Ok
1505 @retval !0 Error
1506
1507 Owned GTID release status
1508 @retval true remove the GTID owned by thread from owned GTIDs
1509 @retval false removal of the GTID owned by thread from owned GTIDs
1510 is not required
1511 */
1512
commit_owned_gtids(THD * thd,bool all)1513 std::pair<int, bool> commit_owned_gtids(THD *thd, bool all) {
1514 DBUG_TRACE;
1515 int error = 0;
1516 bool need_clear_owned_gtid = false;
1517
1518 /*
1519 If the binary log is disabled for this thread (either by
1520 log_bin=0 or sql_log_bin=0 or by log_slave_updates=0 for a
1521 slave thread), then the statement will not be written to
1522 the binary log. In this case, we should save its GTID into
1523 mysql.gtid_executed table and @@GLOBAL.GTID_EXECUTED as it
1524 did when binlog is enabled.
1525
1526 We also skip saving GTID into mysql.gtid_executed table and
1527 @@GLOBAL.GTID_EXECUTED when slave-preserve-commit-order is enabled. We skip
1528 as GTID will be saved in
1529 Commit_order_manager::flush_engine_and_signal_threads (invoked from
1530 Commit_order_manager::wait_and_finish). In particular, there is the
1531 following call stack under ha_commit_low which save GTID in case its skipped
1532 here:
1533
1534 ha_commit_low ->
1535 Commit_order_manager::wait_and_finish ->
1536 Commit_order_manager::finish ->
1537 Commit_order_manager::flush_engine_and_signal_threads ->
1538 Gtid_state::update_commit_group
1539
1540 We also skip saving GTID for intermediate commits i.e. when
1541 thd->is_operating_substatement_implicitly is enabled.
1542 */
1543 if (thd->is_current_stmt_binlog_log_slave_updates_disabled() &&
1544 ending_trans(thd, all) && !thd->is_operating_gtid_table_implicitly &&
1545 !thd->is_operating_substatement_implicitly) {
1546 if (!has_commit_order_manager(thd) &&
1547 (thd->owned_gtid.sidno > 0 ||
1548 thd->owned_gtid.sidno == THD::OWNED_SIDNO_ANONYMOUS)) {
1549 need_clear_owned_gtid = true;
1550 }
1551
1552 /*
1553 If GTID is not persisted by SE, write it to
1554 mysql.gtid_executed table.
1555 */
1556 if (thd->owned_gtid.sidno > 0 && !thd->se_persists_gtid()) {
1557 error = gtid_state->save(thd);
1558 }
1559 }
1560
1561 return std::make_pair(error, need_clear_owned_gtid);
1562 }
1563
1564 /**
1565 @param[in] thd Thread handle.
1566 @param[in] all Session transaction if true, statement
1567 otherwise.
1568 @param[in] ignore_global_read_lock Allow commit to complete even if a
1569 global read lock is active. This can be
1570 used to allow changes to internal tables
1571 (e.g. slave status tables).
1572
1573 @retval
1574 0 ok
1575 @retval
1576 1 transaction was rolled back
1577 @retval
1578 2 error during commit, data may be inconsistent
1579
1580 @todo
1581 Since we don't support nested statement transactions in 5.0,
1582 we can't commit or rollback stmt transactions while we are inside
1583 stored functions or triggers. So we simply do nothing now.
1584 TODO: This should be fixed in later ( >= 5.1) releases.
1585 */
1586
ha_commit_trans(THD * thd,bool all,bool ignore_global_read_lock)1587 int ha_commit_trans(THD *thd, bool all, bool ignore_global_read_lock) {
1588 int error = 0;
1589 THD_STAGE_INFO(thd, stage_waiting_for_handler_commit);
1590 bool run_slave_post_commit = false;
1591 bool need_clear_owned_gtid = false;
1592 /*
1593 Save transaction owned gtid into table before transaction prepare
1594 if binlog is disabled, or binlog is enabled and log_slave_updates
1595 is disabled with slave SQL thread or slave worker thread.
1596 */
1597 std::tie(error, need_clear_owned_gtid) = commit_owned_gtids(thd, all);
1598
1599 /*
1600 'all' means that this is either an explicit commit issued by
1601 user, or an implicit commit issued by a DDL.
1602 */
1603 Transaction_ctx *trn_ctx = thd->get_transaction();
1604 Transaction_ctx::enum_trx_scope trx_scope =
1605 all ? Transaction_ctx::SESSION : Transaction_ctx::STMT;
1606
1607 /*
1608 "real" is a nick name for a transaction for which a commit will
1609 make persistent changes. E.g. a 'stmt' transaction inside a 'all'
    transaction is not 'real': even though it's possible to commit it,
1611 the changes are not durable as they might be rolled back if the
1612 enclosing 'all' transaction is rolled back.
1613 */
1614 bool is_real_trans = all || !trn_ctx->is_active(Transaction_ctx::SESSION);
1615
1616 Ha_trx_info *ha_info = trn_ctx->ha_trx_info(trx_scope);
1617 XID_STATE *xid_state = trn_ctx->xid_state();
1618
1619 DBUG_TRACE;
1620
1621 DBUG_PRINT("info", ("all=%d thd->in_sub_stmt=%d ha_info=%p is_real_trans=%d",
1622 all, thd->in_sub_stmt, ha_info, is_real_trans));
1623 /*
1624 We must not commit the normal transaction if a statement
1625 transaction is pending. Otherwise statement transaction
1626 flags will not get propagated to its normal transaction's
1627 counterpart.
1628 */
1629 DBUG_ASSERT(!trn_ctx->is_active(Transaction_ctx::STMT) || !all);
1630
1631 DBUG_EXECUTE_IF("pre_commit_error", {
1632 error = true;
1633 my_error(ER_UNKNOWN_ERROR, MYF(0));
1634 });
1635
1636 /*
1637 When atomic DDL is executed on the slave, we would like to
1638 to update slave applier state as part of DDL's transaction.
1639 Call Relay_log_info::pre_commit() hook to do this before DDL
1640 gets committed in the following block.
1641 Failed atomic DDL statements should've been marked as executed/committed
1642 during statement rollback, though some like GRANT may continue until
1643 this point.
1644 When applying a DDL statement on a slave and the statement is filtered
1645 out by a table filter, we report an error "ER_SLAVE_IGNORED_TABLE" to
1646 warn slave applier thread. We need to save the DDL statement's gtid
1647 into mysql.gtid_executed system table if the binary log is disabled
1648 on the slave and gtids are enabled.
1649 */
1650 if (is_real_trans && is_atomic_ddl_commit_on_slave(thd) &&
1651 (!thd->is_error() ||
1652 (thd->is_operating_gtid_table_implicitly &&
1653 thd->get_stmt_da()->mysql_errno() == ER_SLAVE_IGNORED_TABLE))) {
1654 run_slave_post_commit = true;
1655 error = error || thd->rli_slave->pre_commit();
1656
1657 DBUG_EXECUTE_IF("rli_pre_commit_error", {
1658 error = true;
1659 my_error(ER_UNKNOWN_ERROR, MYF(0));
1660 });
1661 DBUG_EXECUTE_IF("slave_crash_before_commit", {
1662 /* This pre-commit crash aims solely at atomic DDL */
1663 DBUG_SUICIDE();
1664 });
1665 }
1666
1667 if (thd->in_sub_stmt) {
1668 DBUG_ASSERT(0);
1669 /*
1670 Since we don't support nested statement transactions in 5.0,
1671 we can't commit or rollback stmt transactions while we are inside
1672 stored functions or triggers. So we simply do nothing now.
1673 TODO: This should be fixed in later ( >= 5.1) releases.
1674 */
1675 if (!all) return 0;
1676 /*
1677 We assume that all statements which commit or rollback main transaction
1678 are prohibited inside of stored functions or triggers. So they should
1679 bail out with error even before ha_commit_trans() call. To be 100% safe
1680 let us throw error in non-debug builds.
1681 */
1682 my_error(ER_COMMIT_NOT_ALLOWED_IN_SF_OR_TRG, MYF(0));
1683 return 2;
1684 }
1685
1686 MDL_request mdl_request;
1687 bool release_mdl = false;
1688 if (ha_info && !error) {
1689 uint rw_ha_count = 0;
1690 bool rw_trans;
1691
1692 DBUG_EXECUTE_IF("crash_commit_before", DBUG_SUICIDE(););
1693
1694 /*
1695 skip 2PC if the transaction is empty and it is not marked as started (which
1696 can happen when the slave's binlog is disabled)
1697 */
1698 if (ha_info->is_started())
1699 rw_ha_count = ha_check_and_coalesce_trx_read_only(thd, ha_info, all);
1700 trn_ctx->set_rw_ha_count(trx_scope, rw_ha_count);
1701 /* rw_trans is true when we in a transaction changing data */
1702 rw_trans = is_real_trans && (rw_ha_count > 0);
1703
1704 DBUG_EXECUTE_IF("dbug.enabled_commit", {
1705 const char act[] = "now signal Reached wait_for signal.commit_continue";
1706 DBUG_ASSERT(!debug_sync_set_action(thd, STRING_WITH_LEN(act)));
1707 };);
1708 DEBUG_SYNC(thd, "ha_commit_trans_before_acquire_commit_lock");
1709 if (rw_trans && !ignore_global_read_lock) {
1710 /*
1711 Acquire a metadata lock which will ensure that COMMIT is blocked
1712 by an active FLUSH TABLES WITH READ LOCK (and vice versa:
1713 COMMIT in progress blocks FTWRL).
1714
1715 We allow the owner of FTWRL to COMMIT; we assume that it knows
1716 what it does.
1717 */
1718 MDL_REQUEST_INIT(&mdl_request, MDL_key::COMMIT, "", "",
1719 MDL_INTENTION_EXCLUSIVE, MDL_EXPLICIT);
1720
1721 DBUG_PRINT("debug", ("Acquire MDL commit lock"));
1722 if (thd->mdl_context.acquire_lock(&mdl_request,
1723 thd->variables.lock_wait_timeout)) {
1724 ha_rollback_trans(thd, all);
1725 return 1;
1726 }
1727 release_mdl = true;
1728
1729 DEBUG_SYNC(thd, "ha_commit_trans_after_acquire_commit_lock");
1730 }
1731
1732 if (rw_trans && stmt_has_updated_trans_table(ha_info) &&
1733 check_readonly(thd, true)) {
1734 ha_rollback_trans(thd, all);
1735 error = 1;
1736 goto end;
1737 }
1738
1739 if (!trn_ctx->no_2pc(trx_scope) && (trn_ctx->rw_ha_count(trx_scope) > 1))
1740 error = tc_log->prepare(thd, all);
1741 }
1742 /*
1743 The state of XA transaction is changed to Prepared, intermediately.
1744 It's going to change to the regular NOTR at the end.
1745 The fact of the Prepared state is of interest to binary logger.
1746 */
1747 if (!error && all && xid_state->has_state(XID_STATE::XA_IDLE)) {
1748 DBUG_ASSERT(
1749 thd->lex->sql_command == SQLCOM_XA_COMMIT &&
1750 static_cast<Sql_cmd_xa_commit *>(thd->lex->m_sql_cmd)->get_xa_opt() ==
1751 XA_ONE_PHASE);
1752
1753 xid_state->set_state(XID_STATE::XA_PREPARED);
1754 }
1755 if (error || (error = tc_log->commit(thd, all))) {
1756 ha_rollback_trans(thd, all);
1757 error = 1;
1758 goto end;
1759 }
  /*
    Mark multi-statement (any autocommit mode) or single-statement
    (autocommit=1) transaction as committed
  */
1764 #ifdef HAVE_PSI_TRANSACTION_INTERFACE
1765 if (is_real_trans && thd->m_transaction_psi != nullptr) {
1766 MYSQL_COMMIT_TRANSACTION(thd->m_transaction_psi);
1767 thd->m_transaction_psi = nullptr;
1768 }
1769 #endif
1770 DBUG_EXECUTE_IF("crash_commit_after",
1771 if (!thd->is_operating_gtid_table_implicitly)
1772 DBUG_SUICIDE(););
1773 end:
1774 if (release_mdl && mdl_request.ticket) {
1775 /*
1776 We do not always immediately release transactional locks
1777 after ha_commit_trans() (see uses of ha_enable_transaction()),
1778 thus we release the commit blocker lock as soon as it's
1779 not needed.
1780 */
1781 DBUG_PRINT("debug", ("Releasing MDL commit lock"));
1782 thd->mdl_context.release_lock(mdl_request.ticket);
1783 }
1784 /* Free resources and perform other cleanup even for 'empty' transactions. */
1785 if (is_real_trans) {
1786 trn_ctx->cleanup();
1787 thd->tx_priority = 0;
1788 }
1789
1790 if (need_clear_owned_gtid) {
1791 thd->server_status &= ~SERVER_STATUS_IN_TRANS;
1792 /*
1793 Release the owned GTID when binlog is disabled, or binlog is
1794 enabled and log_slave_updates is disabled with slave SQL thread
1795 or slave worker thread.
1796 */
1797 if (error)
1798 gtid_state->update_on_rollback(thd);
1799 else
1800 gtid_state->update_on_commit(thd);
1801 } else {
1802 if (has_commit_order_manager(thd) && error) {
1803 gtid_state->update_on_rollback(thd);
1804 }
1805 }
1806 if (run_slave_post_commit) {
1807 DBUG_EXECUTE_IF("slave_crash_after_commit", DBUG_SUICIDE(););
1808
1809 thd->rli_slave->post_commit(error != 0);
1810 /*
1811 SERVER_STATUS_IN_TRANS may've been gained by pre_commit alone
1812 when the main DDL transaction is filtered out of execution.
1813 In such case the status has to be reset now.
1814
1815 TODO: move/refactor this handling onto trans_commit/commit_implicit()
1816 the caller level.
1817 */
1818 thd->server_status &= ~SERVER_STATUS_IN_TRANS;
1819 } else {
1820 DBUG_EXECUTE_IF("slave_crash_after_commit", {
1821 if (thd->slave_thread && thd->rli_slave &&
1822 thd->rli_slave->current_event &&
1823 thd->rli_slave->current_event->get_type_code() ==
1824 binary_log::XID_EVENT &&
1825 !thd->is_operating_substatement_implicitly &&
1826 !thd->is_operating_gtid_table_implicitly)
1827 DBUG_SUICIDE();
1828 });
1829 }
1830
1831 return error;
1832 }
1833
1834 /**
  Commit the session's outstanding transaction.
1836
1837 @pre thd->transaction.flags.commit_low == true
1838 @post thd->transaction.flags.commit_low == false
1839
1840 @note This function does not care about global read lock; the caller
1841 should.
1842
1843 @param[in] thd Thread handle.
1844 @param[in] all Is set in case of explicit commit
1845 (COMMIT statement), or implicit commit
1846 issued by DDL. Is not set when called
1847 at the end of statement, even if
1848 autocommit=1.
1849 @param[in] run_after_commit
1850 True by default, otherwise, does not execute
1851 the after_commit hook in the function.
1852 */
1853
int ha_commit_low(THD *thd, bool all, bool run_after_commit) {
  int error = 0;
  Transaction_ctx *trn_ctx = thd->get_transaction();
  // 'all' selects the whole session transaction, otherwise only the
  // statement transaction is committed.
  Transaction_ctx::enum_trx_scope trx_scope =
      all ? Transaction_ctx::SESSION : Transaction_ctx::STMT;
  Ha_trx_info *ha_info = trn_ctx->ha_trx_info(trx_scope), *ha_info_next;

  DBUG_TRACE;

  if (ha_info) {
    bool restore_backup_ha_data = false;
    /*
      At execution of XA COMMIT ONE PHASE binlog or slave applier
      reattaches the engine ha_data to THD, previously saved at XA START.
    */
    if (all && thd->rpl_unflag_detached_engine_ha_data()) {
      DBUG_PRINT("info", ("query='%s'", thd->query().str));
      DBUG_ASSERT(thd->lex->sql_command == SQLCOM_XA_COMMIT);
      DBUG_ASSERT(
          static_cast<Sql_cmd_xa_commit *>(thd->lex->m_sql_cmd)->get_xa_opt() ==
          XA_ONE_PHASE);
      restore_backup_ha_data = true;
    }

    bool is_applier_wait_enabled = false;

    /*
      Preserve externalization and persistence order for applier threads.

      The conditions should be understood as follows:

      - When the binlog is enabled, this will be done from
        MYSQL_BIN_LOG::ordered_commit and should not be done here.
        Therefore, we have the condition
        thd->is_current_stmt_binlog_disabled().

      - This function is usually called once per statement, with
        all=false.  We should not preserve the commit order when this
        function is called in that context.  Therefore, we have the
        condition ending_trans(thd, all).

      - Statements such as ANALYZE/OPTIMIZE/REPAIR TABLE will call
        ha_commit_low multiple times with all=true from within
        mysql_admin_table, mysql_recreate_table, and
        handle_histogram_command. After returning to
        mysql_execute_command, it will call ha_commit_low a final
        time.  It is only in this final call that we should preserve
        the commit order.  Therefore, we set the flag
        thd->is_operating_substatement_implicitly while executing
        mysql_admin_table, mysql_recreate_table, and
        handle_histogram_command, clear it when returning from those
        functions, and check the flag here in ha_commit_low().

      - In all the above cases, we should make the current transaction
        fail early in case a previous transaction has rolled back.
        Therefore, we also invoke the commit order manager in case
        get_rollback_status returns true.

      Note: the calls to Commit_order_manager::wait/wait_and_finish() will be
      no-op for threads other than replication applier threads.
    */
    if ((!thd->is_operating_substatement_implicitly &&
         !thd->is_operating_gtid_table_implicitly &&
         thd->is_current_stmt_binlog_log_slave_updates_disabled() &&
         ending_trans(thd, all)) ||
        Commit_order_manager::get_rollback_status(thd)) {
      if (Commit_order_manager::wait(thd)) {
        error = 1;
        /*
          Remove applier thread from waiting in Commit Order Queue and
          allow next applier thread to be ordered.
        */
        Commit_order_manager::wait_and_finish(thd, error);
        goto err;
      }
      is_applier_wait_enabled = true;
    }

    // Commit in every registered engine.  On an engine error we report it
    // and continue, so the remaining engines still get their commit call
    // and the per-engine state is always cleaned up.
    for (; ha_info; ha_info = ha_info_next) {
      int err;
      handlerton *ht = ha_info->ht();
      if ((err = ht->commit(ht, thd, all))) {
        char errbuf[MYSQL_ERRMSG_SIZE];
        my_error(ER_ERROR_DURING_COMMIT, MYF(0), err,
                 my_strerror(errbuf, MYSQL_ERRMSG_SIZE, err));
        error = 1;
      }
      DBUG_ASSERT(!thd->status_var_aggregated);
      thd->status_var.ha_commit_count++;
      // Save the link before reset() wipes this node.
      ha_info_next = ha_info->next();
      if (restore_backup_ha_data) reattach_engine_ha_data_to_thd(thd, ht);
      ha_info->reset(); /* keep it conveniently zero-filled */
    }
    trn_ctx->reset_scope(trx_scope);

    /*
      After ensuring externalization order for applier thread, remove it
      from waiting (Commit Order Queue) and allow next applier thread to
      be ordered.

      Note: the calls to Commit_order_manager::wait_and_finish() will be
      no-op for threads other than replication applier threads.
    */
    if (is_applier_wait_enabled) {
      Commit_order_manager::wait_and_finish(thd, error);
    }
  }

err:
  /* Free resources and perform other cleanup even for 'empty' transactions. */
  if (all) trn_ctx->cleanup();
  /*
    When the transaction has been committed, we clear the commit_low
    flag. This allow other parts of the system to check if commit_low
    was called.
  */
  trn_ctx->m_flags.commit_low = false;
  if (run_after_commit && thd->get_transaction()->m_flags.run_hooks) {
    /*
      If commit succeeded, we call the after_commit hook.

      TODO: Investigate if this can be refactored so that there is
            only one invocation of this hook in the code (in
            MYSQL_LOG_BIN::finish_commit).
    */
    if (!error) (void)RUN_HOOK(transaction, after_commit, (thd, all));
    trn_ctx->m_flags.run_hooks = false;
  }
  return error;
}
1984
/**
  Roll back the statement or session transaction in all participating
  storage engines, running the before/after rollback hooks around the
  engine calls.

  @param thd  Thread handle.
  @param all  true  - roll back the whole session transaction,
              false - roll back only the statement transaction.

  @return 0 on success, 1 if some engine reported a rollback error.
*/
int ha_rollback_low(THD *thd, bool all) {
  Transaction_ctx *trn_ctx = thd->get_transaction();
  int error = 0;
  Transaction_ctx::enum_trx_scope trx_scope =
      all ? Transaction_ctx::SESSION : Transaction_ctx::STMT;
  Ha_trx_info *ha_info = trn_ctx->ha_trx_info(trx_scope), *ha_info_next;

  (void)RUN_HOOK(transaction, before_rollback, (thd, all));

  if (ha_info) {
    bool restore_backup_ha_data = false;
    /*
      Similarly to the commit case, the binlog or slave applier
      reattaches the engine ha_data to THD.
    */
    if (all && thd->rpl_unflag_detached_engine_ha_data()) {
      DBUG_ASSERT(trn_ctx->xid_state()->get_state() != XID_STATE::XA_NOTR ||
                  thd->killed == THD::KILL_CONNECTION);

      restore_backup_ha_data = true;
    }

    // Roll back in every registered engine; continue on error so each
    // engine still gets its rollback call and cleanup.
    for (; ha_info; ha_info = ha_info_next) {
      int err;
      handlerton *ht = ha_info->ht();
      if ((err = ht->rollback(ht, thd, all))) {  // cannot happen
        char errbuf[MYSQL_ERRMSG_SIZE];
        my_error(ER_ERROR_DURING_ROLLBACK, MYF(0), err,
                 my_strerror(errbuf, MYSQL_ERRMSG_SIZE, err));
        error = 1;
      }
      DBUG_ASSERT(!thd->status_var_aggregated);
      thd->status_var.ha_rollback_count++;
      // Save the link before reset() wipes this node.
      ha_info_next = ha_info->next();
      if (restore_backup_ha_data) reattach_engine_ha_data_to_thd(thd, ht);
      ha_info->reset(); /* keep it conveniently zero-filled */
    }
    trn_ctx->reset_scope(trx_scope);
  }

  /*
    Thanks to possibility of MDL deadlock rollback request can come even if
    transaction hasn't been started in any transactional storage engine.

    It is possible to have a call of ha_rollback_low() while handling
    failure from ha_xa_prepare() and an error in Diagnostics_area still
    wasn't set. Therefore it is required to check that an error in
    Diagnostics_area is set before calling the method XID_STATE::set_error().

    If it wasn't done it would lead to failure of the assertion
    DBUG_ASSERT(m_status == DA_ERROR)
    in the method Diagnostics_area::mysql_errno().

    In case ha_xa_prepare is failed and an error wasn't set in Diagnostics_area
    the error ER_XA_RBROLLBACK is set in the Diagnostics_area from
    the method Sql_cmd_xa_prepare::trans_xa_prepare() when non-zero result code
    returned by ha_xa_prepare() is handled.
  */
  if (all && thd->transaction_rollback_request && thd->is_error())
    trn_ctx->xid_state()->set_error(thd);

  (void)RUN_HOOK(transaction, after_rollback, (thd, all));
  return error;
}
2049
/**
  Roll back the statement or session transaction, including the
  TC-log/binlog part, and perform transaction-level cleanup
  (PSI transaction, GTID state, unsafe-rollback warnings).

  @param thd  Thread handle.
  @param all  true  - roll back the whole session transaction,
              false - roll back only the statement transaction.

  @return 0 on success, non-zero on failure (error from
          tc_log->rollback(), or 1 when called illegally from within a
          sub-statement).
*/
int ha_rollback_trans(THD *thd, bool all) {
  int error = 0;
  Transaction_ctx *trn_ctx = thd->get_transaction();
  // XA ROLLBACK of a prepared transaction updates the GTID state inside
  // the tc_log rollback path, so it is excluded from the update below.
  bool is_xa_rollback = trn_ctx->xid_state()->has_state(XID_STATE::XA_PREPARED);

  /*
    "real" is a nick name for a transaction for which a commit will
    make persistent changes. E.g. a 'stmt' transaction inside a 'all'
    transaction is not 'real': even though it's possible to commit it,
    the changes are not durable as they might be rolled back if the
    enclosing 'all' transaction is rolled back.
    We establish the value of 'is_real_trans' by checking
    if it's an explicit COMMIT or BEGIN statement, or implicit
    commit issued by DDL (in these cases all == true),
    or if we're running in autocommit mode (it's only in the autocommit mode
    ha_commit_one_phase() is called with an empty
    transaction.all.ha_list, see why in trans_register_ha()).
  */
  bool is_real_trans = all || !trn_ctx->is_active(Transaction_ctx::SESSION);

  DBUG_TRACE;

  /*
    We must not rollback the normal transaction if a statement
    transaction is pending.
  */
  DBUG_ASSERT(!trn_ctx->is_active(Transaction_ctx::STMT) || !all);

  if (thd->in_sub_stmt) {
    DBUG_ASSERT(0);
    /*
      If we are inside stored function or trigger we should not commit or
      rollback current statement transaction. See comment in ha_commit_trans()
      call for more information.
    */
    if (!all) return 0;
    my_error(ER_COMMIT_NOT_ALLOWED_IN_SF_OR_TRG, MYF(0));
    return 1;
  }

  if (tc_log) error = tc_log->rollback(thd, all);
  /*
    Mark multi-statement (any autocommit mode) or single-statement
    (autocommit=1) transaction as rolled back
  */
#ifdef HAVE_PSI_TRANSACTION_INTERFACE
  if (all || !thd->in_active_multi_stmt_transaction()) {
    MYSQL_ROLLBACK_TRANSACTION(thd->m_transaction_psi);
    thd->m_transaction_psi = nullptr;
  }
#endif

  /* Always cleanup. Even if nht==0. There may be savepoints. */
  if (is_real_trans) {
    trn_ctx->cleanup();
    thd->tx_priority = 0;
  }

  if (all) thd->transaction_rollback_request = false;

  /*
    Only call gtid_rollback(THD*), which will purge thd->owned_gtid, if
    complete transaction is being rollback or autocommit=1.
    Notice, XA rollback has just invoked update_on_commit() through
    tc_log->*rollback* stack.
  */
  if (is_real_trans && !is_xa_rollback) {
#ifndef XTRABACKUP
    /* gtid_state is uninitialized in xtrabackup, and this call
    would result in a no-op anyway, as the related thd var is
    disabled */
    gtid_state->update_on_rollback(thd);
#endif
  }

  /*
    If the transaction cannot be rolled back safely, warn; don't warn if this
    is a slave thread (because when a slave thread executes a ROLLBACK, it has
    been read from the binary log, so it's 100% sure and normal to produce
    error ER_WARNING_NOT_COMPLETE_ROLLBACK. If we sent the warning to the
    slave SQL thread, it would not stop the thread but just be printed in
    the error log; but we don't want users to wonder why they have this
    message in the error log, so we don't send it.
  */
  if (is_real_trans &&
      trn_ctx->cannot_safely_rollback(Transaction_ctx::SESSION) &&
      !thd->slave_thread && thd->killed != THD::KILL_CONNECTION)
    trn_ctx->push_unsafe_rollback_warnings(thd);

  return error;
}
2141
2142 /**
2143 Commit the attachable transaction in storage engines.
2144
2145 @note This is slimmed down version of ha_commit_trans()/ha_commit_low()
2146 which commits attachable transaction but skips code which is
2147 unnecessary and unsafe for them (like dealing with GTIDs).
2148 Since attachable transactions are read-only their commit only
2149 needs to release resources and cleanup state in SE.
2150
2151 @param thd Current thread
2152
2153 @retval 0 - Success
2154 @retval non-0 - Failure
2155 */
ha_commit_attachable(THD * thd)2156 int ha_commit_attachable(THD *thd) {
2157 int error = 0;
2158 Transaction_ctx *trn_ctx = thd->get_transaction();
2159 Ha_trx_info *ha_info = trn_ctx->ha_trx_info(Transaction_ctx::STMT);
2160 Ha_trx_info *ha_info_next;
2161
2162 /* This function only handles attachable transactions. */
2163 DBUG_ASSERT(thd->is_attachable_ro_transaction_active());
2164 /*
2165 Since the attachable transaction is AUTOCOMMIT we only need
2166 to care about statement transaction.
2167 */
2168 DBUG_ASSERT(!trn_ctx->is_active(Transaction_ctx::SESSION));
2169
2170 if (ha_info) {
2171 for (; ha_info; ha_info = ha_info_next) {
2172 /* Attachable transaction is not supposed to modify anything. */
2173 DBUG_ASSERT(!ha_info->is_trx_read_write());
2174
2175 handlerton *ht = ha_info->ht();
2176 if (ht->commit(ht, thd, false)) {
2177 /*
2178 In theory this should not happen since attachable transactions
2179 are read only and therefore commit is supposed to only release
2180 resources/cleanup state. Even if this happens we will simply
2181 continue committing attachable transaction in other SEs.
2182 */
2183 DBUG_ASSERT(false);
2184 error = 1;
2185 }
2186 DBUG_ASSERT(!thd->status_var_aggregated);
2187 thd->status_var.ha_commit_count++;
2188 ha_info_next = ha_info->next();
2189
2190 ha_info->reset(); /* keep it conveniently zero-filled */
2191 }
2192 trn_ctx->reset_scope(Transaction_ctx::STMT);
2193 }
2194
2195 /*
2196 Mark transaction as commited in PSI.
2197 */
2198 #ifdef HAVE_PSI_TRANSACTION_INTERFACE
2199 if (thd->m_transaction_psi != nullptr) {
2200 MYSQL_COMMIT_TRANSACTION(thd->m_transaction_psi);
2201 thd->m_transaction_psi = nullptr;
2202 }
2203 #endif
2204
2205 /* Free resources and perform other cleanup even for 'empty' transactions. */
2206 trn_ctx->cleanup();
2207
2208 return (error);
2209 }
2210
2211 /**
2212 Check if all storage engines used in transaction agree that after
2213 rollback to savepoint it is safe to release MDL locks acquired after
2214 savepoint creation.
2215
2216 @param thd The client thread that executes the transaction.
2217
2218 @return true - It is safe to release MDL locks.
2219 false - If it is not.
2220 */
ha_rollback_to_savepoint_can_release_mdl(THD * thd)2221 bool ha_rollback_to_savepoint_can_release_mdl(THD *thd) {
2222 Ha_trx_info *ha_info;
2223 Transaction_ctx *trn_ctx = thd->get_transaction();
2224 Transaction_ctx::enum_trx_scope trx_scope =
2225 thd->in_sub_stmt ? Transaction_ctx::STMT : Transaction_ctx::SESSION;
2226
2227 DBUG_TRACE;
2228
2229 /**
2230 Checking whether it is safe to release metadata locks after rollback to
2231 savepoint in all the storage engines that are part of the transaction.
2232 */
2233 for (ha_info = trn_ctx->ha_trx_info(trx_scope); ha_info;
2234 ha_info = ha_info->next()) {
2235 handlerton *ht = ha_info->ht();
2236 DBUG_ASSERT(ht);
2237
2238 if (ht->savepoint_rollback_can_release_mdl == nullptr ||
2239 ht->savepoint_rollback_can_release_mdl(ht, thd) == false)
2240 return false;
2241 }
2242
2243 return true;
2244 }
2245
/**
  Roll back the active transaction to a savepoint.

  Engines that were already registered when the savepoint was set are
  rolled back to the savepoint; engines that joined the transaction
  afterwards are rolled back completely and removed from the
  transaction's engine list.

  @param thd  The client thread that executes the transaction.
  @param sv   Savepoint to roll back to. sv->ha_list is the engine list
              as of savepoint creation; engine-private savepoint data is
              stored after the SAVEPOINT object, at each handlerton's
              savepoint_offset.

  @return 0 on success, 1 if some engine reported an error.
*/
int ha_rollback_to_savepoint(THD *thd, SAVEPOINT *sv) {
  int error = 0;
  Transaction_ctx *trn_ctx = thd->get_transaction();
  Transaction_ctx::enum_trx_scope trx_scope =
      !thd->in_sub_stmt ? Transaction_ctx::SESSION : Transaction_ctx::STMT;

  Ha_trx_info *ha_info, *ha_info_next;

  DBUG_TRACE;

  // Counters are recomputed below while walking the remaining engines.
  trn_ctx->set_rw_ha_count(trx_scope, 0);
  trn_ctx->set_no_2pc(trx_scope, false);
  /*
    rolling back to savepoint in all storage engines that were part of the
    transaction when the savepoint was set
  */
  for (ha_info = sv->ha_list; ha_info; ha_info = ha_info->next()) {
    int err;
    handlerton *ht = ha_info->ht();
    DBUG_ASSERT(ht);
    DBUG_ASSERT(ht->savepoint_set != nullptr);
    if ((err = ht->savepoint_rollback(
             ht, thd,
             (uchar *)(sv + 1) + ht->savepoint_offset))) {  // cannot happen
      char errbuf[MYSQL_ERRMSG_SIZE];
      my_error(ER_ERROR_DURING_ROLLBACK, MYF(0), err,
               my_strerror(errbuf, MYSQL_ERRMSG_SIZE, err));
      error = 1;
    }
    DBUG_ASSERT(!thd->status_var_aggregated);
    thd->status_var.ha_savepoint_rollback_count++;
    if (ht->prepare == nullptr) trn_ctx->set_no_2pc(trx_scope, true);
  }

  /*
    rolling back the transaction in all storage engines that were not part of
    the transaction when the savepoint was set
  */
  for (ha_info = trn_ctx->ha_trx_info(trx_scope); ha_info != sv->ha_list;
       ha_info = ha_info_next) {
    int err;
    handlerton *ht = ha_info->ht();
    if ((err = ht->rollback(ht, thd, !thd->in_sub_stmt))) {  // cannot happen
      char errbuf[MYSQL_ERRMSG_SIZE];
      my_error(ER_ERROR_DURING_ROLLBACK, MYF(0), err,
               my_strerror(errbuf, MYSQL_ERRMSG_SIZE, err));
      error = 1;
    }
    DBUG_ASSERT(!thd->status_var_aggregated);
    thd->status_var.ha_rollback_count++;
    // Save the link before reset() wipes this node.
    ha_info_next = ha_info->next();
    ha_info->reset(); /* keep it conveniently zero-filled */
  }
  // Trim the engine list back to the set registered at savepoint time.
  trn_ctx->set_ha_trx_info(trx_scope, sv->ha_list);

#ifdef HAVE_PSI_TRANSACTION_INTERFACE
  if (thd->m_transaction_psi != nullptr)
    MYSQL_INC_TRANSACTION_ROLLBACK_TO_SAVEPOINT(thd->m_transaction_psi, 1);
#endif

  return error;
}
2308
ha_prepare_low(THD * thd,bool all)2309 int ha_prepare_low(THD *thd, bool all) {
2310 int error = 0;
2311 Transaction_ctx::enum_trx_scope trx_scope =
2312 all ? Transaction_ctx::SESSION : Transaction_ctx::STMT;
2313 Ha_trx_info *ha_info = thd->get_transaction()->ha_trx_info(trx_scope);
2314
2315 DBUG_TRACE;
2316
2317 if (ha_info) {
2318 for (; ha_info && !error; ha_info = ha_info->next()) {
2319 int err = 0;
2320 handlerton *ht = ha_info->ht();
2321 /*
2322 Do not call two-phase commit if this particular
2323 transaction is read-only. This allows for simpler
2324 implementation in engines that are always read-only.
2325 */
2326 if (!ha_info->is_trx_read_write()) continue;
2327 if ((err = ht->prepare(ht, thd, all))) {
2328 char errbuf[MYSQL_ERRMSG_SIZE];
2329 my_error(ER_ERROR_DURING_COMMIT, MYF(0), err,
2330 my_strerror(errbuf, MYSQL_ERRMSG_SIZE, err));
2331 error = 1;
2332 }
2333 DBUG_ASSERT(!thd->status_var_aggregated);
2334 thd->status_var.ha_prepare_count++;
2335 }
2336 DBUG_EXECUTE_IF("crash_commit_after_prepare", DBUG_SUICIDE(););
2337 }
2338
2339 return error;
2340 }
2341
2342 /**
2343 @note
2344 according to the sql standard (ISO/IEC 9075-2:2003)
2345 section "4.33.4 SQL-statements and transaction states",
2346 SAVEPOINT is *not* transaction-initiating SQL-statement
2347 */
ha_savepoint(THD * thd,SAVEPOINT * sv)2348 int ha_savepoint(THD *thd, SAVEPOINT *sv) {
2349 int error = 0;
2350 Transaction_ctx::enum_trx_scope trx_scope =
2351 !thd->in_sub_stmt ? Transaction_ctx::SESSION : Transaction_ctx::STMT;
2352 Ha_trx_info *ha_info = thd->get_transaction()->ha_trx_info(trx_scope);
2353 Ha_trx_info *begin_ha_info = ha_info;
2354
2355 DBUG_TRACE;
2356
2357 for (; ha_info; ha_info = ha_info->next()) {
2358 int err;
2359 handlerton *ht = ha_info->ht();
2360 DBUG_ASSERT(ht);
2361 if (!ht->savepoint_set) {
2362 my_error(ER_CHECK_NOT_IMPLEMENTED, MYF(0), "SAVEPOINT");
2363 error = 1;
2364 break;
2365 }
2366 if ((err = ht->savepoint_set(
2367 ht, thd,
2368 (uchar *)(sv + 1) + ht->savepoint_offset))) { // cannot happen
2369 char errbuf[MYSQL_ERRMSG_SIZE];
2370 my_error(ER_GET_ERRNO, MYF(0), err,
2371 my_strerror(errbuf, MYSQL_ERRMSG_SIZE, err));
2372 error = 1;
2373 }
2374 DBUG_ASSERT(!thd->status_var_aggregated);
2375 thd->status_var.ha_savepoint_count++;
2376 }
2377 /*
2378 Remember the list of registered storage engines. All new
2379 engines are prepended to the beginning of the list.
2380 */
2381 sv->ha_list = begin_ha_info;
2382
2383 #ifdef HAVE_PSI_TRANSACTION_INTERFACE
2384 if (!error && thd->m_transaction_psi != nullptr)
2385 MYSQL_INC_TRANSACTION_SAVEPOINTS(thd->m_transaction_psi, 1);
2386 #endif
2387
2388 return error;
2389 }
2390
ha_release_savepoint(THD * thd,SAVEPOINT * sv)2391 int ha_release_savepoint(THD *thd, SAVEPOINT *sv) {
2392 int error = 0;
2393 Ha_trx_info *ha_info = sv->ha_list;
2394 DBUG_TRACE;
2395
2396 for (; ha_info; ha_info = ha_info->next()) {
2397 int err;
2398 handlerton *ht = ha_info->ht();
2399 /* Savepoint life time is enclosed into transaction life time. */
2400 DBUG_ASSERT(ht);
2401 if (!ht->savepoint_release) continue;
2402 if ((err = ht->savepoint_release(
2403 ht, thd,
2404 (uchar *)(sv + 1) + ht->savepoint_offset))) { // cannot happen
2405 char errbuf[MYSQL_ERRMSG_SIZE];
2406 my_error(ER_GET_ERRNO, MYF(0), err,
2407 my_strerror(errbuf, MYSQL_ERRMSG_SIZE, err));
2408 error = 1;
2409 }
2410 }
2411 DBUG_EXECUTE_IF("fail_ha_release_savepoint", {
2412 my_error(ER_UNKNOWN_ERROR, MYF(0));
2413 error = 1;
2414 });
2415
2416 #ifdef HAVE_PSI_TRANSACTION_INTERFACE
2417 if (thd->m_transaction_psi != nullptr)
2418 MYSQL_INC_TRANSACTION_RELEASE_SAVEPOINT(thd->m_transaction_psi, 1);
2419 #endif
2420 return error;
2421 }
2422
snapshot_handlerton(THD * thd,plugin_ref plugin,void * arg)2423 static bool snapshot_handlerton(THD *thd, plugin_ref plugin, void *arg) {
2424 handlerton *hton = plugin_data<handlerton *>(plugin);
2425 if (hton->state == SHOW_OPTION_YES && hton->start_consistent_snapshot) {
2426 hton->start_consistent_snapshot(hton, thd);
2427 *((bool *)arg) = false;
2428 }
2429 return false;
2430 }
2431
ha_start_consistent_snapshot(THD * thd)2432 int ha_start_consistent_snapshot(THD *thd) {
2433 bool warn = true;
2434
2435 plugin_foreach(thd, snapshot_handlerton, MYSQL_STORAGE_ENGINE_PLUGIN, &warn);
2436
2437 /*
2438 Same idea as when one wants to CREATE TABLE in one engine which does not
2439 exist:
2440 */
2441 if (warn)
2442 push_warning(thd, Sql_condition::SL_WARNING, ER_UNKNOWN_ERROR,
2443 "This MySQL server does not support any "
2444 "consistent-read capable storage engine");
2445 return 0;
2446 }
2447
flush_handlerton(THD *,plugin_ref plugin,void * arg)2448 static bool flush_handlerton(THD *, plugin_ref plugin, void *arg) {
2449 handlerton *hton = plugin_data<handlerton *>(plugin);
2450 if (hton->state == SHOW_OPTION_YES && hton->flush_logs &&
2451 hton->flush_logs(hton, *(static_cast<bool *>(arg))))
2452 return true;
2453 return false;
2454 }
2455
ha_flush_logs(bool binlog_group_flush)2456 bool ha_flush_logs(bool binlog_group_flush) {
2457 if (plugin_foreach(nullptr, flush_handlerton, MYSQL_STORAGE_ENGINE_PLUGIN,
2458 static_cast<void *>(&binlog_group_flush))) {
2459 return true;
2460 }
2461 return false;
2462 }
2463
2464 /**
2465 @brief make canonical filename
2466
2467 @param[in] file table handler
2468 @param[in] path original path
2469 @param[out] tmp_path buffer for canonized path
2470
2471 @details Lower case db name and table name path parts for
2472 non file based tables when lower_case_table_names
2473 is 2 (store as is, compare in lower case).
2474 Filesystem path prefix (mysql_data_home or tmpdir)
2475 is left intact.
2476
2477 @note tmp_path may be left intact if no conversion was
2478 performed.
2479
2480 @retval canonized path
2481
2482 @todo This may be done more efficiently when table path
2483 gets built. Convert this function to something like
2484 ASSERT_CANONICAL_FILENAME.
2485 */
get_canonical_filename(handler * file,const char * path,char * tmp_path)2486 const char *get_canonical_filename(handler *file, const char *path,
2487 char *tmp_path) {
2488 uint i;
2489 if (lower_case_table_names != 2 || (file->ha_table_flags() & HA_FILE_BASED))
2490 return path;
2491
2492 for (i = 0; i <= mysql_tmpdir_list.max; i++) {
2493 if (is_prefix(path, mysql_tmpdir_list.list[i])) return path;
2494 }
2495
2496 /* Ensure that table handler get path in lower case */
2497 if (tmp_path != path) my_stpcpy(tmp_path, path);
2498
2499 /*
2500 we only should turn into lowercase database/table part
2501 so start the process after homedirectory
2502 */
2503 my_casedn_str(files_charset_info, tmp_path + mysql_data_home_len);
2504 return tmp_path;
2505 }
2506
2507 class Ha_delete_table_error_handler : public Internal_error_handler {
2508 public:
handle_condition(THD *,uint,const char *,Sql_condition::enum_severity_level * level,const char *)2509 virtual bool handle_condition(THD *, uint, const char *,
2510 Sql_condition::enum_severity_level *level,
2511 const char *) {
2512 /* Downgrade errors to warnings. */
2513 if (*level == Sql_condition::SL_ERROR) *level = Sql_condition::SL_WARNING;
2514 return false;
2515 }
2516 };
2517
2518 /**
2519 Delete table from the storage engine.
2520
2521 @param thd Thread context.
2522 @param table_type Handlerton for table's SE.
2523 @param path Path to table (without extension).
2524 @param db Table database.
2525 @param alias Table name.
2526 @param table_def dd::Table object describing the table.
2527 @param generate_warning Indicates whether errors during deletion
2528 should be reported as warnings.
2529
2530 @return 0 - in case of success, non-0 in case of failure, ENOENT
2531 if the file doesn't exists.
2532 */
ha_delete_table(THD * thd,handlerton * table_type,const char * path,const char * db,const char * alias,const dd::Table * table_def,bool generate_warning)2533 int ha_delete_table(THD *thd, handlerton *table_type, const char *path,
2534 const char *db, const char *alias,
2535 const dd::Table *table_def, bool generate_warning) {
2536 handler *file;
2537 char tmp_path[FN_REFLEN];
2538 int error;
2539 TABLE dummy_table;
2540 TABLE_SHARE dummy_share;
2541 DBUG_TRACE;
2542
2543 dummy_table.s = &dummy_share;
2544
2545 /* DB_TYPE_UNKNOWN is used in ALTER TABLE when renaming only .frm files */
2546 if (table_type == nullptr ||
2547 !(file =
2548 get_new_handler((TABLE_SHARE *)nullptr,
2549 table_def->partition_type() != dd::Table::PT_NONE,
2550 thd->mem_root, table_type))) {
2551 return ENOENT;
2552 }
2553
2554 path = get_canonical_filename(file, path, tmp_path);
2555
2556 if ((error = file->ha_delete_table(path, table_def)) && generate_warning) {
2557 /*
2558 Because file->print_error() use my_error() to generate the error message
2559 we use an internal error handler to intercept it and store the text
2560 in a temporary buffer. Later the message will be presented to user
2561 as a warning.
2562 */
2563 Ha_delete_table_error_handler ha_delete_table_error_handler;
2564
2565 /* Fill up strucutures that print_error may need */
2566 dummy_share.path.str = const_cast<char *>(path);
2567 dummy_share.path.length = strlen(path);
2568 dummy_share.db.str = db;
2569 dummy_share.db.length = strlen(db);
2570 dummy_share.table_name.str = alias;
2571 dummy_share.table_name.length = strlen(alias);
2572 dummy_table.alias = alias;
2573
2574 file->change_table_ptr(&dummy_table, &dummy_share);
2575
2576 /*
2577 XXX: should we convert *all* errors to warnings here?
2578 What if the error is fatal?
2579 */
2580 thd->push_internal_handler(&ha_delete_table_error_handler);
2581 file->print_error(error, 0);
2582
2583 thd->pop_internal_handler();
2584 }
2585
2586 destroy(file);
2587
2588 #ifdef HAVE_PSI_TABLE_INTERFACE
2589 if (likely(error == 0)) {
2590 /* Table share not available, so check path for temp_table prefix. */
2591 bool temp_table = (strstr(path, tmp_file_prefix) != nullptr);
2592 PSI_TABLE_CALL(drop_table_share)
2593 (temp_table, db, strlen(db), alias, strlen(alias));
2594 }
2595 #endif
2596
2597 return error;
2598 }
2599
2600 // Prepare HA_CREATE_INFO to be used by ALTER as well as upgrade code.
init_create_options_from_share(const TABLE_SHARE * share,uint used_fields)2601 void HA_CREATE_INFO::init_create_options_from_share(const TABLE_SHARE *share,
2602 uint used_fields) {
2603 if (!(used_fields & HA_CREATE_USED_MIN_ROWS)) min_rows = share->min_rows;
2604
2605 if (!(used_fields & HA_CREATE_USED_MAX_ROWS)) max_rows = share->max_rows;
2606
2607 if (!(used_fields & HA_CREATE_USED_AVG_ROW_LENGTH))
2608 avg_row_length = share->avg_row_length;
2609
2610 if (!(used_fields & HA_CREATE_USED_DEFAULT_CHARSET))
2611 default_table_charset = share->table_charset;
2612
2613 if (!(used_fields & HA_CREATE_USED_KEY_BLOCK_SIZE))
2614 key_block_size = share->key_block_size;
2615
2616 if (!(used_fields & HA_CREATE_USED_STATS_SAMPLE_PAGES))
2617 stats_sample_pages = share->stats_sample_pages;
2618
2619 if (!(used_fields & HA_CREATE_USED_STATS_AUTO_RECALC))
2620 stats_auto_recalc = share->stats_auto_recalc;
2621
2622 if (!(used_fields & HA_CREATE_USED_TABLESPACE))
2623 tablespace = share->tablespace;
2624
2625 if (storage_media == HA_SM_DEFAULT)
2626 storage_media = share->default_storage_media;
2627
2628 /* Creation of federated table with LIKE clause needs connection string */
2629 if (!(used_fields & HA_CREATE_USED_CONNECTION))
2630 connect_string = share->connect_string;
2631
2632 if (!(used_fields & HA_CREATE_USED_COMMENT)) {
2633 // Assert to check that used_fields flag and comment are in sync.
2634 DBUG_ASSERT(!comment.str);
2635 comment = share->comment;
2636 }
2637
2638 if (!(used_fields & HA_CREATE_USED_COMPRESS)) {
2639 // Assert to check that used_fields flag and compress are in sync
2640 DBUG_ASSERT(!compress.str);
2641 compress = share->compress;
2642 }
2643
2644 if (!(used_fields & (HA_CREATE_USED_ENCRYPT))) {
2645 // Assert to check that used_fields flag and encrypt_type are in sync
2646 DBUG_ASSERT(!encrypt_type.str);
2647 encrypt_type = share->encrypt_type;
2648 }
2649
2650 if (!(used_fields & HA_CREATE_USED_SECONDARY_ENGINE)) {
2651 DBUG_ASSERT(secondary_engine.str == nullptr);
2652 secondary_engine = share->secondary_engine;
2653 }
2654
2655 if (engine_attribute.str == nullptr)
2656 engine_attribute = share->engine_attribute;
2657
2658 if (secondary_engine_attribute.str == nullptr)
2659 secondary_engine_attribute = share->secondary_engine_attribute;
2660 }
2661
2662 /****************************************************************************
2663 ** General handler functions
2664 ****************************************************************************/
clone(const char * name,MEM_ROOT * mem_root)2665 handler *handler::clone(const char *name, MEM_ROOT *mem_root) {
2666 DBUG_TRACE;
2667
2668 handler *new_handler = get_new_handler(
2669 table->s, (table->s->m_part_info != nullptr), mem_root, ht);
2670
2671 if (!new_handler) return nullptr;
2672 if (new_handler->set_ha_share_ref(ha_share)) goto err;
2673
2674 /*
2675 Allocate handler->ref here because otherwise ha_open will allocate it
2676 on this->table->mem_root and we will not be able to reclaim that memory
2677 when the clone handler object is destroyed.
2678 */
2679 if (!(new_handler->ref =
2680 (uchar *)mem_root->Alloc(ALIGN_SIZE(ref_length) * 2)))
2681 goto err;
2682 /*
2683 TODO: Implement a more efficient way to have more than one index open for
2684 the same table instance. The ha_open call is not cachable for clone.
2685 */
2686 if (new_handler->ha_open(table, name, table->db_stat,
2687 HA_OPEN_IGNORE_IF_LOCKED, nullptr))
2688 goto err;
2689
2690 return new_handler;
2691
2692 err:
2693 destroy(new_handler);
2694 return nullptr;
2695 }
2696
ha_statistic_increment(ulonglong System_status_var::* offset) const2697 void handler::ha_statistic_increment(
2698 ulonglong System_status_var::*offset) const {
2699 if (table && table->in_use) (table->in_use->status_var.*offset)++;
2700 }
2701
ha_thd(void) const2702 THD *handler::ha_thd(void) const {
2703 DBUG_ASSERT(!table || !table->in_use || table->in_use == current_thd);
2704 return (table && table->in_use) ? table->in_use : current_thd;
2705 }
2706
unbind_psi()2707 void handler::unbind_psi() {
2708 #ifdef HAVE_PSI_TABLE_INTERFACE
2709 DBUG_ASSERT(m_lock_type == F_UNLCK);
2710 DBUG_ASSERT(inited == NONE);
2711 /*
2712 Notify the instrumentation that this table is not owned
2713 by this thread any more.
2714 */
2715 PSI_TABLE_CALL(unbind_table)(m_psi);
2716 #endif
2717 }
2718
rebind_psi()2719 void handler::rebind_psi() {
2720 #ifdef HAVE_PSI_TABLE_INTERFACE
2721 DBUG_ASSERT(m_lock_type == F_UNLCK);
2722 DBUG_ASSERT(inited == NONE);
2723 /*
2724 Notify the instrumentation that this table is now owned
2725 by this thread.
2726 */
2727 PSI_table_share *share_psi = ha_table_share_psi(table_share);
2728 m_psi = PSI_TABLE_CALL(rebind_table)(share_psi, this, m_psi);
2729 #endif
2730 }
2731
start_psi_batch_mode()2732 void handler::start_psi_batch_mode() {
2733 #ifdef HAVE_PSI_TABLE_INTERFACE
2734 DBUG_ASSERT(m_psi_batch_mode == PSI_BATCH_MODE_NONE);
2735 DBUG_ASSERT(m_psi_locker == nullptr);
2736 m_psi_batch_mode = PSI_BATCH_MODE_STARTING;
2737 m_psi_numrows = 0;
2738 #endif
2739 }
2740
end_psi_batch_mode()2741 void handler::end_psi_batch_mode() {
2742 #ifdef HAVE_PSI_TABLE_INTERFACE
2743 DBUG_ASSERT(m_psi_batch_mode != PSI_BATCH_MODE_NONE);
2744 if (m_psi_locker != nullptr) {
2745 DBUG_ASSERT(m_psi_batch_mode == PSI_BATCH_MODE_STARTED);
2746 PSI_TABLE_CALL(end_table_io_wait)(m_psi_locker, m_psi_numrows);
2747 m_psi_locker = nullptr;
2748 }
2749 m_psi_batch_mode = PSI_BATCH_MODE_NONE;
2750 #endif
2751 }
2752
ha_table_share_psi(const TABLE_SHARE * share) const2753 PSI_table_share *handler::ha_table_share_psi(const TABLE_SHARE *share) const {
2754 return share->m_psi;
2755 }
2756
2757 /*
2758 Open database handler object.
2759
2760 Used for opening tables. The name will be the name of the file.
2761 A table is opened when it needs to be opened. For instance
2762 when a request comes in for a select on the table (tables are not
2763 open and closed for each request, they are cached).
2764
2765 The server opens all tables by calling ha_open() which then calls
2766 the handler specific open().
2767
2768 Try O_RDONLY if cannot open as O_RDWR. Don't wait for locks if not
2769 HA_OPEN_WAIT_IF_LOCKED is set
2770
2771 @param [out] table_arg Table structure.
2772 @param name Full path of table name.
2773 @param mode Open mode flags.
2774 @param test_if_locked ?
2775 @param table_def dd::Table object describing table
2776 being open. Can be NULL for temporary
2777 tables created by optimizer.
2778
2779 @retval >0 Error.
2780 @retval 0 Success.
2781 */
2782
ha_open(TABLE * table_arg,const char * name,int mode,int test_if_locked,const dd::Table * table_def)2783 int handler::ha_open(TABLE *table_arg, const char *name, int mode,
2784 int test_if_locked, const dd::Table *table_def) {
2785 int error;
2786 DBUG_TRACE;
2787 DBUG_PRINT("enter",
2788 ("name: %s db_type: %d db_stat: %d mode: %d lock_test: %d",
2789 name, ht->db_type, table_arg->db_stat, mode, test_if_locked));
2790
2791 table = table_arg;
2792 DBUG_ASSERT(table->s == table_share);
2793 DBUG_ASSERT(m_lock_type == F_UNLCK);
2794 DBUG_PRINT("info", ("old m_lock_type: %d F_UNLCK %d", m_lock_type, F_UNLCK));
2795 MEM_ROOT *mem_root = (test_if_locked & HA_OPEN_TMP_TABLE)
2796 ? &table->s->mem_root
2797 : &table->mem_root;
2798 DBUG_ASSERT(alloc_root_inited(mem_root));
2799
2800 if ((error = open(name, mode, test_if_locked, table_def))) {
2801 if ((error == EACCES || error == EROFS) && mode == O_RDWR &&
2802 (table->db_stat & HA_TRY_READ_ONLY)) {
2803 table->db_stat |= HA_READ_ONLY;
2804 error = open(name, O_RDONLY, test_if_locked, table_def);
2805 }
2806 }
2807 if (error) {
2808 set_my_errno(error); /* Safeguard */
2809 DBUG_PRINT("error", ("error: %d errno: %d", error, errno));
2810 } else {
2811 DBUG_ASSERT(m_psi == nullptr);
2812 DBUG_ASSERT(table_share != nullptr);
2813 #ifdef HAVE_PSI_TABLE_INTERFACE
2814 PSI_table_share *share_psi = ha_table_share_psi(table_share);
2815 m_psi = PSI_TABLE_CALL(open_table)(share_psi, this);
2816 #endif
2817
2818 if (table->s->db_options_in_use & HA_OPTION_READ_ONLY_DATA)
2819 table->db_stat |= HA_READ_ONLY;
2820 (void)extra(HA_EXTRA_NO_READCHECK); // Not needed in SQL
2821
2822 /* ref is already allocated for us if we're called from handler::clone() */
2823 if (!ref && !(ref = (uchar *)mem_root->Alloc(ALIGN_SIZE(ref_length) * 2))) {
2824 ha_close();
2825 error = HA_ERR_OUT_OF_MEM;
2826 } else
2827 dup_ref = ref + ALIGN_SIZE(ref_length);
2828
2829 // Give the table a defined starting cursor, even if it never actually seeks
2830 // or writes. This is important for things like weedout on const tables
2831 // (which is a nonsensical combination, but can happen).
2832 memset(ref, 0, ref_length);
2833 cached_table_flags = table_flags();
2834 }
2835
2836 return error;
2837 }
2838
2839 /**
2840 Close handler.
2841
2842 Called from sql_base.cc, sql_select.cc, and table.cc.
2843 In sql_select.cc it is only used to close up temporary tables or during
2844 the process where a temporary table is converted over to being a
2845 myisam table.
2846 For sql_base.cc look at close_data_tables().
2847
2848 @return Operation status
2849 @retval 0 Success
2850 @retval != 0 Error (error code returned)
2851 */
2852
ha_close(void)2853 int handler::ha_close(void) {
2854 DBUG_TRACE;
2855 #ifdef HAVE_PSI_TABLE_INTERFACE
2856 PSI_TABLE_CALL(close_table)(table_share, m_psi);
2857 m_psi = nullptr; /* instrumentation handle, invalid after close_table() */
2858 DBUG_ASSERT(m_psi_batch_mode == PSI_BATCH_MODE_NONE);
2859 DBUG_ASSERT(m_psi_locker == nullptr);
2860 #endif
2861 // TODO: set table= NULL to mark the handler as closed?
2862 DBUG_ASSERT(m_psi == nullptr);
2863 DBUG_ASSERT(m_lock_type == F_UNLCK);
2864 DBUG_ASSERT(inited == NONE);
2865 if (m_unique) {
2866 // It's allocated on memroot and will be freed along with it
2867 m_unique->cleanup();
2868 m_unique = nullptr;
2869 }
2870 return close();
2871 }
2872
2873 /**
2874 Initialize use of index.
2875
2876 @param idx Index to use
2877 @param sorted Use sorted order
2878
2879 @return Operation status
2880 @retval 0 Success
2881 @retval != 0 Error (error code returned)
2882 */
2883
ha_index_init(uint idx,bool sorted)2884 int handler::ha_index_init(uint idx, bool sorted) {
2885 DBUG_EXECUTE_IF("ha_index_init_fail", return HA_ERR_TABLE_DEF_CHANGED;);
2886 int result;
2887 DBUG_TRACE;
2888 DBUG_ASSERT(table_share->tmp_table != NO_TMP_TABLE || m_lock_type != F_UNLCK);
2889 DBUG_ASSERT(inited == NONE);
2890 if (!(result = index_init(idx, sorted))) inited = INDEX;
2891 end_range = nullptr;
2892 return result;
2893 }
2894
2895 /**
2896 End use of index.
2897
2898 @return Operation status
2899 @retval 0 Success
2900 @retval != 0 Error (error code returned)
2901 */
2902
ha_index_end()2903 int handler::ha_index_end() {
2904 DBUG_TRACE;
2905 /* SQL HANDLER function can call this without having it locked. */
2906 DBUG_ASSERT(table->open_by_handler ||
2907 table_share->tmp_table != NO_TMP_TABLE || m_lock_type != F_UNLCK);
2908 DBUG_ASSERT(inited == INDEX);
2909 inited = NONE;
2910 end_range = nullptr;
2911 m_record_buffer = nullptr;
2912 if (m_unique) m_unique->reset(false);
2913 return index_end();
2914 }
2915
2916 /**
2917 Initialize table for random read or scan.
2918
2919 @param scan if true: Initialize for random scans through rnd_next()
2920 if false: Initialize for random reads through rnd_pos()
2921
2922 @return Operation status
2923 @retval 0 Success
2924 @retval != 0 Error (error code returned)
2925 */
2926
ha_rnd_init(bool scan)2927 int handler::ha_rnd_init(bool scan) {
2928 DBUG_EXECUTE_IF("ha_rnd_init_fail", return HA_ERR_TABLE_DEF_CHANGED;);
2929 int result;
2930 DBUG_TRACE;
2931 DBUG_ASSERT(table_share->tmp_table != NO_TMP_TABLE || m_lock_type != F_UNLCK);
2932 DBUG_ASSERT(inited == NONE || (inited == RND && scan));
2933 inited = (result = rnd_init(scan)) ? NONE : RND;
2934 end_range = nullptr;
2935 return result;
2936 }
2937
2938 /**
2939 End use of random access.
2940
2941 @return Operation status
2942 @retval 0 Success
2943 @retval != 0 Error (error code returned)
2944 */
2945
ha_rnd_end()2946 int handler::ha_rnd_end() {
2947 DBUG_TRACE;
2948 /* SQL HANDLER function can call this without having it locked. */
2949 DBUG_ASSERT(table->open_by_handler ||
2950 table_share->tmp_table != NO_TMP_TABLE || m_lock_type != F_UNLCK);
2951 DBUG_ASSERT(inited == RND);
2952 inited = NONE;
2953 end_range = nullptr;
2954 m_record_buffer = nullptr;
2955 return rnd_end();
2956 }
2957
2958 /**
2959 Read next row via random scan.
2960
2961 @param buf Buffer to read the row into
2962
2963 @return Operation status
2964 @retval 0 Success
2965 @retval != 0 Error (error code returned)
2966 */
2967
ha_rnd_next(uchar * buf)2968 int handler::ha_rnd_next(uchar *buf) {
2969 int result;
2970 DBUG_EXECUTE_IF("ha_rnd_next_deadlock", return HA_ERR_LOCK_DEADLOCK;);
2971 DBUG_TRACE;
2972 DBUG_ASSERT(table_share->tmp_table != NO_TMP_TABLE || m_lock_type != F_UNLCK);
2973 DBUG_ASSERT(inited == RND);
2974
2975 // Set status for the need to update generated fields
2976 m_update_generated_read_fields = table->has_gcol();
2977
2978 MYSQL_TABLE_IO_WAIT(PSI_TABLE_FETCH_ROW, MAX_KEY, result,
2979 { result = rnd_next(buf); })
2980 if (!result && m_update_generated_read_fields) {
2981 result = update_generated_read_fields(buf, table);
2982 m_update_generated_read_fields = false;
2983 }
2984 table->set_row_status_from_handler(result);
2985 return result;
2986 }
2987
2988 /**
2989 Read row via random scan from position.
2990
2991 @param[out] buf Buffer to read the row into
2992 @param pos Position from position() call
2993
2994 @return Operation status
2995 @retval 0 Success
2996 @retval != 0 Error (error code returned)
2997 */
2998
ha_rnd_pos(uchar * buf,uchar * pos)2999 int handler::ha_rnd_pos(uchar *buf, uchar *pos) {
3000 int result;
3001 DBUG_TRACE;
3002 DBUG_ASSERT(table_share->tmp_table != NO_TMP_TABLE || m_lock_type != F_UNLCK);
3003 /* TODO: Find out how to solve ha_rnd_pos when finding duplicate update. */
3004 /* DBUG_ASSERT(inited == RND); */
3005
3006 // Set status for the need to update generated fields
3007 m_update_generated_read_fields = table->has_gcol();
3008
3009 MYSQL_TABLE_IO_WAIT(PSI_TABLE_FETCH_ROW, MAX_KEY, result,
3010 { result = rnd_pos(buf, pos); })
3011 if (!result && m_update_generated_read_fields) {
3012 result = update_generated_read_fields(buf, table);
3013 m_update_generated_read_fields = false;
3014 }
3015 table->set_row_status_from_handler(result);
3016 return result;
3017 }
3018
ha_ft_read(uchar * buf)3019 int handler::ha_ft_read(uchar *buf) {
3020 int result;
3021 DBUG_TRACE;
3022
3023 // Set status for the need to update generated fields
3024 m_update_generated_read_fields = table->has_gcol();
3025
3026 result = ft_read(buf);
3027 if (!result && m_update_generated_read_fields) {
3028 result = update_generated_read_fields(buf, table);
3029 m_update_generated_read_fields = false;
3030 }
3031 table->set_row_status_from_handler(result);
3032 return result;
3033 }
3034
ha_sample_init(void * & scan_ctx,double sampling_percentage,int sampling_seed,enum_sampling_method sampling_method)3035 int handler::ha_sample_init(void *&scan_ctx, double sampling_percentage,
3036 int sampling_seed,
3037 enum_sampling_method sampling_method) {
3038 DBUG_TRACE;
3039 DBUG_ASSERT(sampling_percentage >= 0.0);
3040 DBUG_ASSERT(sampling_percentage <= 100.0);
3041 DBUG_ASSERT(inited == NONE);
3042
3043 // Initialise the random number generator.
3044 m_random_number_engine.seed(sampling_seed);
3045 m_sampling_percentage = sampling_percentage;
3046
3047 int result = sample_init(scan_ctx, sampling_percentage, sampling_seed,
3048 sampling_method);
3049 inited = (result != 0) ? NONE : SAMPLING;
3050 return result;
3051 }
3052
ha_sample_end(void * scan_ctx)3053 int handler::ha_sample_end(void *scan_ctx) {
3054 DBUG_TRACE;
3055 DBUG_ASSERT(inited == SAMPLING);
3056 inited = NONE;
3057 int result = sample_end(scan_ctx);
3058 return result;
3059 }
3060
ha_sample_next(void * scan_ctx,uchar * buf)3061 int handler::ha_sample_next(void *scan_ctx, uchar *buf) {
3062 DBUG_TRACE;
3063 DBUG_ASSERT(inited == SAMPLING);
3064
3065 if (m_sampling_percentage == 0.0) return HA_ERR_END_OF_FILE;
3066
3067 m_update_generated_read_fields = table->has_gcol();
3068
3069 int result;
3070 MYSQL_TABLE_IO_WAIT(PSI_TABLE_FETCH_ROW, MAX_KEY, result,
3071 { result = sample_next(scan_ctx, buf); })
3072
3073 if (result == 0 && m_update_generated_read_fields) {
3074 result = update_generated_read_fields(buf, table);
3075 m_update_generated_read_fields = false;
3076 }
3077 table->set_row_status_from_handler(result);
3078
3079 return result;
3080 }
3081
sample_init(void * & scan_ctx MY_ATTRIBUTE ((unused)),double,int,enum_sampling_method)3082 int handler::sample_init(void *&scan_ctx MY_ATTRIBUTE((unused)), double, int,
3083 enum_sampling_method) {
3084 return rnd_init(true);
3085 }
3086
sample_end(void * scan_ctx MY_ATTRIBUTE ((unused)))3087 int handler::sample_end(void *scan_ctx MY_ATTRIBUTE((unused))) {
3088 return rnd_end();
3089 }
3090
sample_next(void * scan_ctx MY_ATTRIBUTE ((unused)),uchar * buf)3091 int handler::sample_next(void *scan_ctx MY_ATTRIBUTE((unused)), uchar *buf) {
3092 // Temporary set inited to RND, since we are calling rnd_next().
3093 int res = rnd_next(buf);
3094
3095 std::uniform_real_distribution<double> rnd(0.0, 1.0);
3096 while (!res && rnd(m_random_number_engine) > (m_sampling_percentage / 100.0))
3097 res = rnd_next(buf);
3098
3099 return res;
3100 }
3101
records(ha_rows * num_rows)3102 int handler::records(ha_rows *num_rows) {
3103 if (ha_table_flags() & HA_COUNT_ROWS_INSTANT) {
3104 *num_rows = stats.records;
3105 return 0;
3106 }
3107
3108 int error = 0;
3109 ha_rows rows = 0;
3110 start_psi_batch_mode();
3111
3112 if (!(error = ha_rnd_init(true))) {
3113 while (!table->in_use->killed) {
3114 DBUG_EXECUTE_IF("bug28079850", table->in_use->killed = THD::KILL_QUERY;);
3115 if ((error = ha_rnd_next(table->record[0]))) {
3116 if (error == HA_ERR_RECORD_DELETED)
3117 continue;
3118 else
3119 break;
3120 }
3121 ++rows;
3122 }
3123 }
3124
3125 *num_rows = rows;
3126 end_psi_batch_mode();
3127 int ha_rnd_end_error = 0;
3128 if (error != HA_ERR_END_OF_FILE) *num_rows = HA_POS_ERROR;
3129
3130 // Call ha_rnd_end() only if only if handler has been initialized.
3131 if (inited && (ha_rnd_end_error = ha_rnd_end())) *num_rows = HA_POS_ERROR;
3132
3133 return (error != HA_ERR_END_OF_FILE) ? error : ha_rnd_end_error;
3134 }
3135
records_from_index(ha_rows * num_rows,uint index)3136 int handler::records_from_index(ha_rows *num_rows, uint index) {
3137 if (ha_table_flags() & HA_COUNT_ROWS_INSTANT) {
3138 *num_rows = stats.records;
3139 return 0;
3140 }
3141
3142 int error = 0;
3143 ha_rows rows = 0;
3144 uchar *buf = table->record[0];
3145 start_psi_batch_mode();
3146
3147 if (!(error = ha_index_init(index, false))) {
3148 if (!(error = ha_index_first(buf))) {
3149 rows = 1;
3150
3151 while (!table->in_use->killed) {
3152 DBUG_EXECUTE_IF("bug28079850",
3153 table->in_use->killed = THD::KILL_QUERY;);
3154 if ((error = ha_index_next(buf))) {
3155 if (error == HA_ERR_RECORD_DELETED)
3156 continue;
3157 else
3158 break;
3159 }
3160 ++rows;
3161 }
3162 }
3163 }
3164
3165 *num_rows = rows;
3166 end_psi_batch_mode();
3167 int ha_index_end_error = 0;
3168 if (error != HA_ERR_END_OF_FILE) *num_rows = HA_POS_ERROR;
3169
3170 // Call ha_index_end() only if handler has been initialized.
3171 if (inited && (ha_index_end_error = ha_index_end())) *num_rows = HA_POS_ERROR;
3172
3173 return (error != HA_ERR_END_OF_FILE) ? error : ha_index_end_error;
3174 }
3175
handle_records_error(int error,ha_rows * num_rows)3176 int handler::handle_records_error(int error, ha_rows *num_rows) {
3177 // If query was killed set the error since not all storage engines do it.
3178 if (table->in_use->killed) {
3179 *num_rows = HA_POS_ERROR;
3180 if (error == 0) error = HA_ERR_QUERY_INTERRUPTED;
3181 }
3182
3183 if (error != 0) DBUG_ASSERT(*num_rows == HA_POS_ERROR);
3184 if (*num_rows == HA_POS_ERROR) DBUG_ASSERT(error != 0);
3185 if (error != 0) {
3186 /*
3187 ha_innobase::records may have rolled back internally.
3188 In this case, thd_mark_transaction_to_rollback() will have been called.
3189 For the errors below, we need to abort right away.
3190 */
3191 switch (error) {
3192 case HA_ERR_LOCK_DEADLOCK:
3193 case HA_ERR_LOCK_TABLE_FULL:
3194 case HA_ERR_LOCK_WAIT_TIMEOUT:
3195 case HA_ERR_QUERY_INTERRUPTED:
3196 print_error(error, MYF(0));
3197 return error;
3198 default:
3199 return error;
3200 }
3201 }
3202 return 0;
3203 }
3204
3205 /**
3206 Read [part of] row via [part of] index.
3207 @param[out] buf buffer where store the data
3208 @param key Key to search for
3209 @param keypart_map Which part of key to use
3210 @param find_flag Direction/condition on key usage
3211
3212 @returns Operation status
3213 @retval 0 Success (found a record, and function has
3214 set table status to "has row")
3215 @retval HA_ERR_END_OF_FILE Row not found (function has set table status
3216 to "no row"). End of index passed.
3217 @retval HA_ERR_KEY_NOT_FOUND Row not found (function has set table status
3218 to "no row"). Index cursor positioned.
3219 @retval != 0 Error
3220
3221 @note Positions an index cursor to the index specified in the handle.
3222 Fetches the row if available. If the key value is null,
3223 begin at the first key of the index.
3224 ha_index_read_map can be restarted without calling index_end on the previous
3225 index scan and without calling ha_index_init. In this case the
3226 ha_index_read_map is on the same index as the previous ha_index_scan.
3227 This is particularly used in conjunction with multi read ranges.
3228 */
3229
ha_index_read_map(uchar * buf,const uchar * key,key_part_map keypart_map,enum ha_rkey_function find_flag)3230 int handler::ha_index_read_map(uchar *buf, const uchar *key,
3231 key_part_map keypart_map,
3232 enum ha_rkey_function find_flag) {
3233 int result;
3234 DBUG_TRACE;
3235 DBUG_ASSERT(table_share->tmp_table != NO_TMP_TABLE || m_lock_type != F_UNLCK);
3236 DBUG_ASSERT(inited == INDEX);
3237 DBUG_ASSERT(!pushed_idx_cond || buf == table->record[0]);
3238
3239 // Set status for the need to update generated fields
3240 m_update_generated_read_fields = table->has_gcol();
3241
3242 MYSQL_TABLE_IO_WAIT(PSI_TABLE_FETCH_ROW, active_index, result, {
3243 result = index_read_map(buf, key, keypart_map, find_flag);
3244 })
3245 if (!result && m_update_generated_read_fields) {
3246 result = update_generated_read_fields(buf, table, active_index);
3247 m_update_generated_read_fields = false;
3248 }
3249 table->set_row_status_from_handler(result);
3250 return result;
3251 }
3252
ha_index_read_last_map(uchar * buf,const uchar * key,key_part_map keypart_map)3253 int handler::ha_index_read_last_map(uchar *buf, const uchar *key,
3254 key_part_map keypart_map) {
3255 int result;
3256 DBUG_TRACE;
3257 DBUG_ASSERT(table_share->tmp_table != NO_TMP_TABLE || m_lock_type != F_UNLCK);
3258 DBUG_ASSERT(inited == INDEX);
3259 DBUG_ASSERT(!pushed_idx_cond || buf == table->record[0]);
3260
3261 // Set status for the need to update generated fields
3262 m_update_generated_read_fields = table->has_gcol();
3263
3264 MYSQL_TABLE_IO_WAIT(PSI_TABLE_FETCH_ROW, active_index, result,
3265 { result = index_read_last_map(buf, key, keypart_map); })
3266 if (!result && m_update_generated_read_fields) {
3267 result = update_generated_read_fields(buf, table, active_index);
3268 m_update_generated_read_fields = false;
3269 }
3270 table->set_row_status_from_handler(result);
3271 return result;
3272 }
3273
3274 /**
3275 Initializes an index and read it.
3276
3277 @see handler::ha_index_read_map.
3278 */
3279
ha_index_read_idx_map(uchar * buf,uint index,const uchar * key,key_part_map keypart_map,enum ha_rkey_function find_flag)3280 int handler::ha_index_read_idx_map(uchar *buf, uint index, const uchar *key,
3281 key_part_map keypart_map,
3282 enum ha_rkey_function find_flag) {
3283 int result;
3284 DBUG_TRACE;
3285 DBUG_ASSERT(table_share->tmp_table != NO_TMP_TABLE || m_lock_type != F_UNLCK);
3286 DBUG_ASSERT(end_range == nullptr);
3287 DBUG_ASSERT(!pushed_idx_cond || buf == table->record[0]);
3288
3289 // Set status for the need to update generated fields
3290 m_update_generated_read_fields = table->has_gcol();
3291
3292 MYSQL_TABLE_IO_WAIT(PSI_TABLE_FETCH_ROW, index, result, {
3293 result = index_read_idx_map(buf, index, key, keypart_map, find_flag);
3294 })
3295 if (!result && m_update_generated_read_fields) {
3296 result = update_generated_read_fields(buf, table, index);
3297 m_update_generated_read_fields = false;
3298 }
3299 table->set_row_status_from_handler(result);
3300 return result;
3301 }
3302
3303 /**
3304 Reads the next row via index.
3305
3306 @param[out] buf Row data
3307
3308 @return Operation status.
3309 @retval 0 Success
3310 @retval HA_ERR_END_OF_FILE Row not found
3311 @retval != 0 Error
3312 */
3313
ha_index_next(uchar * buf)3314 int handler::ha_index_next(uchar *buf) {
3315 int result;
3316 DBUG_TRACE;
3317 DBUG_ASSERT(table_share->tmp_table != NO_TMP_TABLE || m_lock_type != F_UNLCK);
3318 DBUG_ASSERT(inited == INDEX);
3319 DBUG_ASSERT(!pushed_idx_cond || buf == table->record[0]);
3320
3321 // Set status for the need to update generated fields
3322 m_update_generated_read_fields = table->has_gcol();
3323
3324 MYSQL_TABLE_IO_WAIT(PSI_TABLE_FETCH_ROW, active_index, result,
3325 { result = index_next(buf); })
3326 if (!result && m_update_generated_read_fields) {
3327 result = update_generated_read_fields(buf, table, active_index);
3328 m_update_generated_read_fields = false;
3329 }
3330 table->set_row_status_from_handler(result);
3331 return result;
3332 }
3333
3334 /**
3335 Reads the previous row via index.
3336
3337 @param[out] buf Row data
3338
3339 @return Operation status.
3340 @retval 0 Success
3341 @retval HA_ERR_END_OF_FILE Row not found
3342 @retval != 0 Error
3343 */
3344
ha_index_prev(uchar * buf)3345 int handler::ha_index_prev(uchar *buf) {
3346 int result;
3347 DBUG_TRACE;
3348 DBUG_ASSERT(table_share->tmp_table != NO_TMP_TABLE || m_lock_type != F_UNLCK);
3349 DBUG_ASSERT(inited == INDEX);
3350 DBUG_ASSERT(!pushed_idx_cond || buf == table->record[0]);
3351
3352 // Set status for the need to update generated fields
3353 m_update_generated_read_fields = table->has_gcol();
3354
3355 MYSQL_TABLE_IO_WAIT(PSI_TABLE_FETCH_ROW, active_index, result,
3356 { result = index_prev(buf); })
3357 if (!result && m_update_generated_read_fields) {
3358 result = update_generated_read_fields(buf, table, active_index);
3359 m_update_generated_read_fields = false;
3360 }
3361 table->set_row_status_from_handler(result);
3362 return result;
3363 }
3364
3365 /**
3366 Reads the first row via index.
3367
3368 @param[out] buf Row data
3369
3370 @return Operation status.
3371 @retval 0 Success
3372 @retval HA_ERR_END_OF_FILE Row not found
3373 @retval != 0 Error
3374 */
3375
ha_index_first(uchar * buf)3376 int handler::ha_index_first(uchar *buf) {
3377 int result;
3378 DBUG_TRACE;
3379 DBUG_ASSERT(table_share->tmp_table != NO_TMP_TABLE || m_lock_type != F_UNLCK);
3380 DBUG_ASSERT(inited == INDEX);
3381 DBUG_ASSERT(!pushed_idx_cond || buf == table->record[0]);
3382
3383 // Set status for the need to update generated fields
3384 m_update_generated_read_fields = table->has_gcol();
3385
3386 MYSQL_TABLE_IO_WAIT(PSI_TABLE_FETCH_ROW, active_index, result,
3387 { result = index_first(buf); })
3388 if (!result && m_update_generated_read_fields) {
3389 result = update_generated_read_fields(buf, table, active_index);
3390 m_update_generated_read_fields = false;
3391 }
3392 table->set_row_status_from_handler(result);
3393 return result;
3394 }
3395
3396 /**
3397 Reads the last row via index.
3398
3399 @param[out] buf Row data
3400
3401 @return Operation status.
3402 @retval 0 Success
3403 @retval HA_ERR_END_OF_FILE Row not found
3404 @retval != 0 Error
3405 */
3406
ha_index_last(uchar * buf)3407 int handler::ha_index_last(uchar *buf) {
3408 int result;
3409 DBUG_TRACE;
3410 DBUG_ASSERT(table_share->tmp_table != NO_TMP_TABLE || m_lock_type != F_UNLCK);
3411 DBUG_ASSERT(inited == INDEX);
3412 DBUG_ASSERT(!pushed_idx_cond || buf == table->record[0]);
3413
3414 // Set status for the need to update generated fields
3415 m_update_generated_read_fields = table->has_gcol();
3416
3417 MYSQL_TABLE_IO_WAIT(PSI_TABLE_FETCH_ROW, active_index, result,
3418 { result = index_last(buf); })
3419 if (!result && m_update_generated_read_fields) {
3420 result = update_generated_read_fields(buf, table, active_index);
3421 m_update_generated_read_fields = false;
3422 }
3423 table->set_row_status_from_handler(result);
3424 return result;
3425 }
3426
3427 /**
3428 Reads the next same row via index.
3429
3430 @param[out] buf Row data
3431 @param key Key to search for
3432 @param keylen Length of key
3433
3434 @return Operation status.
3435 @retval 0 Success
3436 @retval HA_ERR_END_OF_FILE Row not found
3437 @retval != 0 Error
3438 */
3439
ha_index_next_same(uchar * buf,const uchar * key,uint keylen)3440 int handler::ha_index_next_same(uchar *buf, const uchar *key, uint keylen) {
3441 int result;
3442 DBUG_TRACE;
3443 DBUG_ASSERT(table_share->tmp_table != NO_TMP_TABLE || m_lock_type != F_UNLCK);
3444 DBUG_ASSERT(inited == INDEX);
3445 DBUG_ASSERT(!pushed_idx_cond || buf == table->record[0]);
3446
3447 // Set status for the need to update generated fields
3448 m_update_generated_read_fields = table->has_gcol();
3449
3450 MYSQL_TABLE_IO_WAIT(PSI_TABLE_FETCH_ROW, active_index, result,
3451 { result = index_next_same(buf, key, keylen); })
3452 if (!result && m_update_generated_read_fields) {
3453 result = update_generated_read_fields(buf, table, active_index);
3454 m_update_generated_read_fields = false;
3455 }
3456 table->set_row_status_from_handler(result);
3457 return result;
3458 }
3459
3460 /**
3461 Read first row (only) from a table.
3462
  This is never called for tables whose storage engine does not contain exact
  statistics on the number of records, e.g. InnoDB.
3465
3466 @note Since there is only one implementation for this function, it is
3467 non-virtual and does not call a protected inner function, like
3468 most other handler functions.
3469
3470 @note Implementation only calls other handler functions, so there is no need
3471 to update generated columns nor set table status.
3472 */
ha_read_first_row(uchar * buf,uint primary_key)3473 int handler::ha_read_first_row(uchar *buf, uint primary_key) {
3474 int error;
3475 DBUG_TRACE;
3476
3477 ha_statistic_increment(&System_status_var::ha_read_first_count);
3478
3479 /*
3480 If there is very few deleted rows in the table, find the first row by
3481 scanning the table.
3482 TODO remove the test for HA_READ_ORDER
3483 */
3484 if (stats.deleted < 10 || primary_key >= MAX_KEY ||
3485 !(index_flags(primary_key, 0, false) & HA_READ_ORDER)) {
3486 if (!(error = ha_rnd_init(true))) {
3487 while ((error = ha_rnd_next(buf)) == HA_ERR_RECORD_DELETED)
3488 /* skip deleted row */;
3489 const int end_error = ha_rnd_end();
3490 if (!error) error = end_error;
3491 }
3492 } else {
3493 /* Find the first row through the primary key */
3494 if (!(error = ha_index_init(primary_key, false))) {
3495 error = ha_index_first(buf);
3496 const int end_error = ha_index_end();
3497 if (!error) error = end_error;
3498 }
3499 }
3500 return error;
3501 }
3502
ha_index_read_pushed(uchar * buf,const uchar * key,key_part_map keypart_map)3503 int handler::ha_index_read_pushed(uchar *buf, const uchar *key,
3504 key_part_map keypart_map) {
3505 DBUG_TRACE;
3506
3507 // Set status for the need to update generated fields
3508 m_update_generated_read_fields = table->has_gcol();
3509
3510 int result = index_read_pushed(buf, key, keypart_map);
3511 if (!result && m_update_generated_read_fields) {
3512 result = update_generated_read_fields(buf, table, active_index);
3513 m_update_generated_read_fields = false;
3514 }
3515 table->set_row_status_from_handler(result);
3516 return result;
3517 }
3518
ha_index_next_pushed(uchar * buf)3519 int handler::ha_index_next_pushed(uchar *buf) {
3520 DBUG_TRACE;
3521
3522 // Set status for the need to update generated fields
3523 m_update_generated_read_fields = table->has_gcol();
3524
3525 int result = index_next_pushed(buf);
3526 if (!result && m_update_generated_read_fields) {
3527 result = update_generated_read_fields(buf, table, active_index);
3528 m_update_generated_read_fields = false;
3529 }
3530 table->set_row_status_from_handler(result);
3531 return result;
3532 }
3533
3534 /**
3535 Generate the next auto-increment number based on increment and offset.
3536 computes the lowest number
3537 - strictly greater than "nr"
3538 - of the form: auto_increment_offset + N * auto_increment_increment
3539 If overflow happened then return MAX_ULONGLONG value as an
3540 indication of overflow.
3541 In most cases increment= offset= 1, in which case we get:
3542 @verbatim 1,2,3,4,5,... @endverbatim
3543 If increment=10 and offset=5 and previous number is 1, we get:
3544 @verbatim 1,5,15,25,35,... @endverbatim
3545 */
compute_next_insert_id(ulonglong nr,struct System_variables * variables)3546 inline ulonglong compute_next_insert_id(ulonglong nr,
3547 struct System_variables *variables) {
3548 const ulonglong save_nr = nr;
3549
3550 if (variables->auto_increment_increment == 1)
3551 nr = nr + 1; // optimization of the formula below
3552 else {
3553 nr = (((nr + variables->auto_increment_increment -
3554 variables->auto_increment_offset)) /
3555 (ulonglong)variables->auto_increment_increment);
3556 nr = (nr * (ulonglong)variables->auto_increment_increment +
3557 variables->auto_increment_offset);
3558 }
3559
3560 if (unlikely(nr <= save_nr)) return ULLONG_MAX;
3561
3562 return nr;
3563 }
3564
adjust_next_insert_id_after_explicit_value(ulonglong nr)3565 void handler::adjust_next_insert_id_after_explicit_value(ulonglong nr) {
3566 /*
3567 If we have set THD::next_insert_id previously and plan to insert an
3568 explicitely-specified value larger than this, we need to increase
3569 THD::next_insert_id to be greater than the explicit value.
3570 */
3571 if ((next_insert_id > 0) && (nr >= next_insert_id))
3572 set_next_insert_id(compute_next_insert_id(nr, &table->in_use->variables));
3573 }
3574
3575 /** @brief
3576 Computes the largest number X:
3577 - smaller than or equal to "nr"
3578 - of the form: auto_increment_offset + N * auto_increment_increment
3579 where N>=0.
3580
3581 SYNOPSIS
3582 prev_insert_id
3583 nr Number to "round down"
3584 variables variables struct containing auto_increment_increment and
3585 auto_increment_offset
3586
3587 RETURN
3588 The number X if it exists, "nr" otherwise.
3589 */
prev_insert_id(ulonglong nr,struct System_variables * variables)3590 inline ulonglong prev_insert_id(ulonglong nr,
3591 struct System_variables *variables) {
3592 if (unlikely(nr < variables->auto_increment_offset)) {
3593 /*
3594 There's nothing good we can do here. That is a pathological case, where
3595 the offset is larger than the column's max possible value, i.e. not even
3596 the first sequence value may be inserted. User will receive warning.
3597 */
3598 DBUG_PRINT("info", ("auto_increment: nr: %lu cannot honour "
3599 "auto_increment_offset: %lu",
3600 (ulong)nr, variables->auto_increment_offset));
3601 return nr;
3602 }
3603 if (variables->auto_increment_increment == 1)
3604 return nr; // optimization of the formula below
3605 nr = (((nr - variables->auto_increment_offset)) /
3606 (ulonglong)variables->auto_increment_increment);
3607 return (nr * (ulonglong)variables->auto_increment_increment +
3608 variables->auto_increment_offset);
3609 }
3610
3611 /**
3612 Update the auto_increment field if necessary.
3613
3614 Updates columns with type NEXT_NUMBER if:
3615
3616 - If column value is set to NULL (in which case
3617 autoinc_field_has_explicit_non_null_value is 0)
3618 - If column is set to 0 and (sql_mode & MODE_NO_AUTO_VALUE_ON_ZERO) is not
3619 set. In the future we will only set NEXT_NUMBER fields if one sets them
3620 to NULL (or they are not included in the insert list).
3621
3622 In those cases, we check if the currently reserved interval still has
3623 values we have not used. If yes, we pick the smallest one and use it.
3624 Otherwise:
3625
3626 - If a list of intervals has been provided to the statement via SET
3627 INSERT_ID or via an Intvar_log_event (in a replication slave), we pick the
3628 first unused interval from this list, consider it as reserved.
3629
3630 - Otherwise we set the column for the first row to the value
3631 next_insert_id(get_auto_increment(column))) which is usually
3632 max-used-column-value+1.
3633 We call get_auto_increment() for the first row in a multi-row
3634 statement. get_auto_increment() will tell us the interval of values it
3635 reserved for us.
3636
3637 - In both cases, for the following rows we use those reserved values without
3638 calling the handler again (we just progress in the interval, computing
3639 each new value from the previous one). Until we have exhausted them, then
3640 we either take the next provided interval or call get_auto_increment()
3641 again to reserve a new interval.
3642
3643 - In both cases, the reserved intervals are remembered in
3644 thd->auto_inc_intervals_in_cur_stmt_for_binlog if statement-based
3645 binlogging; the last reserved interval is remembered in
3646 auto_inc_interval_for_cur_row. The number of reserved intervals is
3647 remembered in auto_inc_intervals_count. It differs from the number of
3648 elements in thd->auto_inc_intervals_in_cur_stmt_for_binlog() because the
3649 latter list is cumulative over all statements forming one binlog event
3650 (when stored functions and triggers are used), and collapses two
3651 contiguous intervals in one (see its append() method).
3652
3653 The idea is that generated auto_increment values are predictable and
3654 independent of the column values in the table. This is needed to be
3655 able to replicate into a table that already has rows with a higher
3656 auto-increment value than the one that is inserted.
3657
3658 After we have already generated an auto-increment number and the user
3659 inserts a column with a higher value than the last used one, we will
3660 start counting from the inserted value.
3661
3662 This function's "outputs" are: the table's auto_increment field is filled
3663 with a value, thd->next_insert_id is filled with the value to use for the
3664 next row, if a value was autogenerated for the current row it is stored in
3665 thd->insert_id_for_cur_row, if get_auto_increment() was called
3666 thd->auto_inc_interval_for_cur_row is modified, if that interval is not
3667 present in thd->auto_inc_intervals_in_cur_stmt_for_binlog it is added to
3668 this list.
3669
3670 @todo
3671 Replace all references to "next number" or NEXT_NUMBER to
3672 "auto_increment", everywhere (see below: there is
3673 table->autoinc_field_has_explicit_non_null_value, and there also exists
3674 table->next_number_field, it's not consistent).
3675
3676 @retval
3677 0 ok
3678 @retval
3679 HA_ERR_AUTOINC_READ_FAILED get_auto_increment() was called and
3680 returned ~(ulonglong) 0
3681 @retval
3682 HA_ERR_AUTOINC_ERANGE storing value in field caused strict mode
3683 failure.
3684 */
3685
3686 #define AUTO_INC_DEFAULT_NB_ROWS 1 // Some prefer 1024 here
3687 #define AUTO_INC_DEFAULT_NB_MAX_BITS 16
3688 #define AUTO_INC_DEFAULT_NB_MAX ((1 << AUTO_INC_DEFAULT_NB_MAX_BITS) - 1)
3689
int handler::update_auto_increment() {
  ulonglong nr, nb_reserved_values = 0;
  bool append = false;
  THD *thd = table->in_use;
  struct System_variables *variables = &thd->variables;
  DBUG_ASSERT(table_share->tmp_table != NO_TMP_TABLE || m_lock_type != F_UNLCK);
  DBUG_TRACE;

  /*
    next_insert_id is a "cursor" into the reserved interval, it may go greater
    than the interval, but not smaller.
  */
  DBUG_ASSERT(next_insert_id >= auto_inc_interval_for_cur_row.minimum());

  /*
    An explicit non-zero value in the column (or an explicit 0 when
    NO_AUTO_VALUE_ON_ZERO is in effect) means no value must be generated
    for this row.
  */
  if ((nr = table->next_number_field->val_int()) != 0 ||
      (table->autoinc_field_has_explicit_non_null_value &&
       thd->variables.sql_mode & MODE_NO_AUTO_VALUE_ON_ZERO)) {
    /*
      First test if the query was aborted due to strict mode constraints.
    */
    if (thd->is_error() &&
        thd->get_stmt_da()->mysql_errno() == ER_TRUNCATED_WRONG_VALUE)
      return HA_ERR_AUTOINC_ERANGE;

    /*
      Update next_insert_id if we had already generated a value in this
      statement (case of INSERT VALUES(null),(3763),(null):
      the last NULL needs to insert 3764, not the value of the first NULL plus
      1).
      Also we should take into account the sign of the value.
      Since auto_increment value can't have negative value we should update
      next_insert_id only in case when we INSERTing explicit positive value.
      It means that for a table that has SIGNED INTEGER column when we execute
      the following statement
      INSERT INTO t1 VALUES( NULL), (-1), (NULL)
      we shouldn't call adjust_next_insert_id_after_explicit_value()
      and the result row will be (1, -1, 2) (for new opened connection
      to the server). On the other hand, for the statement
      INSERT INTO t1 VALUES( NULL), (333), (NULL)
      we should call adjust_next_insert_id_after_explicit_value()
      and result row will be (1, 333, 334).
    */
    if (table->next_number_field->is_unsigned() || ((longlong)nr) > 0)
      adjust_next_insert_id_after_explicit_value(nr);

    insert_id_for_cur_row = 0;  // didn't generate anything
    return 0;
  }

  // The column cannot hold anything beyond its maximal integer value.
  if (next_insert_id > table->next_number_field->get_max_int_value())
    return HA_ERR_AUTOINC_READ_FAILED;

  if ((nr = next_insert_id) >= auto_inc_interval_for_cur_row.maximum()) {
    /* next_insert_id is beyond what is reserved, so we reserve more. */
    const Discrete_interval *forced = thd->auto_inc_intervals_forced.get_next();
    if (forced != nullptr) {
      // Forced intervals come from SET INSERT_ID or replication events.
      nr = forced->minimum();
      /*
        In a multi insert statement when the number of affected rows is known
        then reserve those many number of auto increment values. So that
        interval will be starting value to starting value + number of affected
        rows * increment of auto increment.
      */
      nb_reserved_values = (estimation_rows_to_insert > 0)
                               ? estimation_rows_to_insert
                               : forced->values();
    } else {
      /*
        handler::estimation_rows_to_insert was set by
        handler::ha_start_bulk_insert(); if 0 it means "unknown".
      */
      ulonglong nb_desired_values;
      /*
        If an estimation was given to the engine:
        - use it.
        - if we already reserved numbers, it means the estimation was
        not accurate, then we'll reserve 2*AUTO_INC_DEFAULT_NB_ROWS the 2nd
        time, twice that the 3rd time etc.
        If no estimation was given, use those increasing defaults from the
        start, starting from AUTO_INC_DEFAULT_NB_ROWS.
        Don't go beyond a max to not reserve "way too much" (because
        reservation means potentially losing unused values).
        Note that in prelocked mode no estimation is given.
      */

      if ((auto_inc_intervals_count == 0) && (estimation_rows_to_insert > 0))
        nb_desired_values = estimation_rows_to_insert;
      else if ((auto_inc_intervals_count == 0) &&
               (thd->lex->bulk_insert_row_cnt > 0)) {
        /*
          For multi-row inserts, if the bulk inserts cannot be started, the
          handler::estimation_rows_to_insert will not be set. But we still
          want to reserve the autoinc values.
        */
        nb_desired_values = thd->lex->bulk_insert_row_cnt;
      } else /* go with the increasing defaults */
      {
        /* avoid overflow in formula, with this if() */
        if (auto_inc_intervals_count <= AUTO_INC_DEFAULT_NB_MAX_BITS) {
          nb_desired_values =
              AUTO_INC_DEFAULT_NB_ROWS * (1 << auto_inc_intervals_count);
          nb_desired_values =
              std::min(nb_desired_values, ulonglong(AUTO_INC_DEFAULT_NB_MAX));
        } else
          nb_desired_values = AUTO_INC_DEFAULT_NB_MAX;
      }
      /* This call ignores all its parameters but nr, currently */
      get_auto_increment(variables->auto_increment_offset,
                         variables->auto_increment_increment, nb_desired_values,
                         &nr, &nb_reserved_values);
      if (nr == ULLONG_MAX) return HA_ERR_AUTOINC_READ_FAILED;  // Mark failure

      /*
        That rounding below should not be needed when all engines actually
        respect offset and increment in get_auto_increment(). But they don't
        so we still do it. Wonder if for the not-first-in-index we should do
        it. Hope that this rounding didn't push us out of the interval; even
        if it did we cannot do anything about it (calling the engine again
        will not help as we inserted no row).
      */
      nr = compute_next_insert_id(nr - 1, variables);
    }

    if (table->s->next_number_keypart == 0) {
      /* We must defer the appending until "nr" has been possibly truncated */
      append = true;
    } else {
      /*
        For such auto_increment there is no notion of interval, just a
        singleton. The interval is not even stored in
        thd->auto_inc_interval_for_cur_row, so we are sure to call the engine
        for next row.
      */
      DBUG_PRINT("info", ("auto_increment: special not-first-in-index"));
    }
  }

  if (unlikely(nr == ULLONG_MAX)) return HA_ERR_AUTOINC_ERANGE;

  DBUG_PRINT("info", ("auto_increment: %lu", (ulong)nr));

  if (unlikely(table->next_number_field->store((longlong)nr, true))) {
    /*
      first test if the query was aborted due to strict mode constraints
    */
    if (thd->is_error() &&
        thd->get_stmt_da()->mysql_errno() == ER_WARN_DATA_OUT_OF_RANGE)
      return HA_ERR_AUTOINC_ERANGE;

    /*
      field refused this value (overflow) and truncated it, use the result of
      the truncation (which is going to be inserted); however we try to
      decrease it to honour auto_increment_* variables.
      That will shift the left bound of the reserved interval, we don't
      bother shifting the right bound (anyway any other value from this
      interval will cause a duplicate key).
    */
    nr = prev_insert_id(table->next_number_field->val_int(), variables);
    if (unlikely(table->next_number_field->store((longlong)nr, true)))
      nr = table->next_number_field->val_int();
  }
  if (append) {
    auto_inc_interval_for_cur_row.replace(nr, nb_reserved_values,
                                          variables->auto_increment_increment);
    auto_inc_intervals_count++;
    /* Row-based replication does not need to store intervals in binlog */
    if (mysql_bin_log.is_open() && !thd->is_current_stmt_binlog_format_row())
      thd->auto_inc_intervals_in_cur_stmt_for_binlog.append(
          auto_inc_interval_for_cur_row.minimum(),
          auto_inc_interval_for_cur_row.values(),
          variables->auto_increment_increment);
  }

  /*
    Record this autogenerated value. If the caller then
    succeeds to insert this value, it will call
    record_first_successful_insert_id_in_cur_stmt()
    which will set first_successful_insert_id_in_cur_stmt if it's not
    already set.
  */
  insert_id_for_cur_row = nr;
  /*
    Set next insert id to point to next auto-increment value to be able to
    handle multi-row statements.
  */
  set_next_insert_id(compute_next_insert_id(nr, variables));

  return 0;
}
3879
3880 /** @brief
3881 MySQL signal that it changed the column bitmap
3882
3883 USAGE
3884 This is for handlers that needs to setup their own column bitmaps.
3885 Normally the handler should set up their own column bitmaps in
3886 index_init() or rnd_init() and in any column_bitmaps_signal() call after
3887 this.
3888
  The handler is allowed to do changes to the bitmap after an index_init or
3890 rnd_init() call is made as after this, MySQL will not use the bitmap
3891 for any program logic checking.
3892 */
void handler::column_bitmaps_signal() {
  // Default implementation: nothing to adjust, only trace the current
  // bitmaps. Handlers that maintain their own column bitmaps are expected
  // to override this (see the comment block above).
  DBUG_TRACE;
  DBUG_PRINT("info", ("read_set: %p write_set: %p", table->read_set,
                      table->write_set));
}
3898
3899 /**
3900 Reserves an interval of auto_increment values from the handler.
3901
3902 @param offset offset (modulus increment)
3903 @param increment increment between calls
3904 @param nb_desired_values how many values we want
3905 @param[out] first_value the first value reserved by the handler
3906 @param[out] nb_reserved_values how many values the handler reserved
3907
3908 offset and increment means that we want values to be of the form
3909 offset + N * increment, where N>=0 is integer.
3910 If the function sets *first_value to ULLONG_MAX it means an error.
3911 If the function sets *nb_reserved_values to ULLONG_MAX it means it has
3912 reserved to "positive infinite".
3913 */
3914
void handler::get_auto_increment(
    ulonglong offset MY_ATTRIBUTE((unused)),
    ulonglong increment MY_ATTRIBUTE((unused)),
    ulonglong nb_desired_values MY_ATTRIBUTE((unused)), ulonglong *first_value,
    ulonglong *nb_reserved_values) {
  ulonglong nr;
  int error;
  DBUG_TRACE;

  // Restrict reads to the index that covers the auto-increment column.
  (void)extra(HA_EXTRA_KEYREAD);
  table->mark_columns_used_by_index_no_reset(table->s->next_number_index,
                                             table->read_set);
  column_bitmaps_signal();

  if (ha_index_init(table->s->next_number_index, true)) {
    /* This should never happen, assert in debug, and fail in release build */
    DBUG_ASSERT(0);
    *first_value = ULLONG_MAX;
    return;
  }

  if (table->s->next_number_keypart == 0) {  // Autoincrement at key-start
    error = ha_index_last(table->record[1]);
    /*
      MySQL implicitly assumes such method does locking (as MySQL decides to
      use nr+increment without checking again with the handler, in
      handler::update_auto_increment()), so reserves to infinite.
    */
    *nb_reserved_values = ULLONG_MAX;
  } else {
    // Autoincrement is a later keypart: search for the last row that shares
    // the leading keyparts of the row being inserted.
    uchar key[MAX_KEY_LENGTH];
    key_copy(key, table->record[0],
             table->key_info + table->s->next_number_index,
             table->s->next_number_key_offset);
    error =
        ha_index_read_map(table->record[1], key,
                          make_prev_keypart_map(table->s->next_number_keypart),
                          HA_READ_PREFIX_LAST);
    /*
      MySQL needs to call us for next row: assume we are inserting ("a",null)
      here, we return 3, and next this statement will want to insert
      ("b",null): there is no reason why ("b",3+1) would be the good row to
      insert: maybe it already exists, maybe 3+1 is too large...
    */
    *nb_reserved_values = 1;
  }

  if (error) {
    if (error == HA_ERR_END_OF_FILE || error == HA_ERR_KEY_NOT_FOUND) {
      /* No entry found, start with 1. */
      nr = 1;
    } else {
      // Any other read error is unexpected here; report "failure".
      DBUG_ASSERT(0);
      nr = ULLONG_MAX;
    }
  } else
    // The last matching row was read into record[1]; continue from its
    // auto-increment value plus one.
    nr = ((ulonglong)table->next_number_field->val_int_offset(
              table->s->rec_buff_length) +
          1);
  ha_index_end();
  (void)extra(HA_EXTRA_NO_KEYREAD);
  *first_value = nr;
}
3978
void handler::ha_release_auto_increment() {
  // Releasing is legal for a locked table, a temporary table, or when no
  // auto-increment values are pending at all.
  DBUG_ASSERT(table_share->tmp_table != NO_TMP_TABLE ||
              m_lock_type != F_UNLCK ||
              (!next_insert_id && !insert_id_for_cur_row));
  DEBUG_SYNC(ha_thd(), "release_auto_increment");
  // Let the storage engine hand back any unused reserved values first,
  // then clear the handler-side reservation state.
  release_auto_increment();
  insert_id_for_cur_row = 0;
  auto_inc_interval_for_cur_row.replace(0, 0, 0);
  auto_inc_intervals_count = 0;
  if (next_insert_id > 0) {
    next_insert_id = 0;
    /*
      this statement used forced auto_increment values if there were some,
      wipe them away for other statements.
    */
    table->in_use->auto_inc_intervals_forced.empty();
  }
}
3997
table_case_name(const HA_CREATE_INFO * info,const char * name)3998 const char *table_case_name(const HA_CREATE_INFO *info, const char *name) {
3999 return ((lower_case_table_names == 2 && info->alias) ? info->alias : name);
4000 }
4001
4002 /**
4003 Construct and emit duplicate key error message using information
4004 from table's record buffer.
4005
4006 @param table TABLE object which record buffer should be used as
4007 source for column values.
4008 @param key Key description.
4009 @param msg Error message template to which key value should be
4010 added.
4011 @param errflag Flags for my_error() call.
4012 @param org_table_name The original table name (if any)
4013 */
4014
print_keydup_error(TABLE * table,KEY * key,const char * msg,myf errflag,const char * org_table_name)4015 void print_keydup_error(TABLE *table, KEY *key, const char *msg, myf errflag,
4016 const char *org_table_name) {
4017 /* Write the duplicated key in the error message */
4018 char key_buff[MAX_KEY_LENGTH];
4019 String str(key_buff, sizeof(key_buff), system_charset_info);
4020 std::string key_name;
4021
4022 if (key == nullptr) {
4023 /* Key is unknown */
4024 key_name = "*UNKNOWN*";
4025 str.copy("", 0, system_charset_info);
4026
4027 } else {
4028 /* Table is opened and defined at this point */
4029 key_unpack(&str, table, key);
4030 size_t max_length = MYSQL_ERRMSG_SIZE - strlen(msg);
4031 if (str.length() >= max_length) {
4032 str.length(max_length - 4);
4033 str.append(STRING_WITH_LEN("..."));
4034 }
4035 str[str.length()] = 0;
4036 if (org_table_name != nullptr)
4037 key_name = org_table_name;
4038 else
4039 key_name = table->s->table_name.str;
4040 key_name += ".";
4041
4042 key_name += key->name;
4043 }
4044
4045 my_printf_error(ER_DUP_ENTRY, msg, errflag, str.c_ptr(), key_name.c_str());
4046 }
4047
4048 /**
4049 Construct and emit duplicate key error message using information
4050 from table's record buffer.
4051
4052 @sa print_keydup_error(table, key, msg, errflag).
4053 */
4054
print_keydup_error(TABLE * table,KEY * key,myf errflag,const char * org_table_name)4055 void print_keydup_error(TABLE *table, KEY *key, myf errflag,
4056 const char *org_table_name) {
4057 print_keydup_error(table, key,
4058 ER_THD(current_thd, ER_DUP_ENTRY_WITH_KEY_NAME), errflag,
4059 org_table_name);
4060 }
4061
4062 /**
4063 This method is used to analyse the error to see whether the error
4064 is ignorable or not. Further comments in header file.
4065 */
4066
is_ignorable_error(int error)4067 bool handler::is_ignorable_error(int error) {
4068 DBUG_TRACE;
4069
4070 // Catch errors that are ignorable
4071 switch (error) {
4072 // Error code 0 is not an error.
4073 case 0:
4074 // Dup key errors may be explicitly ignored.
4075 case HA_ERR_FOUND_DUPP_KEY:
4076 case HA_ERR_FOUND_DUPP_UNIQUE:
4077 // Foreign key constraint violations are ignorable.
4078 case HA_ERR_ROW_IS_REFERENCED:
4079 case HA_ERR_NO_REFERENCED_ROW:
4080 return true;
4081 }
4082
4083 // Default is that an error is not ignorable.
4084 return false;
4085 }
4086
4087 /**
4088 This method is used to analyse the error to see whether the error
4089 is fatal or not. Further comments in header file.
4090 */
4091
is_fatal_error(int error)4092 bool handler::is_fatal_error(int error) {
4093 DBUG_TRACE;
4094
4095 // No ignorable errors are fatal
4096 if (is_ignorable_error(error)) return false;
4097
4098 // Catch errors that are not fatal
4099 switch (error) {
4100 /*
4101 Deadlock and lock timeout cause transaction/statement rollback so that
4102 THD::is_fatal_sub_stmt_error will be set. This means that they will not
4103 be possible to handle by stored program handlers inside stored functions
4104 and triggers even if non-fatal.
4105 */
4106 case HA_ERR_LOCK_WAIT_TIMEOUT:
4107 case HA_ERR_LOCK_DEADLOCK:
4108 return false;
4109
4110 case HA_ERR_NULL_IN_SPATIAL:
4111 return false;
4112 }
4113
4114 // Default is that an error is fatal
4115 return true;
4116 }
4117
4118 /**
4119 Print error that we got from handler function.
4120
4121 @note
4122 In case of delete table it's only safe to use the following parts of
4123 the 'table' structure:
4124 - table->s->path
4125 - table->alias
4126 */
print_error(int error,myf errflag)4127 void handler::print_error(int error, myf errflag) {
4128 THD *thd = current_thd;
4129 Foreign_key_error_handler foreign_key_error_handler(thd, this);
4130
4131 DBUG_TRACE;
4132 DBUG_PRINT("enter", ("error: %d", error));
4133
4134 int textno = ER_GET_ERRNO;
4135 switch (error) {
4136 case EACCES:
4137 textno = ER_OPEN_AS_READONLY;
4138 break;
4139 case EAGAIN:
4140 textno = ER_FILE_USED;
4141 break;
4142 case ENOENT: {
4143 char errbuf[MYSYS_STRERROR_SIZE];
4144 textno = ER_FILE_NOT_FOUND;
4145 my_error(textno, errflag, table_share->table_name.str, error,
4146 my_strerror(errbuf, sizeof(errbuf), error));
4147 } break;
4148 case HA_ERR_KEY_NOT_FOUND:
4149 case HA_ERR_NO_ACTIVE_RECORD:
4150 case HA_ERR_RECORD_DELETED:
4151 case HA_ERR_END_OF_FILE:
4152 textno = ER_KEY_NOT_FOUND;
4153 break;
4154 case HA_ERR_WRONG_MRG_TABLE_DEF:
4155 textno = ER_WRONG_MRG_TABLE;
4156 break;
4157 case HA_ERR_FOUND_DUPP_KEY: {
4158 uint key_nr = table ? get_dup_key(error) : -1;
4159 if ((int)key_nr >= 0) {
4160 print_keydup_error(
4161 table, key_nr == MAX_KEY ? nullptr : &table->key_info[key_nr],
4162 errflag);
4163 return;
4164 }
4165 textno = ER_DUP_KEY;
4166 break;
4167 }
4168 case HA_ERR_FOREIGN_DUPLICATE_KEY: {
4169 DBUG_ASSERT(table_share->tmp_table != NO_TMP_TABLE ||
4170 m_lock_type != F_UNLCK);
4171
4172 char rec_buf[MAX_KEY_LENGTH];
4173 String rec(rec_buf, sizeof(rec_buf), system_charset_info);
4174 /* Table is opened and defined at this point */
4175
4176 /*
4177 Just print the subset of fields that are part of the first index,
4178 printing the whole row from there is not easy.
4179 */
4180 key_unpack(&rec, table, &table->key_info[0]);
4181
4182 char child_table_name[NAME_LEN + 1];
4183 char child_key_name[NAME_LEN + 1];
4184 if (get_foreign_dup_key(child_table_name, sizeof(child_table_name),
4185 child_key_name, sizeof(child_key_name))) {
4186 my_error(ER_FOREIGN_DUPLICATE_KEY_WITH_CHILD_INFO, errflag,
4187 table_share->table_name.str, rec.c_ptr_safe(),
4188 child_table_name, child_key_name);
4189 } else {
4190 my_error(ER_FOREIGN_DUPLICATE_KEY_WITHOUT_CHILD_INFO, errflag,
4191 table_share->table_name.str, rec.c_ptr_safe());
4192 }
4193 return;
4194 }
4195 case HA_ERR_NULL_IN_SPATIAL:
4196 my_error(ER_CANT_CREATE_GEOMETRY_OBJECT, errflag);
4197 return;
4198 case HA_ERR_FOUND_DUPP_UNIQUE:
4199 textno = ER_DUP_UNIQUE;
4200 break;
4201 case HA_ERR_RECORD_CHANGED:
4202 textno = ER_CHECKREAD;
4203 break;
4204 case HA_ERR_CRASHED:
4205 textno = ER_NOT_KEYFILE;
4206 break;
4207 case HA_ERR_WRONG_IN_RECORD:
4208 textno = ER_CRASHED_ON_USAGE;
4209 break;
4210 case HA_ERR_CRASHED_ON_USAGE:
4211 textno = ER_CRASHED_ON_USAGE;
4212 break;
4213 case HA_ERR_NOT_A_TABLE:
4214 textno = error;
4215 break;
4216 case HA_ERR_CRASHED_ON_REPAIR:
4217 textno = ER_CRASHED_ON_REPAIR;
4218 break;
4219 case HA_ERR_OUT_OF_MEM:
4220 textno = ER_OUT_OF_RESOURCES;
4221 break;
4222 case HA_ERR_SE_OUT_OF_MEMORY:
4223 my_error(ER_ENGINE_OUT_OF_MEMORY, errflag, table->file->table_type());
4224 return;
4225 case HA_ERR_WRONG_COMMAND:
4226 textno = ER_ILLEGAL_HA;
4227 break;
4228 case HA_ERR_OLD_FILE:
4229 textno = ER_OLD_KEYFILE;
4230 break;
4231 case HA_ERR_UNSUPPORTED:
4232 textno = ER_UNSUPPORTED_EXTENSION;
4233 break;
4234 case HA_ERR_RECORD_FILE_FULL:
4235 case HA_ERR_INDEX_FILE_FULL: {
4236 textno = ER_RECORD_FILE_FULL;
4237 /* Write the error message to error log */
4238 LogErr(ERROR_LEVEL, ER_SERVER_RECORD_FILE_FULL,
4239 table_share->table_name.str);
4240 break;
4241 }
4242 case HA_ERR_DISK_FULL_NOWAIT: {
4243 textno = ER_DISK_FULL_NOWAIT;
4244 /* Write the error message to error log */
4245 LogErr(ERROR_LEVEL, ER_SERVER_DISK_FULL_NOWAIT,
4246 table_share->table_name.str);
4247 break;
4248 }
4249 case HA_ERR_LOCK_WAIT_TIMEOUT:
4250 textno = ER_LOCK_WAIT_TIMEOUT;
4251 break;
4252 case HA_ERR_LOCK_TABLE_FULL:
4253 textno = ER_LOCK_TABLE_FULL;
4254 break;
4255 case HA_ERR_LOCK_DEADLOCK:
4256 textno = ER_LOCK_DEADLOCK;
4257 break;
4258 case HA_ERR_READ_ONLY_TRANSACTION:
4259 textno = ER_READ_ONLY_TRANSACTION;
4260 break;
4261 case HA_ERR_CANNOT_ADD_FOREIGN:
4262 textno = ER_CANNOT_ADD_FOREIGN;
4263 break;
4264 case HA_ERR_ROW_IS_REFERENCED: {
4265 String str;
4266 /*
4267 Manipulate the error message while handling the error
4268 condition based on the access check.
4269 */
4270 thd->push_internal_handler(&foreign_key_error_handler);
4271 get_error_message(error, &str);
4272 my_error(ER_ROW_IS_REFERENCED_2, errflag, str.c_ptr_safe());
4273 thd->pop_internal_handler();
4274 return;
4275 }
4276 case HA_ERR_NO_REFERENCED_ROW: {
4277 String str;
4278 /*
4279 Manipulate the error message while handling the error
4280 condition based on the access check.
4281 */
4282 thd->push_internal_handler(&foreign_key_error_handler);
4283 get_error_message(error, &str);
4284 my_error(ER_NO_REFERENCED_ROW_2, errflag, str.c_ptr_safe());
4285 thd->pop_internal_handler();
4286 return;
4287 }
4288 case HA_ERR_TABLE_DEF_CHANGED:
4289 textno = ER_TABLE_DEF_CHANGED;
4290 break;
4291 case HA_ERR_NO_SUCH_TABLE:
4292 my_error(ER_NO_SUCH_TABLE, errflag, table_share->db.str,
4293 table_share->table_name.str);
4294 return;
4295 case HA_ERR_RBR_LOGGING_FAILED:
4296 textno = ER_BINLOG_ROW_LOGGING_FAILED;
4297 break;
4298 case HA_ERR_DROP_INDEX_FK: {
4299 const char *ptr = "???";
4300 uint key_nr = table ? get_dup_key(error) : -1;
4301 if ((int)key_nr >= 0 && key_nr != MAX_KEY)
4302 ptr = table->key_info[key_nr].name;
4303 my_error(ER_DROP_INDEX_FK, errflag, ptr);
4304 return;
4305 }
4306 case HA_ERR_TABLE_NEEDS_UPGRADE:
4307 textno = ER_TABLE_NEEDS_UPGRADE;
4308 break;
4309 case HA_ERR_NO_PARTITION_FOUND:
4310 textno = ER_WRONG_PARTITION_NAME;
4311 break;
4312 case HA_ERR_TABLE_READONLY:
4313 textno = ER_OPEN_AS_READONLY;
4314 break;
4315 case HA_ERR_AUTOINC_READ_FAILED:
4316 textno = ER_AUTOINC_READ_FAILED;
4317 break;
4318 case HA_ERR_AUTOINC_ERANGE:
4319 textno = ER_WARN_DATA_OUT_OF_RANGE;
4320 break;
4321 case HA_ERR_TOO_MANY_CONCURRENT_TRXS:
4322 textno = ER_TOO_MANY_CONCURRENT_TRXS;
4323 break;
4324 case HA_ERR_INDEX_COL_TOO_LONG:
4325 textno = ER_INDEX_COLUMN_TOO_LONG;
4326 break;
4327 case HA_ERR_NOT_IN_LOCK_PARTITIONS:
4328 textno = ER_ROW_DOES_NOT_MATCH_GIVEN_PARTITION_SET;
4329 break;
4330 case HA_ERR_INDEX_CORRUPT:
4331 textno = ER_INDEX_CORRUPT;
4332 break;
4333 case HA_ERR_UNDO_REC_TOO_BIG:
4334 textno = ER_UNDO_RECORD_TOO_BIG;
4335 break;
4336 case HA_ERR_TABLE_IN_FK_CHECK:
4337 textno = ER_TABLE_IN_FK_CHECK;
4338 break;
4339 case HA_WRONG_CREATE_OPTION:
4340 textno = ER_ILLEGAL_HA;
4341 break;
4342 case HA_MISSING_CREATE_OPTION: {
4343 const char *engine = table_type();
4344 my_error(ER_MISSING_HA_CREATE_OPTION, errflag, engine);
4345 return;
4346 }
4347 case HA_ERR_TOO_MANY_FIELDS:
4348 textno = ER_TOO_MANY_FIELDS;
4349 break;
4350 case HA_ERR_INNODB_READ_ONLY:
4351 textno = ER_INNODB_READ_ONLY;
4352 break;
4353 case HA_ERR_TEMP_FILE_WRITE_FAILURE:
4354 textno = ER_TEMP_FILE_WRITE_FAILURE;
4355 break;
4356 case HA_ERR_INNODB_FORCED_RECOVERY:
4357 textno = ER_INNODB_FORCED_RECOVERY;
4358 break;
4359 case HA_ERR_TABLE_CORRUPT:
4360 my_error(ER_TABLE_CORRUPT, errflag, table_share->db.str,
4361 table_share->table_name.str);
4362 return;
4363 case HA_ERR_QUERY_INTERRUPTED:
4364 textno = ER_QUERY_INTERRUPTED;
4365 break;
4366 case HA_ERR_TABLESPACE_MISSING: {
4367 char errbuf[MYSYS_STRERROR_SIZE];
4368 snprintf(errbuf, MYSYS_STRERROR_SIZE, "`%s`.`%s`", table_share->db.str,
4369 table_share->table_name.str);
4370 my_error(ER_TABLESPACE_MISSING, errflag, errbuf, error);
4371 return;
4372 }
4373 case HA_ERR_TABLESPACE_IS_NOT_EMPTY:
4374 my_error(ER_TABLESPACE_IS_NOT_EMPTY, errflag, table_share->db.str,
4375 table_share->table_name.str);
4376 return;
4377 case HA_ERR_WRONG_FILE_NAME:
4378 my_error(ER_WRONG_FILE_NAME, errflag, table_share->table_name.str);
4379 return;
4380 case HA_ERR_NOT_ALLOWED_COMMAND:
4381 textno = ER_NOT_ALLOWED_COMMAND;
4382 break;
4383 case HA_ERR_NO_SESSION_TEMP:
4384 textno = ER_NO_SESSION_TEMP;
4385 break;
4386 case HA_ERR_WRONG_TABLE_NAME:
4387 textno = ER_WRONG_TABLE_NAME;
4388 break;
4389 case HA_ERR_TOO_LONG_PATH:
4390 textno = ER_TABLE_NAME_CAUSES_TOO_LONG_PATH;
4391 break;
4392 default: {
4393 /* The error was "unknown" to this function.
4394 Ask handler if it has got a message for this error */
4395 String str;
4396 bool temporary = get_error_message(error, &str);
4397 if (!str.is_empty()) {
4398 const char *engine = table_type();
4399 if (temporary)
4400 my_error(ER_GET_TEMPORARY_ERRMSG, errflag, error, str.ptr(), engine);
4401 else
4402 my_error(ER_GET_ERRMSG, errflag, error, str.ptr(), engine);
4403 } else {
4404 char errbuf[MYSQL_ERRMSG_SIZE];
4405 my_error(ER_GET_ERRNO, errflag, error,
4406 my_strerror(errbuf, MYSQL_ERRMSG_SIZE, error));
4407 }
4408 return;
4409 }
4410 }
4411 if (textno != ER_FILE_NOT_FOUND)
4412 my_error(textno, errflag, table_share->table_name.str, error);
4413 }
4414
4415 /**
4416 Return an error message specific to this handler.
4417
4418 @param error error code previously returned by handler
4419 @param buf pointer to String where to add error message
4420
4421 @return
4422 Returns true if this is a temporary error
4423 */
get_error_message(int error MY_ATTRIBUTE ((unused)),String * buf MY_ATTRIBUTE ((unused)))4424 bool handler::get_error_message(int error MY_ATTRIBUTE((unused)),
4425 String *buf MY_ATTRIBUTE((unused))) {
4426 return false;
4427 }
4428
4429 /**
4430 Check for incompatible collation changes.
4431
4432 @retval
4433 HA_ADMIN_NEEDS_UPGRADE Table may have data requiring upgrade.
4434 @retval
4435 0 No upgrade required.
4436 */
4437
check_collation_compatibility()4438 int handler::check_collation_compatibility() {
4439 ulong mysql_version = table->s->mysql_version;
4440
4441 if (mysql_version < 50124) {
4442 KEY *key = table->key_info;
4443 KEY *key_end = key + table->s->keys;
4444 for (; key < key_end; key++) {
4445 KEY_PART_INFO *key_part = key->key_part;
4446 KEY_PART_INFO *key_part_end = key_part + key->user_defined_key_parts;
4447 for (; key_part < key_part_end; key_part++) {
4448 if (!key_part->fieldnr) continue;
4449 Field *field = table->field[key_part->fieldnr - 1];
4450 uint cs_number = field->charset()->number;
4451 if ((mysql_version < 50048 &&
4452 (cs_number == 11 || /* ascii_general_ci - bug #29499, bug #27562 */
4453 cs_number == 41 || /* latin7_general_ci - bug #29461 */
4454 cs_number == 42 || /* latin7_general_cs - bug #29461 */
4455 cs_number == 20 || /* latin7_estonian_cs - bug #29461 */
4456 cs_number == 21 || /* latin2_hungarian_ci - bug #29461 */
4457 cs_number == 22 || /* koi8u_general_ci - bug #29461 */
4458 cs_number == 23 || /* cp1251_ukrainian_ci - bug #29461 */
4459 cs_number == 26)) || /* cp1250_general_ci - bug #29461 */
4460 (mysql_version < 50124 &&
4461 (cs_number == 33 || /* utf8_general_ci - bug #27877 */
4462 cs_number == 35))) /* ucs2_general_ci - bug #27877 */
4463 return HA_ADMIN_NEEDS_UPGRADE;
4464 }
4465 }
4466 }
4467 return 0;
4468 }
4469
ha_check_for_upgrade(HA_CHECK_OPT * check_opt)4470 int handler::ha_check_for_upgrade(HA_CHECK_OPT *check_opt) {
4471 int error;
4472 KEY *keyinfo, *keyend;
4473 KEY_PART_INFO *keypart, *keypartend;
4474
4475 if (!table->s->mysql_version) {
4476 /* check for blob-in-key error */
4477 keyinfo = table->key_info;
4478 keyend = table->key_info + table->s->keys;
4479 for (; keyinfo < keyend; keyinfo++) {
4480 keypart = keyinfo->key_part;
4481 keypartend = keypart + keyinfo->user_defined_key_parts;
4482 for (; keypart < keypartend; keypart++) {
4483 if (!keypart->fieldnr) continue;
4484 Field *field = table->field[keypart->fieldnr - 1];
4485 if (field->type() == MYSQL_TYPE_BLOB) {
4486 if (check_opt->sql_flags & TT_FOR_UPGRADE)
4487 check_opt->flags = T_MEDIUM;
4488 return HA_ADMIN_NEEDS_CHECK;
4489 }
4490 }
4491 }
4492 }
4493
4494 if ((error = check_collation_compatibility())) return error;
4495
4496 return check_for_upgrade(check_opt);
4497 }
4498
4499 // Function identifies any old data type present in table.
check_table_for_old_types(const TABLE * table,bool check_temporal_upgrade)4500 int check_table_for_old_types(const TABLE *table, bool check_temporal_upgrade) {
4501 Field **field;
4502
4503 for (field = table->field; (*field); field++) {
4504 if (table->s->mysql_version == 0) // prior to MySQL 5.0
4505 {
4506 /* check for bad DECIMAL field */
4507 if ((*field)->type() == MYSQL_TYPE_NEWDECIMAL) {
4508 return HA_ADMIN_NEEDS_ALTER;
4509 }
4510 if ((*field)->type() == MYSQL_TYPE_VAR_STRING) {
4511 return HA_ADMIN_NEEDS_ALTER;
4512 }
4513 }
4514
4515 /*
4516 Check for old DECIMAL field.
4517
4518 Above check does not take into account for pre 5.0 decimal types which can
4519 be present in the data directory if user did in-place upgrade from
4520 mysql-4.1 to mysql-5.0.
4521 */
4522 if ((*field)->type() == MYSQL_TYPE_DECIMAL) {
4523 return HA_ADMIN_NEEDS_DUMP_UPGRADE;
4524 }
4525
4526 if ((*field)->type() == MYSQL_TYPE_YEAR && (*field)->field_length == 2)
4527 return HA_ADMIN_NEEDS_ALTER; // obsolete YEAR(2) type
4528
4529 if (check_temporal_upgrade) {
4530 if (((*field)->real_type() == MYSQL_TYPE_TIME) ||
4531 ((*field)->real_type() == MYSQL_TYPE_DATETIME) ||
4532 ((*field)->real_type() == MYSQL_TYPE_TIMESTAMP))
4533 return HA_ADMIN_NEEDS_ALTER;
4534 }
4535 }
4536 return 0;
4537 }
4538
4539 /**
4540 @return
4541 key if error because of duplicated keys
4542 */
get_dup_key(int error)4543 uint handler::get_dup_key(int error) {
4544 DBUG_ASSERT(table_share->tmp_table != NO_TMP_TABLE || m_lock_type != F_UNLCK);
4545 DBUG_TRACE;
4546 table->file->errkey = (uint)-1;
4547 if (error == HA_ERR_FOUND_DUPP_KEY || error == HA_ERR_FOUND_DUPP_UNIQUE ||
4548 error == HA_ERR_NULL_IN_SPATIAL || error == HA_ERR_DROP_INDEX_FK)
4549 table->file->info(HA_STATUS_ERRKEY | HA_STATUS_NO_LOCK);
4550 return table->file->errkey;
4551 }
4552
get_foreign_dup_key(char *,uint,char *,uint)4553 bool handler::get_foreign_dup_key(char *, uint, char *, uint) {
4554 DBUG_ASSERT(false);
4555 return (false);
4556 }
4557
/**
  Delete all files with extension from handlerton::file_extensions.

  @param name Base name of table

  @note
    We assume that the handler may return more extensions than
    was actually used for the file.

  @retval
    0   If we successfully deleted at least one file from base_ext and
    didn't get any other errors than ENOENT
  @retval
    !0  Error
*/
int handler::delete_table(const char *name, const dd::Table *) {
  int saved_error = 0;
  int error = 0;
  int enoent_or_zero = ENOENT;  // Error if no file was deleted
  char buff[FN_REFLEN];
  const char **start_ext;

  DBUG_ASSERT(m_lock_type == F_UNLCK);

  // Engines without registered file extensions have no files to delete here.
  if (!(start_ext = ht->file_extensions)) return 0;
  for (const char **ext = start_ext; *ext; ext++) {
    fn_format(buff, name, "", *ext, MY_UNPACK_FILENAME | MY_APPEND_EXT);
    if (mysql_file_delete_with_symlink(key_file_misc, buff, MYF(0))) {
      if (my_errno() != ENOENT) {
        /*
          If error on the first existing file, return the error.
          Otherwise delete as much as possible.
        */
        if (enoent_or_zero) return my_errno();
        saved_error = my_errno();
      }
    } else
      enoent_or_zero = 0;  // No error for ENOENT
    // 'error' ends up ENOENT if no file was ever deleted, else 0.
    error = enoent_or_zero;
  }
  return saved_error ? saved_error : error;
}
4600
/**
  Rename all files with extensions from handlerton::file_extensions.

  A missing file for some extension (ENOENT) is ignored, consistent
  with delete_table(): engines may list more extensions than a given
  table actually uses. On any other error, files renamed so far are
  renamed back (best effort, revert errors are ignored).

  @param from            Old base name of the table files.
  @param to              New base name of the table files.
  @param from_table_def  Old data-dictionary definition (unused here).
  @param to_table_def    New data-dictionary definition (unused here).

  @return 0 on success, otherwise the my_errno() value of the failure.
*/
int handler::rename_table(const char *from, const char *to,
                          const dd::Table *from_table_def
                              MY_ATTRIBUTE((unused)),
                          dd::Table *to_table_def MY_ATTRIBUTE((unused))) {
  int error = 0;
  const char **ext, **start_ext;

  if (!(start_ext = ht->file_extensions)) return 0;
  for (ext = start_ext; *ext; ext++) {
    if (rename_file_ext(from, to, *ext)) {
      error = my_errno();
      if (error != ENOENT) break;
      error = 0;
    }
  }
  if (error) {
    /* Try to revert the rename. Ignore errors. */
    // 'ext' still points at the failing extension; walk backwards to start.
    for (; ext >= start_ext; ext--) rename_file_ext(to, from, *ext);
  }
  return error;
}
4622
// Close the open handler, then delete the underlying table files.
// The nullptr dd::Table is acceptable since the base delete_table()
// implementation does not use the dictionary object.
void handler::drop_table(const char *name) {
  close();
  delete_table(name, nullptr);
}
4627
/**
  Performs checks upon the table.

  @param thd        thread doing CHECK TABLE operation
  @param check_opt  options from the parser

  @retval
    HA_ADMIN_OK                Successful upgrade
  @retval
    HA_ADMIN_NEEDS_UPGRADE     Table has structures requiring upgrade
  @retval
    HA_ADMIN_NEEDS_ALTER       Table has structures requiring ALTER TABLE
  @retval
    HA_ADMIN_NOT_IMPLEMENTED
*/
int handler::ha_check(THD *thd, HA_CHECK_OPT *check_opt) {
  int error;
  DBUG_ASSERT(table_share->tmp_table != NO_TMP_TABLE || m_lock_type != F_UNLCK);

  // CHECK ... FOR UPGRADE is a no-op on an already up-to-date table.
  if ((table->s->mysql_version >= MYSQL_VERSION_ID) &&
      (check_opt->sql_flags & TT_FOR_UPGRADE))
    return 0;

  if (table->s->mysql_version < MYSQL_VERSION_ID) {
    // Check for old temporal format if avoid_temporal_upgrade is disabled.
    // avoid_temporal_upgrade is a global system variable; read it under
    // its protecting mutex.
    mysql_mutex_lock(&LOCK_global_system_variables);
    const bool check_temporal_upgrade = !avoid_temporal_upgrade;
    mysql_mutex_unlock(&LOCK_global_system_variables);

    if ((error = check_table_for_old_types(table, check_temporal_upgrade)))
      return error;
    error = ha_check_for_upgrade(check_opt);
    // HA_ADMIN_NEEDS_CHECK means the engine check() below must still run.
    if (error && (error != HA_ADMIN_NEEDS_CHECK)) return error;
    if (!error && (check_opt->sql_flags & TT_FOR_UPGRADE)) return 0;
  }
  // Delegate to the engine-specific check implementation.
  return check(thd, check_opt);
}
4665
/**
  A helper function to mark a transaction read-write,
  if it is started.
*/

void handler::mark_trx_read_write() {
  // NOTE(review): ha_info[0] appears to be the statement-level transaction
  // entry for this engine's slot — confirm against THD::get_ha_data().
  Ha_trx_info *ha_info = &ha_thd()->get_ha_data(ht->slot)->ha_info[0];
  /*
    When a storage engine method is called, the transaction must
    have been started, unless it's a DDL call, for which the
    storage engine starts the transaction internally, and commits
    it internally, without registering in the ha_list.
    Unfortunately here we can't know for sure if the engine
    has registered the transaction or not, so we must check.
  */
  if (ha_info->is_started()) {
    DBUG_ASSERT(has_transactions());
    /*
      table_share can be NULL in ha_delete_table(). See implementation
      of standalone function ha_delete_table() in sql_base.cc.
    */
    if (table_share == nullptr || table_share->tmp_table == NO_TMP_TABLE) {
      /* TempTable and Heap tables don't use/support transactions. */
      ha_info->set_trx_read_write();
    }
  }
}
4693
4694 /**
4695 Repair table: public interface.
4696
4697 @sa handler::repair()
4698 */
4699
ha_repair(THD * thd,HA_CHECK_OPT * check_opt)4700 int handler::ha_repair(THD *thd, HA_CHECK_OPT *check_opt) {
4701 int result;
4702 mark_trx_read_write();
4703
4704 result = repair(thd, check_opt);
4705 DBUG_ASSERT(result == HA_ADMIN_NOT_IMPLEMENTED ||
4706 ha_table_flags() & HA_CAN_REPAIR);
4707
4708 // TODO: Check if table version in DD needs to be updated.
4709 // Previously we checked/updated FRM version here.
4710 return result;
4711 }
4712
/**
  Start bulk insert.

  Allow the handler to optimize for multiple row insert.

  @note rows == 0 means we will probably insert many rows.

  @param rows Estimated rows to insert
*/

void handler::ha_start_bulk_insert(ha_rows rows) {
  DBUG_TRACE;
  DBUG_ASSERT(table_share->tmp_table != NO_TMP_TABLE || m_lock_type == F_WRLCK);
  // Save the estimate; ha_end_bulk_insert() resets it to 0.
  estimation_rows_to_insert = rows;
  start_bulk_insert(rows);
}
4729
4730 /**
4731 End bulk insert.
4732
4733 @return Operation status
4734 @retval 0 Success
4735 @retval != 0 Failure (error code returned)
4736 */
4737
ha_end_bulk_insert()4738 int handler::ha_end_bulk_insert() {
4739 DBUG_TRACE;
4740 DBUG_ASSERT(table_share->tmp_table != NO_TMP_TABLE || m_lock_type == F_WRLCK);
4741 estimation_rows_to_insert = 0;
4742 return end_bulk_insert();
4743 }
4744
4745 /**
4746 Bulk update row: public interface.
4747
4748 @sa handler::bulk_update_row()
4749 */
4750
ha_bulk_update_row(const uchar * old_data,uchar * new_data,uint * dup_key_found)4751 int handler::ha_bulk_update_row(const uchar *old_data, uchar *new_data,
4752 uint *dup_key_found) {
4753 DBUG_ASSERT(table_share->tmp_table != NO_TMP_TABLE || m_lock_type == F_WRLCK);
4754 mark_trx_read_write();
4755
4756 return bulk_update_row(old_data, new_data, dup_key_found);
4757 }
4758
4759 /**
4760 Delete all rows: public interface.
4761
4762 @sa handler::delete_all_rows()
4763 */
4764
ha_delete_all_rows()4765 int handler::ha_delete_all_rows() {
4766 DBUG_ASSERT(table_share->tmp_table != NO_TMP_TABLE || m_lock_type == F_WRLCK);
4767 mark_trx_read_write();
4768
4769 return delete_all_rows();
4770 }
4771
4772 /**
4773 Truncate table: public interface.
4774
4775 @sa handler::truncate()
4776 */
4777
ha_truncate(dd::Table * table_def)4778 int handler::ha_truncate(dd::Table *table_def) {
4779 DBUG_ASSERT(table_share->tmp_table != NO_TMP_TABLE || m_lock_type == F_WRLCK);
4780 mark_trx_read_write();
4781
4782 return truncate(table_def);
4783 }
4784
4785 /**
4786 Optimize table: public interface.
4787
4788 @sa handler::optimize()
4789 */
4790
ha_optimize(THD * thd,HA_CHECK_OPT * check_opt)4791 int handler::ha_optimize(THD *thd, HA_CHECK_OPT *check_opt) {
4792 DBUG_ASSERT(table_share->tmp_table != NO_TMP_TABLE || m_lock_type == F_WRLCK);
4793 mark_trx_read_write();
4794
4795 return optimize(thd, check_opt);
4796 }
4797
4798 /**
4799 Analyze table: public interface.
4800
4801 @sa handler::analyze()
4802 */
4803
ha_analyze(THD * thd,HA_CHECK_OPT * check_opt)4804 int handler::ha_analyze(THD *thd, HA_CHECK_OPT *check_opt) {
4805 DBUG_ASSERT(table_share->tmp_table != NO_TMP_TABLE || m_lock_type != F_UNLCK);
4806 mark_trx_read_write();
4807
4808 return analyze(thd, check_opt);
4809 }
4810
4811 /**
4812 Check and repair table: public interface.
4813
4814 @sa handler::check_and_repair()
4815 */
4816
ha_check_and_repair(THD * thd)4817 bool handler::ha_check_and_repair(THD *thd) {
4818 DBUG_ASSERT(table_share->tmp_table != NO_TMP_TABLE || m_lock_type == F_UNLCK);
4819 mark_trx_read_write();
4820
4821 return check_and_repair(thd);
4822 }
4823
4824 /**
4825 Disable indexes: public interface.
4826
4827 @sa handler::disable_indexes()
4828 */
4829
ha_disable_indexes(uint mode)4830 int handler::ha_disable_indexes(uint mode) {
4831 DBUG_ASSERT(table_share->tmp_table != NO_TMP_TABLE || m_lock_type != F_UNLCK);
4832 mark_trx_read_write();
4833
4834 return disable_indexes(mode);
4835 }
4836
4837 /**
4838 Enable indexes: public interface.
4839
4840 @sa handler::enable_indexes()
4841 */
4842
ha_enable_indexes(uint mode)4843 int handler::ha_enable_indexes(uint mode) {
4844 DBUG_ASSERT(table_share->tmp_table != NO_TMP_TABLE || m_lock_type != F_UNLCK);
4845 mark_trx_read_write();
4846
4847 return enable_indexes(mode);
4848 }
4849
4850 /**
4851 Discard or import tablespace: public interface.
4852
4853 @sa handler::discard_or_import_tablespace()
4854 */
4855
ha_discard_or_import_tablespace(bool discard,dd::Table * table_def)4856 int handler::ha_discard_or_import_tablespace(bool discard,
4857 dd::Table *table_def) {
4858 DBUG_ASSERT(table_share->tmp_table != NO_TMP_TABLE || m_lock_type == F_WRLCK);
4859 mark_trx_read_write();
4860
4861 return discard_or_import_tablespace(discard, table_def);
4862 }
4863
ha_prepare_inplace_alter_table(TABLE * altered_table,Alter_inplace_info * ha_alter_info,const dd::Table * old_table_def,dd::Table * new_table_def)4864 bool handler::ha_prepare_inplace_alter_table(TABLE *altered_table,
4865 Alter_inplace_info *ha_alter_info,
4866 const dd::Table *old_table_def,
4867 dd::Table *new_table_def) {
4868 DBUG_ASSERT(table_share->tmp_table != NO_TMP_TABLE || m_lock_type != F_UNLCK);
4869 mark_trx_read_write();
4870
4871 return prepare_inplace_alter_table(altered_table, ha_alter_info,
4872 old_table_def, new_table_def);
4873 }
4874
/**
  Commit or roll back changes of an in-place ALTER TABLE: public interface.

  @sa handler::commit_inplace_alter_table()
*/
bool handler::ha_commit_inplace_alter_table(TABLE *altered_table,
                                            Alter_inplace_info *ha_alter_info,
                                            bool commit,
                                            const dd::Table *old_table_def,
                                            dd::Table *new_table_def) {
  /*
    At this point we should have an exclusive metadata lock on the table.
    The exception is if we're about to roll back changes (commit= false).
    In this case, we might be rolling back after a failed lock upgrade,
    so we could be holding the same lock level as for inplace_alter_table().
  */
  DBUG_ASSERT(ha_thd()->mdl_context.owns_equal_or_stronger_lock(
                  MDL_key::TABLE, table->s->db.str, table->s->table_name.str,
                  MDL_EXCLUSIVE) ||
              !commit);

  return commit_inplace_alter_table(altered_table, ha_alter_info, commit,
                                    old_table_def, new_table_def);
}
4894
/*
  Default implementation to support in-place/instant alter table
  for operations which do not affect table data.
*/

enum_alter_inplace_result handler::check_if_supported_inplace_alter(
    TABLE *altered_table MY_ATTRIBUTE((unused)),
    Alter_inplace_info *ha_alter_info) {
  DBUG_TRACE;

  HA_CREATE_INFO *create_info = ha_alter_info->create_info;

  // Metadata-only operations that the default implementation can accept.
  Alter_inplace_info::HA_ALTER_FLAGS inplace_offline_operations =
      Alter_inplace_info::ALTER_COLUMN_EQUAL_PACK_LENGTH |
      Alter_inplace_info::ALTER_COLUMN_NAME |
      Alter_inplace_info::ALTER_COLUMN_DEFAULT |
      Alter_inplace_info::CHANGE_CREATE_OPTION |
      Alter_inplace_info::ALTER_RENAME | Alter_inplace_info::RENAME_INDEX |
      Alter_inplace_info::ALTER_INDEX_COMMENT |
      Alter_inplace_info::CHANGE_INDEX_OPTION |
      Alter_inplace_info::ALTER_COLUMN_INDEX_LENGTH;

  /* Is there at least one operation that requires copy algorithm? */
  if (ha_alter_info->handler_flags & ~inplace_offline_operations)
    return HA_ALTER_INPLACE_NOT_SUPPORTED;

  /*
    ALTER TABLE tbl_name CONVERT TO CHARACTER SET .. and
    ALTER TABLE table_name DEFAULT CHARSET = .. most likely
    change column charsets and so not supported in-place through
    old API.

    Changing of PACK_KEYS, MAX_ROWS and ROW_FORMAT options were
    not supported as in-place operations in old API either.
  */
  if (create_info->used_fields &
          (HA_CREATE_USED_CHARSET | HA_CREATE_USED_DEFAULT_CHARSET |
           HA_CREATE_USED_PACK_KEYS | HA_CREATE_USED_MAX_ROWS) ||
      (table->s->row_type != create_info->row_type))
    return HA_ALTER_INPLACE_NOT_SUPPORTED;

  // The presence of engine attributes does not prevent inplace so
  // that we get the same behavior as COMMENT. If SEs support engine
  // attribute values which are incompatible with INPLACE the need to
  // check for that when overriding (as they must do for parsed
  // comments).

  // Packed-length-preserving column changes get a weaker compatibility
  // requirement when asking the engine about data compatibility.
  uint table_changes = (ha_alter_info->handler_flags &
                        Alter_inplace_info::ALTER_COLUMN_EQUAL_PACK_LENGTH)
                           ? IS_EQUAL_PACK_LENGTH
                           : IS_EQUAL_YES;
  if (table->file->check_if_incompatible_data(create_info, table_changes) ==
      COMPATIBLE_DATA_YES)
    return HA_ALTER_INPLACE_INSTANT;

  return HA_ALTER_INPLACE_NOT_SUPPORTED;
}
4952
report_unsupported_error(const char * not_supported,const char * try_instead)4953 void Alter_inplace_info::report_unsupported_error(const char *not_supported,
4954 const char *try_instead) {
4955 if (unsupported_reason == nullptr)
4956 my_error(ER_ALTER_OPERATION_NOT_SUPPORTED, MYF(0), not_supported,
4957 try_instead);
4958 else
4959 my_error(ER_ALTER_OPERATION_NOT_SUPPORTED_REASON, MYF(0), not_supported,
4960 unsupported_reason, try_instead);
4961 }
4962
4963 /**
4964 Rename table: public interface.
4965
4966 @sa handler::rename_table()
4967 */
4968
ha_rename_table(const char * from,const char * to,const dd::Table * from_table_def,dd::Table * to_table_def)4969 int handler::ha_rename_table(const char *from, const char *to,
4970 const dd::Table *from_table_def,
4971 dd::Table *to_table_def) {
4972 DBUG_ASSERT(m_lock_type == F_UNLCK);
4973 mark_trx_read_write();
4974
4975 return rename_table(from, to, from_table_def, to_table_def);
4976 }
4977
4978 /**
4979 Delete table: public interface.
4980
4981 @sa handler::delete_table()
4982 */
4983
ha_delete_table(const char * name,const dd::Table * table_def)4984 int handler::ha_delete_table(const char *name, const dd::Table *table_def) {
4985 DBUG_ASSERT(m_lock_type == F_UNLCK);
4986 mark_trx_read_write();
4987
4988 return delete_table(name, table_def);
4989 }
4990
4991 /**
4992 Drop table in the engine: public interface.
4993
4994 @sa handler::drop_table()
4995 */
4996
ha_drop_table(const char * name)4997 void handler::ha_drop_table(const char *name) {
4998 DBUG_ASSERT(m_lock_type == F_UNLCK);
4999 mark_trx_read_write();
5000
5001 return drop_table(name);
5002 }
5003
5004 /**
5005 Create a table in the engine: public interface.
5006
5007 @sa handler::create()
5008 */
5009
ha_create(const char * name,TABLE * form,HA_CREATE_INFO * info,dd::Table * table_def)5010 int handler::ha_create(const char *name, TABLE *form, HA_CREATE_INFO *info,
5011 dd::Table *table_def) {
5012 DBUG_ASSERT(m_lock_type == F_UNLCK);
5013 mark_trx_read_write();
5014
5015 return create(name, form, info, table_def);
5016 }
5017
5018 /**
5019 * Prepares the secondary engine for table load.
5020 *
5021 * @param table The table to load into the secondary engine. Its read_set tells
5022 * which columns to load.
5023 *
5024 * @sa handler::prepare_load_table()
5025 */
ha_prepare_load_table(const TABLE & table)5026 int handler::ha_prepare_load_table(const TABLE &table) {
5027 return prepare_load_table(table);
5028 }
5029
5030 /**
5031 * Loads a table into its defined secondary storage engine: public interface.
5032 *
5033 * @param table The table to load into the secondary engine. Its read_set tells
5034 * which columns to load.
5035 *
5036 * @sa handler::load_table()
5037 */
ha_load_table(const TABLE & table)5038 int handler::ha_load_table(const TABLE &table) { return load_table(table); }
5039
5040 /**
5041 * Unloads a table from its defined secondary storage engine: public interface.
5042 *
5043 * @sa handler::unload_table()
5044 */
ha_unload_table(const char * db_name,const char * table_name,bool error_if_not_loaded)5045 int handler::ha_unload_table(const char *db_name, const char *table_name,
5046 bool error_if_not_loaded) {
5047 return unload_table(db_name, table_name, error_if_not_loaded);
5048 }
5049
5050 /**
5051 Get the hard coded SE private data from the handler for a DD table.
5052
5053 @sa handler::get_se_private_data()
5054 */
ha_get_se_private_data(dd::Table * dd_table,bool reset)5055 bool handler::ha_get_se_private_data(dd::Table *dd_table, bool reset) {
5056 return get_se_private_data(dd_table, reset);
5057 }
5058
/**
  Tell the storage engine that it is allowed to "disable transaction" in the
  handler. It is a hint that ACID is not required - it is used in NDB for
  ALTER TABLE, for example, when data are copied to temporary table.
  A storage engine may treat this hint any way it likes. NDB for example
  starts to commit every now and then automatically.
  This hint can be safely ignored.

  @param thd  Thread context.
  @param on   true to (re-)enable transactions, false to disable.

  @return 0 on success, otherwise an error code from the implicit commit.
*/
int ha_enable_transaction(THD *thd, bool on) {
  int error = 0;
  DBUG_TRACE;
  DBUG_PRINT("enter", ("on: %d", (int)on));

  // The assignment inside the condition is intentional: the flag is always
  // updated, and the branch runs only when transactions are being enabled.
  if ((thd->get_transaction()->m_flags.enabled = on)) {
    /*
      Now all storage engines should have transaction handling enabled.
      But some may have it enabled all the time - "disabling" transactions
      is an optimization hint that storage engine is free to ignore.
      So, let's commit an open transaction (if any) now.
    */
    if (!(error = ha_commit_trans(thd, false)))
      error = trans_commit_implicit(thd);
  }
  return error;
}
5084
/**
  Read the next row via the active index, but only if it still matches
  the given key.

  Fetches the next row with index_next() and then compares it against
  @p key using key_cmp_if_same(); a mismatch is reported as
  HA_ERR_END_OF_FILE, ending the range scan.

  @param buf     Buffer to receive the row.
  @param key     Key value the next row must continue to match.
  @param keylen  Length of the key to compare.

  @return 0 on a matching row, HA_ERR_END_OF_FILE when the range is
          exhausted, or the error code from index_next().
*/
int handler::index_next_same(uchar *buf, const uchar *key, uint keylen) {
  int error;
  DBUG_TRACE;
  if (!(error = index_next(buf))) {
    ptrdiff_t ptrdiff = buf - table->record[0];
    uchar *save_record_0 = nullptr;
    KEY *key_info = nullptr;
    KEY_PART_INFO *key_part = nullptr;
    KEY_PART_INFO *key_part_end = nullptr;

    /*
      key_cmp_if_same() compares table->record[0] against 'key'.
      In parts it uses table->record[0] directly, in parts it uses
      field objects with their local pointers into table->record[0].
      If 'buf' is distinct from table->record[0], we need to move
      all record references. This is table->record[0] itself and
      the field pointers of the fields used in this key.
    */
    if (ptrdiff) {
      save_record_0 = table->record[0];
      table->record[0] = buf;
      key_info = table->key_info + active_index;
      key_part = key_info->key_part;
      key_part_end = key_part + key_info->user_defined_key_parts;
      for (; key_part < key_part_end; key_part++) {
        DBUG_ASSERT(key_part->field);
        key_part->field->move_field_offset(ptrdiff);
      }
    }

    if (key_cmp_if_same(table, key, active_index, keylen))
      error = HA_ERR_END_OF_FILE;

    /* Move back if necessary. */
    if (ptrdiff) {
      table->record[0] = save_record_0;
      for (key_part = key_info->key_part; key_part < key_part_end; key_part++)
        key_part->field->move_field_offset(-ptrdiff);
    }
  }
  return error;
}
5127
5128 /****************************************************************************
** Some general functions that aren't in the handler class
5130 ****************************************************************************/
5131
/**
  Initiates table-file and calls appropriate database-creator.

  @param thd                Thread context.
  @param path               Path to table file (without extension).
  @param db                 Database name.
  @param table_name         Table name.
  @param create_info        HA_CREATE_INFO describing table.
  @param update_create_info Indicates that create_info needs to be
                            updated from table share.
  @param is_temp_table      Indicates that this is temporary table (for
                            cases when this info is not available from
                            HA_CREATE_INFO).
  @param table_def          Data-dictionary object describing table to
                            be used for table creation. Can be adjusted
                            by storage engine if it supports atomic DDL.
                            For non-temporary tables these changes will
                            be saved to the data-dictionary by this call.

  @retval
   0  ok
  @retval
   1  error
*/
int ha_create_table(THD *thd, const char *path, const char *db,
                    const char *table_name, HA_CREATE_INFO *create_info,
                    bool update_create_info, bool is_temp_table,
                    dd::Table *table_def) {
  int error = 1;
  TABLE table;
  char name_buff[FN_REFLEN];
  const char *name;
  TABLE_SHARE share;
#ifdef HAVE_PSI_TABLE_INTERFACE
  // Temporary status can come from the explicit flag, the create options,
  // or the path containing the temp-file prefix.
  bool temp_table = is_temp_table ||
                    (create_info->options & HA_LEX_CREATE_TMP_TABLE) ||
                    (strstr(path, tmp_file_prefix) != nullptr);
#endif
  DBUG_TRACE;

  init_tmp_table_share(thd, &share, db, 0, table_name, path, nullptr);

  if (open_table_def(thd, &share, *table_def)) goto err;

#ifdef HAVE_PSI_TABLE_INTERFACE
  share.m_psi = PSI_TABLE_CALL(get_table_share)(temp_table, &share);
#endif

  // When db_stat is 0, we can pass nullptr as dd::Table since it won't be used.
  // Put the stack TABLE object into a known state before
  // open_table_from_share() initializes it.
  destroy(&table);
  if (open_table_from_share(thd, &share, "", 0, (uint)READ_ALL, 0, &table, true,
                            nullptr)) {
#ifdef HAVE_PSI_TABLE_INTERFACE
    PSI_TABLE_CALL(drop_table_share)
    (temp_table, db, strlen(db), table_name, strlen(table_name));
#endif
    goto err;
  }

  if (update_create_info) update_create_info_from_table(create_info, &table);

  name = get_canonical_filename(table.file, share.path.str, name_buff);

  error = table.file->ha_create(name, &table, create_info, table_def);

  if (error) {
    table.file->print_error(error, MYF(0));
#ifdef HAVE_PSI_TABLE_INTERFACE
    PSI_TABLE_CALL(drop_table_share)
    (temp_table, db, strlen(db), table_name, strlen(table_name));
#endif
  } else {
    /*
      We do post-create update only for engines supporting atomic DDL
      as only such engines are allowed to update dd::Table objects in
      handler::ha_create().
      The dd::Table objects for temporary tables are not stored in DD
      so do not need DD update.
      The dd::Table objects representing the DD tables themselves cannot
      be stored until the DD tables have been created in the SE.
    */
    if (!((create_info->options & HA_LEX_CREATE_TMP_TABLE) || is_temp_table ||
          dd::get_dictionary()->is_dd_table_name(db, table_name)) &&
        (table.file->ht->flags & HTON_SUPPORTS_ATOMIC_DDL)) {
      if (thd->dd_client()->update<dd::Table>(table_def)) error = 1;
    }
  }
  (void)closefrm(&table, false);
err:
  free_table_share(&share);
  return error != 0;
}
5224
5225 /**
5226 Try to discover table from engine.
5227
5228 @note
5229 If found, import the serialized dictionary information.
5230
5231 @retval
    -1 Table did not exist
5233 @retval
5234 0 Table created ok
5235 @retval
5236 > 0 Error, table existed but could not be created
5237 */
int ha_create_table_from_engine(THD *thd, const char *db, const char *name) {
  int error;
  uchar *sdi_blob;
  size_t sdi_len;
  DBUG_TRACE;
  DBUG_PRINT("enter", ("name '%s'.'%s'", db, name));

  // Ask the storage engines for the table's serialized dictionary info.
  if ((error = ha_discover(thd, db, name, &sdi_blob, &sdi_len))) {
    /* Table could not be discovered and thus not created */
    return error;
  }

  /*
    Table was successfully discovered from SE, check if SDI need
    to be installed or if that has already been done by SE.
    No SDI blob returned from SE indicates it has installed
    the table definition for this table into DD itself.
    Otherwise, import the SDI based on the sdi_blob and sdi_len,
    which are set.
  */
  if (sdi_blob) {
    error = import_serialized_meta_data(sdi_blob, sdi_len, true);
    my_free(sdi_blob);
    if (error) return 2;
  }

  // Re-read the (now installed) table definition from the data dictionary.
  dd::cache::Dictionary_client::Auto_releaser releaser(thd->dd_client());
  const dd::Table *table_def = nullptr;
  if (thd->dd_client()->acquire(db, name, &table_def)) return 3;

  if (table_def == nullptr) {
    my_error(ER_NO_SUCH_TABLE, MYF(0), db, name);
    return 3;
  }

  char path[FN_REFLEN + 1];
  build_table_filename(path, sizeof(path) - 1, db, name, "", 0);

  // Open a temporary TABLE_SHARE/TABLE pair so ha_create() can be called.
  TABLE_SHARE share;
  init_tmp_table_share(thd, &share, db, 0, name, path, nullptr);

  // NOTE(review): on this error path the share is not freed with
  // free_table_share() — confirm open_table_def() cleans up on failure.
  if (open_table_def(thd, &share, *table_def)) return 3;

  TABLE table;
  // When db_stat is 0, we can pass nullptr as dd::Table since it won't be used.
  if (open_table_from_share(thd, &share, "", 0, 0, 0, &table, false, nullptr)) {
    free_table_share(&share);
    return 3;
  }

  HA_CREATE_INFO create_info;
  update_create_info_from_table(&create_info, &table);
  // Mark the create as originating from engine discovery, not user DDL.
  create_info.table_options |= HA_OPTION_CREATE_FROM_ENGINE;

  get_canonical_filename(table.file, path, path);
  // Pass a clone to ha_create(), presumably so the cached dd::Table
  // object stays unmodified; the clone is discarded (see note below).
  std::unique_ptr<dd::Table> table_def_clone(table_def->clone());
  error =
      table.file->ha_create(path, &table, &create_info, table_def_clone.get());
  /*
    Note that the table_def_clone is not stored into the DD,
    necessary changes to the table_def should already have
    been done in ha_discover/import_serialized_meta_data.
  */
  (void)closefrm(&table, true);

  return error != 0;
}
5305
5306 /**
5307 Try to find a table in a storage engine.
5308
5309 @param thd Thread handle
5310 @param db Normalized table schema name
5311 @param name Normalized table name.
5312 @param[out] exists Only valid if the function succeeded.
5313
5314 @retval true An error is found
5315 @retval false Success, check *exists
5316 */
5317
ha_check_if_table_exists(THD * thd,const char * db,const char * name,bool * exists)5318 bool ha_check_if_table_exists(THD *thd, const char *db, const char *name,
5319 bool *exists) {
5320 uchar *frmblob = nullptr;
5321 size_t frmlen;
5322 DBUG_TRACE;
5323
5324 *exists = !ha_discover(thd, db, name, &frmblob, &frmlen);
5325 if (*exists) my_free(frmblob);
5326
5327 return false;
5328 }
5329
5330 /**
5331 Check if a table specified by name is a system table.
5332
5333 @param db Database name for the table.
5334 @param table_name Table name to be checked.
5335 @param[out] is_sql_layer_system_table True if a system table belongs to
5336 sql_layer.
5337
5338 @return Operation status
5339 @retval true If the table name is a system table.
5340 @retval false If the table name is a user-level table.
5341 */
5342
check_if_system_table(const char * db,const char * table_name,bool * is_sql_layer_system_table)5343 static bool check_if_system_table(const char *db, const char *table_name,
5344 bool *is_sql_layer_system_table) {
5345 // Check if we have the system database name in the command.
5346 if (!dd::get_dictionary()->is_dd_schema_name(db)) return false;
5347
5348 // Check if this is SQL layer system tables.
5349 if (dd::get_dictionary()->is_system_table_name(db, table_name))
5350 *is_sql_layer_system_table = true;
5351
5352 return true;
5353 }
5354
5355 /**
5356 @brief Check if a given table is a system table.
5357
5358 @details The primary purpose of introducing this function is to stop system
5359 tables to be created or being moved to undesired storage engines.
5360
5361 @todo There is another function called is_system_table_name() used by
5362 get_table_category(), which is used to set TABLE_SHARE table_category.
5363 It checks only a subset of table name like proc, event and time*.
5364 We cannot use below function in get_table_category(),
5365 as that affects locking mechanism. If we need to
5366 unify these functions, we need to fix locking issues generated.
5367
5368 @param hton Handlerton of new engine.
5369 @param db Database name.
5370 @param table_name Table name to be checked.
5371
5372 @return Operation status
5373 @retval true If the table name is a valid system table
5374 or if its a valid user table.
5375
5376 @retval false If the table name is a system table name
5377 and does not belong to engine specified
5378 in the command.
5379 */
5380
ha_check_if_supported_system_table(handlerton * hton,const char * db,const char * table_name)5381 bool ha_check_if_supported_system_table(handlerton *hton, const char *db,
5382 const char *table_name) {
5383 DBUG_TRACE;
5384 st_sys_tbl_chk_params check_params;
5385
5386 check_params.is_sql_layer_system_table = false;
5387 if (!check_if_system_table(db, table_name,
5388 &check_params.is_sql_layer_system_table))
5389 return true; // It's a user table name
5390
5391 // Check if this is a system table and if some engine supports it.
5392 check_params.status = check_params.is_sql_layer_system_table
5393 ? st_sys_tbl_chk_params::KNOWN_SYSTEM_TABLE
5394 : st_sys_tbl_chk_params::NOT_KNOWN_SYSTEM_TABLE;
5395 check_params.db_type = hton->db_type;
5396 check_params.table_name = table_name;
5397 check_params.db = db;
5398 plugin_foreach(nullptr, check_engine_system_table_handlerton,
5399 MYSQL_STORAGE_ENGINE_PLUGIN, &check_params);
5400
5401 // SE does not support this system table.
5402 if (check_params.status == st_sys_tbl_chk_params::KNOWN_SYSTEM_TABLE)
5403 return false;
5404
5405 // It's a system table or a valid user table.
5406 return true;
5407 }
5408
5409 /**
5410 @brief Called for each SE to check if given db, tablename is a system table.
5411
5412 @details The primary purpose of introducing this function is to stop system
5413 tables to be created or being moved to undesired storage engines.
5414
5415 @param plugin Points to specific SE.
5416 @param arg Is of type struct st_sys_tbl_chk_params.
5417
5418 @note
5419 args->status Indicates OUT param,
5420 see struct st_sys_tbl_chk_params definition for more info.
5421
5422 @return Operation status
5423 @retval true There was a match found.
5424 This will stop doing checks with other SE's.
5425
5426 @retval false There was no match found.
5427 Other SE's will be checked to find a match.
5428 */
check_engine_system_table_handlerton(THD *,plugin_ref plugin,void * arg)5429 static bool check_engine_system_table_handlerton(THD *, plugin_ref plugin,
5430 void *arg) {
5431 st_sys_tbl_chk_params *check_params = (st_sys_tbl_chk_params *)arg;
5432 handlerton *hton = plugin_data<handlerton *>(plugin);
5433
5434 // Do we already know that the table is a system table?
5435 if (check_params->status == st_sys_tbl_chk_params::KNOWN_SYSTEM_TABLE) {
5436 /*
5437 If this is the same SE specified in the command, we can
5438 simply ask the SE if it supports it stop the search regardless.
5439 */
5440 if (hton->db_type == check_params->db_type) {
5441 if (hton->is_supported_system_table &&
5442 hton->is_supported_system_table(
5443 check_params->db, check_params->table_name,
5444 check_params->is_sql_layer_system_table))
5445 check_params->status = st_sys_tbl_chk_params::SUPPORTED_SYSTEM_TABLE;
5446 return true;
5447 }
5448 /*
5449 If this is a different SE, there is no point in asking the SE
5450 since we already know it's a system table and we don't care
5451 if it is supported or not.
5452 */
5453 return false;
5454 }
5455
5456 /*
5457 We don't yet know if the table is a system table or not.
5458 We therefore must always ask the SE.
5459 */
5460 if (hton->is_supported_system_table &&
5461 hton->is_supported_system_table(
5462 check_params->db, check_params->table_name,
5463 check_params->is_sql_layer_system_table)) {
5464 /*
5465 If this is the same SE specified in the command, we know it's a
5466 supported system table and can stop the search.
5467 */
5468 if (hton->db_type == check_params->db_type) {
5469 check_params->status = st_sys_tbl_chk_params::SUPPORTED_SYSTEM_TABLE;
5470 return true;
5471 } else
5472 check_params->status = st_sys_tbl_chk_params::KNOWN_SYSTEM_TABLE;
5473 }
5474
5475 return false;
5476 }
5477
rm_tmp_tables_handlerton(THD * thd,plugin_ref plugin,void * files)5478 static bool rm_tmp_tables_handlerton(THD *thd, plugin_ref plugin, void *files) {
5479 handlerton *hton = plugin_data<handlerton *>(plugin);
5480
5481 if (hton->state == SHOW_OPTION_YES && hton->rm_tmp_tables &&
5482 hton->rm_tmp_tables(hton, thd, (List<LEX_STRING> *)files))
5483 return true;
5484
5485 return false;
5486 }
5487
5488 /**
5489 Ask all SEs to drop all temporary tables which have been left from
5490 previous server run. Used on server start-up.
5491
5492 @param[in] thd Thread context.
5493 @param[in,out] files List of files in directories for temporary files
5494 which match tmp_file_prefix and thus can belong to
5495 temporary tables. If any SE recognizes some file as
5496 belonging to temporary table in this SE and deletes
5497 the file it is also supposed to remove file from
5498 this list.
5499 */
5500
ha_rm_tmp_tables(THD * thd,List<LEX_STRING> * files)5501 bool ha_rm_tmp_tables(THD *thd, List<LEX_STRING> *files) {
5502 return plugin_foreach(thd, rm_tmp_tables_handlerton,
5503 MYSQL_STORAGE_ENGINE_PLUGIN, files);
5504 }
5505
5506 /**
5507 Default implementation for handlerton::rm_tmp_tables() method which
5508 simply removes all files from "files" list which have one of SE's
5509 extensions. This implementation corresponds to default implementation
5510 of handler::delete_table() method.
5511 */
5512
default_rm_tmp_tables(handlerton * hton,THD *,List<LEX_STRING> * files)5513 bool default_rm_tmp_tables(handlerton *hton, THD *, List<LEX_STRING> *files) {
5514 List_iterator<LEX_STRING> files_it(*files);
5515 LEX_STRING *file_path;
5516
5517 if (!hton->file_extensions) return false;
5518
5519 while ((file_path = files_it++)) {
5520 const char *file_ext = fn_ext(file_path->str);
5521
5522 for (const char **ext = hton->file_extensions; *ext; ext++) {
5523 if (strcmp(file_ext, *ext) == 0) {
5524 if (my_is_symlink(file_path->str, nullptr) &&
5525 test_if_data_home_dir(file_path->str)) {
5526 /*
5527 For safety reasons, if temporary table file is a symlink pointing
5528 to a file in the data directory, don't delete the file, delete
5529 symlink file only. It would be nicer to not delete symlinked files
5530 at all but MyISAM supports temporary tables with DATA
5531 DIRECTORY/INDEX DIRECTORY options.
5532 */
5533 (void)mysql_file_delete(key_file_misc, file_path->str, MYF(0));
5534 } else
5535 (void)mysql_file_delete_with_symlink(key_file_misc, file_path->str,
5536 MYF(0));
5537 files_it.remove();
5538 break;
5539 }
5540 }
5541 }
5542 return false;
5543 }
5544
5545 /*****************************************************************************
5546 Key cache handling.
5547
5548 This code is only relevant for ISAM/MyISAM tables
5549
5550 key_cache->cache may be 0 only in the case where a key cache is not
  initialized or when we were not able to init the key cache in a previous
5552 call to ha_init_key_cache() (probably out of memory)
5553 *****************************************************************************/
5554
5555 /**
  Init a key cache if it has not been initialized before.
5557 */
ha_init_key_cache(const char *,KEY_CACHE * key_cache)5558 int ha_init_key_cache(const char *, KEY_CACHE *key_cache) {
5559 DBUG_TRACE;
5560
5561 if (!key_cache->key_cache_inited) {
5562 mysql_mutex_lock(&LOCK_global_system_variables);
5563 size_t tmp_buff_size = (size_t)key_cache->param_buff_size;
5564 ulonglong tmp_block_size = key_cache->param_block_size;
5565 ulonglong division_limit = key_cache->param_division_limit;
5566 ulonglong age_threshold = key_cache->param_age_threshold;
5567 mysql_mutex_unlock(&LOCK_global_system_variables);
5568 return !init_key_cache(key_cache, tmp_block_size, tmp_buff_size,
5569 division_limit, age_threshold);
5570 }
5571 return 0;
5572 }
5573
5574 /**
5575 Resize key cache.
5576 */
ha_resize_key_cache(KEY_CACHE * key_cache)5577 int ha_resize_key_cache(KEY_CACHE *key_cache) {
5578 DBUG_TRACE;
5579
5580 if (key_cache->key_cache_inited) {
5581 mysql_mutex_lock(&LOCK_global_system_variables);
5582 size_t tmp_buff_size = (size_t)key_cache->param_buff_size;
5583 ulonglong tmp_block_size = key_cache->param_block_size;
5584 ulonglong division_limit = key_cache->param_division_limit;
5585 ulonglong age_threshold = key_cache->param_age_threshold;
5586 mysql_mutex_unlock(&LOCK_global_system_variables);
5587 const int retval =
5588 resize_key_cache(key_cache, keycache_thread_var(), tmp_block_size,
5589 tmp_buff_size, division_limit, age_threshold);
5590 return !retval;
5591 }
5592 return 0;
5593 }
5594
5595 /**
5596 Move all tables from one key cache to another one.
5597 */
ha_change_key_cache(KEY_CACHE * old_key_cache,KEY_CACHE * new_key_cache)5598 int ha_change_key_cache(KEY_CACHE *old_key_cache, KEY_CACHE *new_key_cache) {
5599 mi_change_key_cache(old_key_cache, new_key_cache);
5600 return 0;
5601 }
5602
/* Argument bundle passed to discover_handlerton() via plugin_foreach(). */
struct st_discover_args {
  const char *db;    // Schema name of the table to discover.
  const char *name;  // Table name to discover.
  uchar **frmblob;   // [out] Receives the serialized definition blob.
  size_t *frmlen;    // [out] Receives the length of *frmblob.
};
5609
discover_handlerton(THD * thd,plugin_ref plugin,void * arg)5610 static bool discover_handlerton(THD *thd, plugin_ref plugin, void *arg) {
5611 st_discover_args *vargs = (st_discover_args *)arg;
5612 handlerton *hton = plugin_data<handlerton *>(plugin);
5613 if (hton->state == SHOW_OPTION_YES && hton->discover &&
5614 (!(hton->discover(hton, thd, vargs->db, vargs->name, vargs->frmblob,
5615 vargs->frmlen))))
5616 return true;
5617
5618 return false;
5619 }
5620
5621 /**
5622 Try to discover one table from handler(s).
5623
5624 @param[in] thd Thread context.
5625 @param[in] db Schema of table
5626 @param[in] name Name of table
  @param[out] frmblob     Pointer to blob with table definition.
5628 @param[out] frmlen Length of the returned table definition blob
5629
5630 @retval
    -1 Table did not exist
5632 @retval
5633 0 OK. Table could be discovered from SE.
5634 The *frmblob and *frmlen may be set if returning a blob
5635 which should be installed into data dictionary
5636 by the caller.
5637
5638 @retval
5639 >0 error. frmblob and frmlen may not be set
5640
5641 */
ha_discover(THD * thd,const char * db,const char * name,uchar ** frmblob,size_t * frmlen)5642 static int ha_discover(THD *thd, const char *db, const char *name,
5643 uchar **frmblob, size_t *frmlen) {
5644 int error = -1; // Table does not exist in any handler
5645 DBUG_TRACE;
5646 DBUG_PRINT("enter", ("db: %s, name: %s", db, name));
5647 st_discover_args args = {db, name, frmblob, frmlen};
5648
5649 if (is_prefix(name, tmp_file_prefix)) /* skip temporary tables */
5650 return error;
5651
5652 if (plugin_foreach(thd, discover_handlerton, MYSQL_STORAGE_ENGINE_PLUGIN,
5653 &args))
5654 error = 0;
5655
5656 if (!error) {
5657 DBUG_ASSERT(!thd->status_var_aggregated);
5658 thd->status_var.ha_discover_count++;
5659 }
5660 return error;
5661 }
5662
5663 /**
  Call this function in order to give the handler the possibility
5665 to ask engine if there are any new tables that should be written to disk
5666 or any dropped tables that need to be removed from disk
5667 */
/* Argument bundle forwarded to each SE's find_files() hook. */
struct st_find_files_args {
  const char *db;           // Schema being listed.
  const char *path;         // Filesystem path to scan.
  const char *wild;         // Wildcard pattern, or nullptr.
  bool dir;                 // Presumably: list directories, not files — TODO confirm.
  List<LEX_STRING> *files;  // [in,out] Files collected so far.
};
5675
find_files_handlerton(THD * thd,plugin_ref plugin,void * arg)5676 static bool find_files_handlerton(THD *thd, plugin_ref plugin, void *arg) {
5677 st_find_files_args *vargs = (st_find_files_args *)arg;
5678 handlerton *hton = plugin_data<handlerton *>(plugin);
5679
5680 if (hton->state == SHOW_OPTION_YES && hton->find_files)
5681 if (hton->find_files(hton, thd, vargs->db, vargs->path, vargs->wild,
5682 vargs->dir, vargs->files))
5683 return true;
5684
5685 return false;
5686 }
5687
ha_find_files(THD * thd,const char * db,const char * path,const char * wild,bool dir,List<LEX_STRING> * files)5688 int ha_find_files(THD *thd, const char *db, const char *path, const char *wild,
5689 bool dir, List<LEX_STRING> *files) {
5690 int error = 0;
5691 DBUG_TRACE;
5692 DBUG_PRINT("enter", ("db: '%s' path: '%s' wild: '%s' dir: %d", db, path,
5693 wild ? wild : "NULL", dir));
5694 st_find_files_args args = {db, path, wild, dir, files};
5695
5696 plugin_foreach(thd, find_files_handlerton, MYSQL_STORAGE_ENGINE_PLUGIN,
5697 &args);
5698 /* The return value is not currently used */
5699 return error;
5700 }
5701
5702 /**
5703 Ask handler if the table exists in engine.
5704 @retval
5705 HA_ERR_NO_SUCH_TABLE Table does not exist
5706 @retval
5707 HA_ERR_TABLE_EXIST Table exists
5708 */
/* Argument bundle for table_exists_in_engine_handlerton(). */
struct st_table_exists_in_engine_args {
  const char *db;    // Schema name to probe.
  const char *name;  // Table name to probe.
  int err;           // [out] HA_ERR_TABLE_EXIST or HA_ERR_NO_SUCH_TABLE.
};
5714
table_exists_in_engine_handlerton(THD * thd,plugin_ref plugin,void * arg)5715 static bool table_exists_in_engine_handlerton(THD *thd, plugin_ref plugin,
5716 void *arg) {
5717 st_table_exists_in_engine_args *vargs = (st_table_exists_in_engine_args *)arg;
5718 handlerton *hton = plugin_data<handlerton *>(plugin);
5719
5720 int err = HA_ERR_NO_SUCH_TABLE;
5721
5722 if (hton->state == SHOW_OPTION_YES && hton->table_exists_in_engine)
5723 err = hton->table_exists_in_engine(hton, thd, vargs->db, vargs->name);
5724
5725 vargs->err = err;
5726 if (vargs->err == HA_ERR_TABLE_EXIST) return true;
5727
5728 return false;
5729 }
5730
ha_table_exists_in_engine(THD * thd,const char * db,const char * name)5731 int ha_table_exists_in_engine(THD *thd, const char *db, const char *name) {
5732 DBUG_TRACE;
5733 DBUG_PRINT("enter", ("db: %s, name: %s", db, name));
5734 st_table_exists_in_engine_args args = {db, name, HA_ERR_NO_SUCH_TABLE};
5735 plugin_foreach(thd, table_exists_in_engine_handlerton,
5736 MYSQL_STORAGE_ENGINE_PLUGIN, &args);
5737 DBUG_PRINT("exit", ("error: %d", args.err));
5738 return args.err;
5739 }
5740
5741 /*
5742 TODO: change this into a dynamic struct
5743 List<handlerton> does not work as
5744 1. binlog_end is called when MEM_ROOT is gone
5745 2. cannot work with thd MEM_ROOT as memory should be freed
5746 */
/* Upper bound on the number of handlertons collected per pass. */
#define MAX_HTON_LIST_ST 63
/* Fixed-capacity list of handlertons that have a binlog_func hook. */
struct hton_list_st {
  handlerton *hton[MAX_HTON_LIST_ST];
  uint sz;  // Number of valid entries in hton[].
};

/* One binlog callback request: the function id plus its opaque argument. */
struct binlog_func_st {
  enum_binlog_func fn;
  void *arg;
};
5757
5758 /** @brief
5759 Listing handlertons first to avoid recursive calls and deadlock
5760 */
binlog_func_list(THD *,plugin_ref plugin,void * arg)5761 static bool binlog_func_list(THD *, plugin_ref plugin, void *arg) {
5762 hton_list_st *hton_list = (hton_list_st *)arg;
5763 handlerton *hton = plugin_data<handlerton *>(plugin);
5764 if (hton->state == SHOW_OPTION_YES && hton->binlog_func) {
5765 uint sz = hton_list->sz;
5766 if (sz == MAX_HTON_LIST_ST - 1) {
5767 /* list full */
5768 return false;
5769 }
5770 hton_list->hton[sz] = hton;
5771 hton_list->sz = sz + 1;
5772 }
5773 return false;
5774 }
5775
binlog_func_foreach(THD * thd,binlog_func_st * bfn)5776 static bool binlog_func_foreach(THD *thd, binlog_func_st *bfn) {
5777 hton_list_st hton_list;
5778 uint i, sz;
5779
5780 hton_list.sz = 0;
5781 plugin_foreach(thd, binlog_func_list, MYSQL_STORAGE_ENGINE_PLUGIN,
5782 &hton_list);
5783
5784 for (i = 0, sz = hton_list.sz; i < sz; i++)
5785 hton_list.hton[i]->binlog_func(hton_list.hton[i], thd, bfn->fn, bfn->arg);
5786 return false;
5787 }
5788
ha_reset_logs(THD * thd)5789 int ha_reset_logs(THD *thd) {
5790 binlog_func_st bfn = {BFN_RESET_LOGS, nullptr};
5791 binlog_func_foreach(thd, &bfn);
5792 return 0;
5793 }
5794
ha_reset_slave(THD * thd)5795 void ha_reset_slave(THD *thd) {
5796 binlog_func_st bfn = {BFN_RESET_SLAVE, nullptr};
5797 binlog_func_foreach(thd, &bfn);
5798 }
5799
ha_binlog_wait(THD * thd)5800 void ha_binlog_wait(THD *thd) {
5801 binlog_func_st bfn = {BFN_BINLOG_WAIT, nullptr};
5802 binlog_func_foreach(thd, &bfn);
5803 }
5804
ha_binlog_index_purge_file(THD * thd,const char * file)5805 int ha_binlog_index_purge_file(THD *thd, const char *file) {
5806 binlog_func_st bfn = {BFN_BINLOG_PURGE_FILE, const_cast<char *>(file)};
5807 binlog_func_foreach(thd, &bfn);
5808 return 0;
5809 }
5810
/* Argument bundle for a binlog_log_query() broadcast to storage engines. */
struct binlog_log_query_st {
  enum_binlog_command binlog_command;  // Which command is being logged.
  const char *query;                   // Query text.
  size_t query_length;                 // Length of query in bytes.
  const char *db;                      // Schema of the affected table.
  const char *table_name;              // Name of the affected table.
};
5818
binlog_log_query_handlerton2(THD * thd,handlerton * hton,void * args)5819 static bool binlog_log_query_handlerton2(THD *thd, handlerton *hton,
5820 void *args) {
5821 struct binlog_log_query_st *b = (struct binlog_log_query_st *)args;
5822 if (hton->state == SHOW_OPTION_YES && hton->binlog_log_query)
5823 hton->binlog_log_query(hton, thd, b->binlog_command, b->query,
5824 b->query_length, b->db, b->table_name);
5825 return false;
5826 }
5827
binlog_log_query_handlerton(THD * thd,plugin_ref plugin,void * args)5828 static bool binlog_log_query_handlerton(THD *thd, plugin_ref plugin,
5829 void *args) {
5830 return binlog_log_query_handlerton2(thd, plugin_data<handlerton *>(plugin),
5831 args);
5832 }
5833
ha_binlog_log_query(THD * thd,handlerton * hton,enum_binlog_command binlog_command,const char * query,size_t query_length,const char * db,const char * table_name)5834 void ha_binlog_log_query(THD *thd, handlerton *hton,
5835 enum_binlog_command binlog_command, const char *query,
5836 size_t query_length, const char *db,
5837 const char *table_name) {
5838 struct binlog_log_query_st b;
5839 b.binlog_command = binlog_command;
5840 b.query = query;
5841 b.query_length = query_length;
5842 b.db = db;
5843 b.table_name = table_name;
5844 if (hton == nullptr)
5845 plugin_foreach(thd, binlog_log_query_handlerton,
5846 MYSQL_STORAGE_ENGINE_PLUGIN, &b);
5847 else
5848 binlog_log_query_handlerton2(thd, hton, &b);
5849 }
5850
ha_binlog_end(THD * thd)5851 int ha_binlog_end(THD *thd) {
5852 binlog_func_st bfn = {BFN_BINLOG_END, nullptr};
5853 binlog_func_foreach(thd, &bfn);
5854 return 0;
5855 }
5856
acl_notify_handlerton(THD * thd,plugin_ref plugin,void * data)5857 static bool acl_notify_handlerton(THD *thd, plugin_ref plugin, void *data) {
5858 handlerton *hton = plugin_data<handlerton *>(plugin);
5859 if (hton->state == SHOW_OPTION_YES && hton->acl_notify)
5860 hton->acl_notify(thd,
5861 static_cast<const class Acl_change_notification *>(data));
5862 return false;
5863 }
5864
ha_acl_notify(THD * thd,class Acl_change_notification * data)5865 void ha_acl_notify(THD *thd, class Acl_change_notification *data) {
5866 plugin_foreach(thd, acl_notify_handlerton, MYSQL_STORAGE_ENGINE_PLUGIN, data);
5867 }
5868
5869 /**
5870 Calculate cost of 'index only' scan for given index and number of records
5871
5872 @param keynr Index number
5873 @param records Estimated number of records to be retrieved
5874
5875 @note
  It is assumed that we will read through the whole key range and that all
5877 key blocks are half full (normally things are much better). It is also
5878 assumed that each time we read the next key from the index, the handler
5879 performs a random seek, thus the cost is proportional to the number of
5880 blocks read.
5881
5882 @return
5883 Estimated cost of 'index only' scan
5884 */
5885
index_only_read_time(uint keynr,double records)5886 double handler::index_only_read_time(uint keynr, double records) {
5887 double read_time;
5888 uint keys_per_block =
5889 (stats.block_size / 2 /
5890 (table_share->key_info[keynr].key_length + ref_length) +
5891 1);
5892 read_time = ((double)(records + keys_per_block - 1) / (double)keys_per_block);
5893 return read_time;
5894 }
5895
table_in_memory_estimate() const5896 double handler::table_in_memory_estimate() const {
5897 DBUG_ASSERT(stats.table_in_mem_estimate == IN_MEMORY_ESTIMATE_UNKNOWN ||
5898 (stats.table_in_mem_estimate >= 0.0 &&
5899 stats.table_in_mem_estimate <= 1.0));
5900
5901 /*
5902 If the storage engine has supplied information about how much of the
5903 table that is currently in a memory buffer, then use this estimate.
5904 */
5905 if (stats.table_in_mem_estimate != IN_MEMORY_ESTIMATE_UNKNOWN)
5906 return stats.table_in_mem_estimate;
5907
5908 /*
5909 The storage engine has not provided any information about how much of
5910 this index is in memory, use an heuristic to produce an estimate.
5911 */
5912 return estimate_in_memory_buffer(stats.data_file_length);
5913 }
5914
index_in_memory_estimate(uint keyno) const5915 double handler::index_in_memory_estimate(uint keyno) const {
5916 const KEY *key = &table->key_info[keyno];
5917
5918 /*
5919 If the storage engine has supplied information about how much of the
5920 index that is currently in a memory buffer, then use this estimate.
5921 */
5922 const double est = key->in_memory_estimate();
5923 if (est != IN_MEMORY_ESTIMATE_UNKNOWN) return est;
5924
5925 /*
5926 The storage engine has not provided any information about how much of
5927 this index is in memory, use an heuristic to produce an estimate.
5928 */
5929 ulonglong file_length;
5930
5931 /*
5932 If the index is a clustered primary index, then use the data file
5933 size as estimate for how large the index is.
5934 */
5935 if (keyno == table->s->primary_key && primary_key_is_clustered())
5936 file_length = stats.data_file_length;
5937 else
5938 file_length = stats.index_file_length;
5939
5940 return estimate_in_memory_buffer(file_length);
5941 }
5942
estimate_in_memory_buffer(ulonglong table_index_size) const5943 double handler::estimate_in_memory_buffer(ulonglong table_index_size) const {
5944 /*
5945 The storage engine has not provided any information about how much of
5946 the table/index is in memory. In this case we use a heuristic:
5947
5948 - if the size of the table/index is less than 20 percent (pick any
5949 number) of the memory buffer, then the entire table/index is likely in
5950 memory.
5951 - if the size of the table/index is larger than the memory buffer, then
5952 assume nothing of the table/index is in memory.
5953 - if the size of the table/index is larger than 20 percent but less than
5954 the memory buffer size, then use a linear function of the table/index
5955 size that goes from 1.0 to 0.0.
5956 */
5957
5958 /*
5959 If the storage engine has information about the size of its
5960 memory buffer, then use this. Otherwise, assume that at least 100 MB
5961 of data can be chached in memory.
5962 */
5963 longlong memory_buf_size = get_memory_buffer_size();
5964 if (memory_buf_size <= 0) memory_buf_size = 100 * 1024 * 1024; // 100 MB
5965
5966 /*
5967 Upper limit for the relative size of a table to be considered
5968 entirely available in a memory buffer. If the actual table size is
5969 less than this we assume it is complete cached in a memory buffer.
5970 */
5971 const double table_index_in_memory_limit = 0.2;
5972
5973 /*
5974 Estimate for how much of the total memory buffer this table/index
5975 can occupy.
5976 */
5977 const double percent_of_mem =
5978 static_cast<double>(table_index_size) / memory_buf_size;
5979
5980 double in_mem_est;
5981
5982 if (percent_of_mem < table_index_in_memory_limit) // Less than 20 percent
5983 in_mem_est = 1.0;
5984 else if (percent_of_mem > 1.0) // Larger than buffer
5985 in_mem_est = 0.0;
5986 else {
5987 /*
5988 The size of the table/index is larger than
5989 "table_index_in_memory_limit" * "memory_buf_size" but less than
5990 the total size of the memory buffer.
5991 */
5992 in_mem_est = 1.0 - (percent_of_mem - table_index_in_memory_limit) /
5993 (1.0 - table_index_in_memory_limit);
5994 }
5995 DBUG_ASSERT(in_mem_est >= 0.0 && in_mem_est <= 1.0);
5996
5997 return in_mem_est;
5998 }
5999
table_scan_cost()6000 Cost_estimate handler::table_scan_cost() {
6001 /*
6002 This function returns a Cost_estimate object. The function should be
6003 implemented in a way that allows the compiler to use "return value
6004 optimization" to avoid creating the temporary object for the return value
6005 and use of the copy constructor.
6006 */
6007
6008 const double io_cost = scan_time() * table->cost_model()->page_read_cost(1.0);
6009 Cost_estimate cost;
6010 cost.add_io(io_cost);
6011 return cost;
6012 }
6013
index_scan_cost(uint index,double ranges MY_ATTRIBUTE ((unused)),double rows)6014 Cost_estimate handler::index_scan_cost(uint index,
6015 double ranges MY_ATTRIBUTE((unused)),
6016 double rows) {
6017 /*
6018 This function returns a Cost_estimate object. The function should be
6019 implemented in a way that allows the compiler to use "return value
6020 optimization" to avoid creating the temporary object for the return value
6021 and use of the copy constructor.
6022 */
6023
6024 DBUG_ASSERT(ranges >= 0.0);
6025 DBUG_ASSERT(rows >= 0.0);
6026
6027 const double io_cost = index_only_read_time(index, rows) *
6028 table->cost_model()->page_read_cost_index(index, 1.0);
6029 Cost_estimate cost;
6030 cost.add_io(io_cost);
6031 return cost;
6032 }
6033
read_cost(uint index,double ranges,double rows)6034 Cost_estimate handler::read_cost(uint index, double ranges, double rows) {
6035 /*
6036 This function returns a Cost_estimate object. The function should be
6037 implemented in a way that allows the compiler to use "return value
6038 optimization" to avoid creating the temporary object for the return value
6039 and use of the copy constructor.
6040 */
6041
6042 DBUG_ASSERT(ranges >= 0.0);
6043 DBUG_ASSERT(rows >= 0.0);
6044
6045 const double io_cost =
6046 read_time(index, static_cast<uint>(ranges), static_cast<ha_rows>(rows)) *
6047 table->cost_model()->page_read_cost(1.0);
6048 Cost_estimate cost;
6049 cost.add_io(io_cost);
6050 return cost;
6051 }
6052
6053 /**
6054 Check if key has partially-covered columns
6055
6056 We can't use DS-MRR to perform range scans when the ranges are over
6057 partially-covered keys, because we'll not have full key part values
6058 (we'll have their prefixes from the index) and will not be able to check
  if we've reached the end of the range.
6060
6061 @param table Table to check keys for
6062 @param keyno Key to check
6063
6064 @todo
6065 Allow use of DS-MRR in cases where the index has partially-covered
6066 components but they are not used for scanning.
6067
6068 @retval true Yes
6069 @retval false No
6070 */
6071
key_uses_partial_cols(TABLE * table,uint keyno)6072 static bool key_uses_partial_cols(TABLE *table, uint keyno) {
6073 KEY_PART_INFO *kp = table->key_info[keyno].key_part;
6074 KEY_PART_INFO *kp_end = kp + table->key_info[keyno].user_defined_key_parts;
6075 for (; kp != kp_end; kp++) {
6076 if (!kp->field->part_of_key.is_set(keyno)) return true;
6077 }
6078 return false;
6079 }
6080
6081 /****************************************************************************
6082 * Default MRR implementation (MRR to non-MRR converter)
6083 ***************************************************************************/
6084
6085 /**
6086 Get cost and other information about MRR scan over a known list of ranges
6087
6088 Calculate estimated cost and other information about an MRR scan for given
6089 sequence of ranges.
6090
6091 @param keyno Index number
6092 @param seq Range sequence to be traversed
6093 @param seq_init_param First parameter for seq->init()
6094 @param n_ranges_arg Number of ranges in the sequence, or 0 if the caller
6095 can't efficiently determine it
6096 @param [in,out] bufsz IN: Size of the buffer available for use
6097 OUT: Size of the buffer that is expected to be actually
6098 used, or 0 if buffer is not needed.
6099 @param [in,out] flags A combination of HA_MRR_* flags
6100 @param [out] cost Estimated cost of MRR access
6101
6102 @note
6103 This method (or an overriding one in a derived class) must check for
6104 \c thd->killed and return HA_POS_ERROR if it is not zero. This is required
6105 for a user to be able to interrupt the calculation by killing the
6106 connection/query.
6107
6108 @retval
6109 HA_POS_ERROR Error or the engine is unable to perform the requested
6110 scan. Values of OUT parameters are undefined.
6111 @retval
6112 other OK, *cost contains cost of the scan, *bufsz and *flags
6113 contain scan parameters.
6114 */
6115
ha_rows handler::multi_range_read_info_const(
    uint keyno, RANGE_SEQ_IF *seq, void *seq_init_param,
    uint n_ranges_arg MY_ATTRIBUTE((unused)), uint *bufsz, uint *flags,
    Cost_estimate *cost) {
  KEY_MULTI_RANGE range;
  range_seq_t seq_it;
  ha_rows rows, total_rows = 0;
  uint n_ranges = 0;
  THD *thd = current_thd;

  /* Default MRR implementation doesn't need buffer */
  *bufsz = 0;

  DBUG_EXECUTE_IF("bug13822652_2", thd->killed = THD::KILL_QUERY;);

  // Walk all ranges in the sequence, summing the estimated number of rows.
  seq_it = seq->init(seq_init_param, n_ranges, *flags);
  while (!seq->next(seq_it, &range)) {
    // Check once per range so the estimation can be interrupted by KILL.
    if (unlikely(thd->killed != 0)) return HA_POS_ERROR;

    n_ranges++;
    key_range *min_endp, *max_endp;
    if (range.range_flag & GEOM_FLAG) {
      // GEOM ranges are specified by their start key only.
      min_endp = &range.start_key;
      max_endp = nullptr;
    } else {
      min_endp = range.start_key.length ? &range.start_key : nullptr;
      max_endp = range.end_key.length ? &range.end_key : nullptr;
    }
    /*
      Get the number of rows in the range. This is done by calling
      records_in_range() unless:

      1) The index is unique.
         There cannot be more than one matching row, so 1 is
         assumed. Note that it is possible that the correct number
         is actually 0, so the row estimate may be too high in this
         case. Also note: ranges of the form "x IS NULL" may have more
         than 1 matching row so records_in_range() is called for these.
      2) SKIP_RECORDS_IN_RANGE will be set when skip_records_in_range or
         use_index_statistics are true.
         Ranges of the form "x IS NULL" will not use index statistics
         because the number of rows with this value are likely to be
         very different than the values in the index statistics.

      Note: With SKIP_RECORDS_IN_RANGE, use Index statistics if:
      a) Index statistics is available.
      b) The range is an equality range but the index is either not
         unique or all of the keyparts are not used.
    */
    int keyparts_used = 0;
    if ((range.range_flag & UNIQUE_RANGE) &&  // 1)
        !(range.range_flag & NULL_RANGE))
      rows = 1; /* there can be at most one row */
    else if (range.range_flag & SKIP_RECORDS_IN_RANGE &&  // 2)
             !(range.range_flag & NULL_RANGE)) {
      // Prefer the records-per-key statistic when it exists for the number
      // of keyparts actually bound by this equality range.
      if ((range.range_flag & EQ_RANGE) &&
          (keyparts_used = my_count_bits(range.start_key.keypart_map)) &&
          table->key_info[keyno].has_records_per_key(keyparts_used - 1)) {
        rows = static_cast<ha_rows>(
            table->key_info[keyno].records_per_key(keyparts_used - 1));
      } else {
        /*
          Return HA_POS_ERROR if the range does not use all key parts and
          the key cannot use partial key searches.
        */
        if ((index_flags(keyno, 0, false) & HA_ONLY_WHOLE_INDEX)) {
          DBUG_ASSERT(
              (range.range_flag & EQ_RANGE) &&
              !table->key_info[keyno].has_records_per_key(keyparts_used - 1));
          total_rows = HA_POS_ERROR;
          break;
        }
        /*
          Since records_in_range has not been called, set the rows to 1.
          FORCE INDEX has been used, cost model values will be ignored anyway.
        */
        rows = 1;
      }
    } else {
      // Fall back to asking the storage engine for an estimate.
      DBUG_EXECUTE_IF("crash_records_in_range", DBUG_SUICIDE(););
      DBUG_ASSERT(min_endp || max_endp);
      if (HA_POS_ERROR ==
          (rows = this->records_in_range(keyno, min_endp, max_endp))) {
        /* Can't scan one range => can't do MRR scan at all */
        total_rows = HA_POS_ERROR;
        break;
      }
    }
    total_rows += rows;
  }

  if (total_rows != HA_POS_ERROR) {
    const Cost_model_table *const cost_model = table->cost_model();

    /* The following calculation is the same as in multi_range_read_info(): */
    *flags |= (HA_MRR_USE_DEFAULT_IMPL | HA_MRR_SUPPORT_SORTED);

    DBUG_ASSERT(cost->is_zero());
    if (*flags & HA_MRR_INDEX_ONLY)
      *cost = index_scan_cost(keyno, static_cast<double>(n_ranges),
                              static_cast<double>(total_rows));
    else
      *cost = read_cost(keyno, static_cast<double>(n_ranges),
                        static_cast<double>(total_rows));
    // Add the CPU cost of evaluating the rows (plus a small constant so the
    // cost is never exactly zero).
    cost->add_cpu(
        cost_model->row_evaluate_cost(static_cast<double>(total_rows)) + 0.01);
  }
  return total_rows;
}
6225
6226 /**
6227 Get cost and other information about MRR scan over some sequence of ranges
6228
6229 Calculate estimated cost and other information about an MRR scan for some
6230 sequence of ranges.
6231
6232 The ranges themselves will be known only at execution phase. When this
6233 function is called we only know number of ranges and a (rough) E(#records)
6234 within those ranges.
6235
6236 Currently this function is only called for "n-keypart singlepoint" ranges,
6237 i.e. each range is "keypart1=someconst1 AND ... AND keypartN=someconstN"
6238
6239 The flags parameter is a combination of those flags: HA_MRR_SORTED,
6240 HA_MRR_INDEX_ONLY, HA_MRR_NO_ASSOCIATION, HA_MRR_LIMITS.
6241
6242 @param keyno Index number
6243 @param n_ranges Estimated number of ranges (i.e. intervals) in the
6244 range sequence.
6245 @param n_rows Estimated total number of records contained within all
6246 of the ranges
6247 @param [in,out] bufsz IN: Size of the buffer available for use
6248 OUT: Size of the buffer that will be actually used, or
6249 0 if buffer is not needed.
6250 @param [in,out] flags A combination of HA_MRR_* flags
6251 @param [out] cost Estimated cost of MRR access
6252
6253 @retval
6254 0 OK, *cost contains cost of the scan, *bufsz and *flags contain scan
6255 parameters.
6256 @retval
6257 other Error or can't perform the requested scan
6258 */
6259
multi_range_read_info(uint keyno,uint n_ranges,uint n_rows,uint * bufsz,uint * flags,Cost_estimate * cost)6260 ha_rows handler::multi_range_read_info(uint keyno, uint n_ranges, uint n_rows,
6261 uint *bufsz, uint *flags,
6262 Cost_estimate *cost) {
6263 *bufsz = 0; /* Default implementation doesn't need a buffer */
6264
6265 *flags |= HA_MRR_USE_DEFAULT_IMPL;
6266 *flags |= HA_MRR_SUPPORT_SORTED;
6267
6268 DBUG_ASSERT(cost->is_zero());
6269
6270 /* Produce the same cost as non-MRR code does */
6271 if (*flags & HA_MRR_INDEX_ONLY)
6272 *cost = index_scan_cost(keyno, n_ranges, n_rows);
6273 else
6274 *cost = read_cost(keyno, n_ranges, n_rows);
6275 return 0;
6276 }
6277
6278 /**
6279 Initialize the MRR scan.
6280
6281 This function may do heavyweight scan
6282 initialization like row prefetching/sorting/etc (NOTE: but better not do
6283 it here as we may not need it, e.g. if we never satisfy WHERE clause on
6284 previous tables. For many implementations it would be natural to do such
6285 initializations in the first multi_read_range_next() call)
6286
6287 mode is a combination of the following flags: HA_MRR_SORTED,
6288 HA_MRR_INDEX_ONLY, HA_MRR_NO_ASSOCIATION
6289
6290 @param seq_funcs Range sequence to be traversed
6291 @param seq_init_param First parameter for seq->init()
6292 @param n_ranges Number of ranges in the sequence
6293 @param mode Flags, see the description section for the details
6294 @param buf INOUT: memory buffer to be used
6295
6296 @note
6297 One must have called index_init() before calling this function. Several
6298 multi_range_read_init() calls may be made in course of one query.
6299
6300 Until WL#2623 is done (see its text, section 3.2), the following will
6301 also hold:
6302 The caller will guarantee that if "seq->init == mrr_ranges_array_init"
6303 then seq_init_param is an array of n_ranges KEY_MULTI_RANGE structures.
6304 This property will only be used by NDB handler until WL#2623 is done.
6305
6306 Buffer memory management is done according to the following scenario:
6307 The caller allocates the buffer and provides it to the callee by filling
6308 the members of HANDLER_BUFFER structure.
6309 The callee consumes all or some fraction of the provided buffer space, and
6310 sets the HANDLER_BUFFER members accordingly.
6311 The callee may use the buffer memory until the next multi_range_read_init()
6312 call is made, all records have been read, or until index_end() call is
6313 made, whichever comes first.
6314
6315 @retval 0 OK
6316 @retval 1 Error
6317 */
6318
multi_range_read_init(RANGE_SEQ_IF * seq_funcs,void * seq_init_param,uint n_ranges,uint mode,HANDLER_BUFFER * buf MY_ATTRIBUTE ((unused)))6319 int handler::multi_range_read_init(RANGE_SEQ_IF *seq_funcs,
6320 void *seq_init_param, uint n_ranges,
6321 uint mode,
6322 HANDLER_BUFFER *buf MY_ATTRIBUTE((unused))) {
6323 DBUG_TRACE;
6324 mrr_iter = seq_funcs->init(seq_init_param, n_ranges, mode);
6325 mrr_funcs = *seq_funcs;
6326 mrr_is_output_sorted = mode & HA_MRR_SORTED;
6327 mrr_have_range = false;
6328 return 0;
6329 }
6330
ha_multi_range_read_next(char ** range_info)6331 int handler::ha_multi_range_read_next(char **range_info) {
6332 int result;
6333 DBUG_TRACE;
6334
6335 // Set status for the need to update generated fields
6336 m_update_generated_read_fields = table->has_gcol();
6337
6338 result = multi_range_read_next(range_info);
6339 if (!result && m_update_generated_read_fields) {
6340 result =
6341 update_generated_read_fields(table->record[0], table, active_index);
6342 m_update_generated_read_fields = false;
6343 }
6344 table->set_row_status_from_handler(result);
6345 return result;
6346 }
6347
6348 /**
6349 Get next record in MRR scan
6350
6351 Default MRR implementation: read the next record
6352
6353 @param range_info OUT Undefined if HA_MRR_NO_ASSOCIATION flag is in effect
6354 Otherwise, the opaque value associated with the range
6355 that contains the returned record.
6356
6357 @retval 0 OK
6358 @retval other Error code
6359 */
6360
int handler::multi_range_read_next(char **range_info) {
  int result = HA_ERR_END_OF_FILE;
  int range_res = 0;
  bool dup_found = false;
  DBUG_TRACE;
  // For a multi-valued index the unique filter has to be used to produce a
  // correct (duplicate-free) result.
  DBUG_ASSERT(!(table->key_info[active_index].flags & HA_MULTI_VALUED_KEY) ||
              m_unique);

  if (!mrr_have_range) {
    // First call: no current range yet, fetch one before reading.
    mrr_have_range = true;
    goto start;
  }

  do {
    /*
      Do not call read_range_next() if this is an equality range on a
      unique index: at most one row can match, and it has been returned.
    */
    if (!((mrr_cur_range.range_flag & UNIQUE_RANGE) &&
          (mrr_cur_range.range_flag & EQ_RANGE))) {
      DBUG_ASSERT(!result || result == HA_ERR_END_OF_FILE);
      result = read_range_next();
      DBUG_EXECUTE_IF("bug20162055_DEADLOCK", result = HA_ERR_LOCK_DEADLOCK;);
      /*
        On success check loop condition to filter duplicates, if needed.
        Exit on non-EOF error. Use next range on EOF error.
      */
      if (!result) continue;
      if (result != HA_ERR_END_OF_FILE) break;
    } else {
      // A semi-consistent read means the engine returned an unlocked row
      // that must be re-read; rescan the current range.
      if (was_semi_consistent_read()) goto scan_it_again;
    }

  start:
    /* Try the next range(s) until one matches a record. */
    while (!(range_res = mrr_funcs.next(mrr_iter, &mrr_cur_range))) {
    scan_it_again:
      result = read_range_first(
          mrr_cur_range.start_key.keypart_map ? &mrr_cur_range.start_key
                                              : nullptr,
          mrr_cur_range.end_key.keypart_map ? &mrr_cur_range.end_key : nullptr,
          mrr_cur_range.range_flag & EQ_RANGE, mrr_is_output_sorted);
      if (result != HA_ERR_END_OF_FILE) break;
    }
    // Keep looping while ranges remain and the last read either hit EOF or
    // produced a duplicate row filtered out by the unique filter.
  } while (((result == HA_ERR_END_OF_FILE) ||
            (m_unique && (dup_found = filter_dup_records()))) &&
           !range_res);

  *range_info = mrr_cur_range.ptr;
  /*
    Last found record was a duplicate and we retrieved records from all
    ranges, so no more records can be returned.
  */
  if (dup_found && range_res) result = HA_ERR_END_OF_FILE;

  DBUG_PRINT("exit", ("handler::multi_range_read_next result %d", result));
  return result;
}
6421
6422 /****************************************************************************
6423 * DS-MRR implementation
6424 ***************************************************************************/
6425
6426 /**
6427 DS-MRR: Initialize and start MRR scan
6428
6429 Initialize and start the MRR scan. Depending on the mode parameter, this
6430 may use default or DS-MRR implementation.
6431
6432 The DS-MRR implementation will use a second handler object (h2) for
6433 doing scan on the index:
6434 - on the first call to this function the h2 handler will be created
6435 and h2 will be opened using the same index as the main handler
6436 is set to use. The index scan on the main index will be closed
6437 and it will be re-opened to read records from the table using either
6438 no key or the primary key. The h2 handler will be deleted when
6439 reset() is called (which should happen on the end of the statement).
6440 - when dsmrr_close() is called the index scan on h2 is closed.
6441 - on following calls to this function one of the following must be valid:
6442 a. if dsmrr_close has been called:
6443 the main handler (h) must be open on an index, h2 will be opened
6444 using this index, and the index on h will be closed and
           h will be re-opened to read records from the table using either
6446 no key or the primary key.
6447 b. dsmrr_close has not been called:
6448 h2 will already be open, the main handler h must be set up
6449 to read records from the table (handler->inited is RND) either
6450 using the primary index or using no index at all.
6451
6452 @param seq_funcs Interval sequence enumeration functions
6453 @param seq_init_param Interval sequence enumeration parameter
6454 @param n_ranges Number of ranges in the sequence.
6455 @param mode HA_MRR_* modes to use
6456 @param[in,out] buf Buffer to use
6457
6458 @retval 0 Ok, Scan started.
6459 @retval other Error
6460 */
6461
int DsMrr_impl::dsmrr_init(RANGE_SEQ_IF *seq_funcs, void *seq_init_param,
                           uint n_ranges, uint mode, HANDLER_BUFFER *buf) {
  DBUG_ASSERT(table != nullptr);  // Verify init() called

  uint elem_size;
  int retval = 0;
  DBUG_TRACE;
  THD *const thd = table->in_use;  // current THD

  // Fall back to the default MRR implementation when MRR is disabled for
  // this index (hints/@@optimizer_switch), when the caller explicitly asks
  // for the default, or when sorted output is required.
  if (!hint_key_state(thd, table->pos_in_table_list, h->active_index,
                      MRR_HINT_ENUM, OPTIMIZER_SWITCH_MRR) ||
      mode & (HA_MRR_USE_DEFAULT_IMPL | HA_MRR_SORTED))  // DS-MRR doesn't sort
  {
    use_default_impl = true;
    retval = h->handler::multi_range_read_init(seq_funcs, seq_init_param,
                                               n_ranges, mode, buf);
    return retval;
  }

  /*
    This assert will hit if we have pushed an index condition to the
    primary key index and then "change our mind" and use a different
    index for retrieving data with MRR. One of the following criteria
    must be true:
      1. We have not pushed an index condition on this handler.
      2. We have pushed an index condition and this is on the currently used
         index.
      3. We have pushed an index condition but this is not for the primary key.
      4. We have pushed an index condition and this has been transferred to
         the clone (h2) of the handler object.
  */
  DBUG_ASSERT(!h->pushed_idx_cond ||
              h->pushed_idx_cond_keyno == h->active_index ||
              h->pushed_idx_cond_keyno != table->s->primary_key ||
              (h2 && h->pushed_idx_cond_keyno == h2->active_index));

  rowids_buf = buf->buffer;

  is_mrr_assoc = !(mode & HA_MRR_NO_ASSOCIATION);

  if (is_mrr_assoc) {
    DBUG_ASSERT(!thd->status_var_aggregated);
    table->in_use->status_var.ha_multi_range_read_init_count++;
  }

  // Round the buffer down to a whole number of elements, where an element
  // is a rowid optionally followed by its range_id pointer.
  rowids_buf_end = buf->buffer_end;
  elem_size = h->ref_length + (int)is_mrr_assoc * sizeof(void *);
  rowids_buf_last =
      rowids_buf + ((rowids_buf_end - rowids_buf) / elem_size) * elem_size;
  rowids_buf_end = rowids_buf_last;

  /*
    The DS-MRR scan uses a second handler object (h2) for doing the
    index scan. Create this by cloning the primary handler
    object. The h2 handler object is deleted when DsMrr_impl::reset()
    is called.
  */
  if (!h2) {
    handler *new_h2;
    /*
      ::clone() takes up a lot of stack, especially on 64 bit platforms.
      The constant 5 is an empiric result.
      @todo Is this still the case? Leave it as it is for now but could
      likely be removed?
    */
    if (check_stack_overrun(thd, 5 * STACK_MIN_SIZE, (uchar *)&new_h2))
      return 1;

    if (!(new_h2 = h->clone(table->s->normalized_path.str, thd->mem_root)))
      return 1;
    h2 = new_h2; /* Ok, now can put it into h2 */
    table->prepare_for_position();
  }

  /*
    Open the index scan on h2 using the key from the primary handler.
  */
  if (h2->active_index == MAX_KEY) {
    DBUG_ASSERT(h->active_index != MAX_KEY);
    const uint mrr_keyno = h->active_index;

    if ((retval = h2->ha_external_lock(thd, h->get_lock_type()))) goto error;

    // h2 only needs index columns, so enable index-only reads.
    if ((retval = h2->extra(HA_EXTRA_KEYREAD))) goto error;

    if ((retval = h2->ha_index_init(mrr_keyno, false))) goto error;

    // Multi-valued keys can return the same row several times; enable the
    // engine's unique-record filter on h2 to remove duplicates.
    if ((table->key_info[mrr_keyno].flags & HA_MULTI_VALUED_KEY) &&
        (retval = h2->ha_extra(HA_EXTRA_ENABLE_UNIQUE_RECORD_FILTER)))
      goto error; /* purecov: inspected */

    // Transfer ICP from h to h2
    if (mrr_keyno == h->pushed_idx_cond_keyno) {
      if (h2->idx_cond_push(mrr_keyno, h->pushed_idx_cond)) {
        retval = 1;
        goto error;
      }
    } else {
      // Cancel any potentially previously pushed index conditions
      h2->cancel_pushed_idx_cond();
    }
  } else {
    /*
      h2 has already an open index. This happens when the DS-MRR scan
      is re-started without closing it first. In this case the primary
      handler must be used for reading records from the table, ie. it
      must not be opened for doing a new range scan. In this case
      the active_index must either not be set or be the primary key.
    */
    DBUG_ASSERT(h->inited == handler::RND);
    DBUG_ASSERT(h->active_index == MAX_KEY ||
                h->active_index == table->s->primary_key);
  }

  /*
    The index scan is now transferred to h2 and we can close the open
    index scan on the primary handler.
  */
  if (h->inited == handler::INDEX) {
    /*
      Calling h->ha_index_end() will invoke dsmrr_close() for this object,
      which will close the index scan on h2. We need to keep it open, so
      temporarily move h2 out of the DsMrr object.
    */
    handler *save_h2 = h2;
    h2 = nullptr;
    retval = h->ha_index_end();
    h2 = save_h2;
    if (retval) goto error;
  }

  /*
    Verify consistency between h and h2.
  */
  DBUG_ASSERT(h->inited != handler::INDEX);
  DBUG_ASSERT(h->active_index == MAX_KEY ||
              h->active_index == table->s->primary_key);
  DBUG_ASSERT(h2->inited == handler::INDEX);
  DBUG_ASSERT(h2->active_index != MAX_KEY);
  DBUG_ASSERT(h->get_lock_type() == h2->get_lock_type());

  // Initialize the default (non-DS) MRR machinery on h2; it drives the
  // index scan that fills the rowid buffer.
  if ((retval = h2->handler::multi_range_read_init(seq_funcs, seq_init_param,
                                                   n_ranges, mode, buf)))
    goto error;

  if ((retval = dsmrr_fill_buffer())) goto error;

  /*
    If the above call has scanned through all intervals in *seq, then
    adjust *buf to indicate that the remaining buffer space will not be used.
  */
  if (dsmrr_eof) buf->end_of_used_area = rowids_buf_last;

  /*
    h->inited == INDEX may occur when 'range checked for each record' is
    used.
  */
  if ((h->inited != handler::RND) &&
      ((h->inited == handler::INDEX ? h->ha_index_end() : false) ||
       (h->ha_rnd_init(false)))) {
    retval = 1;
    goto error;
  }

  use_default_impl = false;
  h->mrr_funcs = *seq_funcs;

  return 0;
error:
  // Tear down and discard h2 on any failure so a later call starts clean.
  h2->ha_index_or_rnd_end();
  h2->ha_external_lock(thd, F_UNLCK);
  h2->ha_close();
  destroy(h2);
  h2 = nullptr;
  DBUG_ASSERT(retval != 0);
  return retval;
}
6639
dsmrr_close()6640 void DsMrr_impl::dsmrr_close() {
6641 DBUG_TRACE;
6642
6643 // If there is an open index on h2, then close it
6644 if (h2 && h2->active_index != MAX_KEY) {
6645 h2->ha_index_or_rnd_end();
6646 h2->ha_external_lock(current_thd, F_UNLCK);
6647 }
6648 use_default_impl = true;
6649 }
6650
reset()6651 void DsMrr_impl::reset() {
6652 DBUG_TRACE;
6653
6654 if (h2) {
6655 // Close any ongoing DS-MRR scan
6656 dsmrr_close();
6657
6658 // Close and delete the h2 handler
6659 h2->ha_close();
6660 destroy(h2);
6661 h2 = nullptr;
6662 }
6663 }
6664
6665 /**
6666 DS-MRR: Fill the buffer with rowids and sort it by rowid
6667
6668 {This is an internal function of DiskSweep MRR implementation}
6669 Scan the MRR ranges and collect ROWIDs (or {ROWID, range_id} pairs) into
6670 buffer. When the buffer is full or scan is completed, sort the buffer by
6671 rowid and return.
6672
6673 The function assumes that rowids buffer is empty when it is invoked.
6674
6675 @retval 0 OK, the next portion of rowids is in the buffer,
6676 properly ordered
6677 @retval other Error
6678 */
6679
int DsMrr_impl::dsmrr_fill_buffer() {
  char *range_info;
  int res = 0;
  DBUG_TRACE;
  DBUG_ASSERT(rowids_buf < rowids_buf_end);

  /*
    Set key_read to true since we only read fields from the index.
    This ensures that any virtual columns are read from index and are not
    attempted to be evaluated from base columns.
    (Do not use TABLE::set_keyread() since the MRR implementation operates
    with two handler objects, and set_keyread() would manipulate the keyread
    property of the wrong handler. MRR sets the handlers' keyread properties
    when initializing the MRR operation, independent of this call).
  */
  DBUG_ASSERT(table->key_read == false);
  table->key_read = true;

  rowids_buf_cur = rowids_buf;
  /*
    Do not use ha_multi_range_read_next() as it would call the engine's
    overridden multi_range_read_next() but the default implementation is wanted.
  */
  while ((rowids_buf_cur < rowids_buf_end) &&
         !(res = h2->handler::multi_range_read_next(&range_info))) {
    KEY_MULTI_RANGE *curr_range = &h2->handler::mrr_cur_range;
    // Give the MRR user a chance to reject the index tuple before we pay
    // for position() and buffer space.
    if (h2->mrr_funcs.skip_index_tuple &&
        h2->mrr_funcs.skip_index_tuple(h2->mrr_iter, curr_range->ptr))
      continue;

    /* Put rowid, or {rowid, range_id} pair into the buffer */
    h2->position(table->record[0]);
    memcpy(rowids_buf_cur, h2->ref, h2->ref_length);
    rowids_buf_cur += h2->ref_length;

    if (is_mrr_assoc) {
      // Store the opaque range_id right after the rowid.
      memcpy(rowids_buf_cur, &range_info, sizeof(void *));
      rowids_buf_cur += sizeof(void *);
    }
  }

  // Restore key_read since the next read operation will read complete rows
  table->key_read = false;

  // Propagate real errors; EOF just means the index scan is exhausted.
  if (res && res != HA_ERR_END_OF_FILE) return res;
  dsmrr_eof = (res == HA_ERR_END_OF_FILE);

  /* Sort the buffer contents by rowid */
  uint elem_size = h->ref_length + (int)is_mrr_assoc * sizeof(void *);
  DBUG_ASSERT((rowids_buf_cur - rowids_buf) % elem_size == 0);

  varlen_sort(
      rowids_buf, rowids_buf_cur, elem_size,
      [this](const uchar *a, const uchar *b) { return h->cmp_ref(a, b) < 0; });
  rowids_buf_last = rowids_buf_cur;
  rowids_buf_cur = rowids_buf;
  return 0;
}
6738
6739 /*
6740 DS-MRR implementation: multi_range_read_next() function
6741 */
6742
int DsMrr_impl::dsmrr_next(char **range_info) {
  int res;
  uchar *cur_range_info = nullptr;
  uchar *rowid;

  // Delegate to the default implementation when DS-MRR is not in use.
  if (use_default_impl) return h->handler::multi_range_read_next(range_info);

  do {
    if (rowids_buf_cur == rowids_buf_last) {
      // Buffer exhausted: refill from the index scan, unless the scan has
      // already delivered all rows.
      if (dsmrr_eof) {
        res = HA_ERR_END_OF_FILE;
        goto end;
      }

      res = dsmrr_fill_buffer();
      if (res) goto end;
    }

    /* return eof if there are no rowids in the buffer after re-fill attempt */
    if (rowids_buf_cur == rowids_buf_last) {
      res = HA_ERR_END_OF_FILE;
      goto end;
    }
    rowid = rowids_buf_cur;

    // With range association the range_id pointer is stored directly after
    // the rowid in the buffer element.
    if (is_mrr_assoc)
      memcpy(&cur_range_info, rowids_buf_cur + h->ref_length, sizeof(uchar *));

    rowids_buf_cur += h->ref_length + sizeof(void *) * is_mrr_assoc;
    // Let the MRR user filter out this row before fetching it.
    if (h2->mrr_funcs.skip_record &&
        h2->mrr_funcs.skip_record(h2->mrr_iter, (char *)cur_range_info, rowid))
      continue;
    // Fetch the full row from the table by rowid on the primary handler.
    res = h->ha_rnd_pos(table->record[0], rowid);
    break;
  } while (true);

  if (is_mrr_assoc) {
    memcpy(range_info, rowid + h->ref_length, sizeof(void *));
  }
end:
  return res;
}
6785
6786 /*
6787 DS-MRR implementation: multi_range_read_info() function
6788 */
dsmrr_info(uint keyno,uint n_ranges,uint rows,uint * bufsz,uint * flags,Cost_estimate * cost)6789 ha_rows DsMrr_impl::dsmrr_info(uint keyno, uint n_ranges, uint rows,
6790 uint *bufsz, uint *flags, Cost_estimate *cost) {
6791 ha_rows res MY_ATTRIBUTE((unused));
6792 uint def_flags = *flags;
6793 uint def_bufsz = *bufsz;
6794
6795 /* Get cost/flags/mem_usage of default MRR implementation */
6796 res = h->handler::multi_range_read_info(keyno, n_ranges, rows, &def_bufsz,
6797 &def_flags, cost);
6798 DBUG_ASSERT(!res);
6799
6800 if ((*flags & HA_MRR_USE_DEFAULT_IMPL) ||
6801 choose_mrr_impl(keyno, rows, flags, bufsz, cost)) {
6802 /* Default implementation is choosen */
6803 DBUG_PRINT("info", ("Default MRR implementation choosen"));
6804 *flags = def_flags;
6805 *bufsz = def_bufsz;
6806 DBUG_ASSERT(*flags & HA_MRR_USE_DEFAULT_IMPL);
6807 } else {
6808 /* *flags and *bufsz were set by choose_mrr_impl */
6809 DBUG_PRINT("info", ("DS-MRR implementation choosen"));
6810 }
6811 return 0;
6812 }
6813
6814 /*
6815 DS-MRR Implementation: multi_range_read_info_const() function
6816 */
6817
dsmrr_info_const(uint keyno,RANGE_SEQ_IF * seq,void * seq_init_param,uint n_ranges,uint * bufsz,uint * flags,Cost_estimate * cost)6818 ha_rows DsMrr_impl::dsmrr_info_const(uint keyno, RANGE_SEQ_IF *seq,
6819 void *seq_init_param, uint n_ranges,
6820 uint *bufsz, uint *flags,
6821 Cost_estimate *cost) {
6822 ha_rows rows;
6823 uint def_flags = *flags;
6824 uint def_bufsz = *bufsz;
6825 /* Get cost/flags/mem_usage of default MRR implementation */
6826 rows = h->handler::multi_range_read_info_const(
6827 keyno, seq, seq_init_param, n_ranges, &def_bufsz, &def_flags, cost);
6828 if (rows == HA_POS_ERROR) {
6829 /* Default implementation can't perform MRR scan => we can't either */
6830 return rows;
6831 }
6832
6833 /*
6834 If HA_MRR_USE_DEFAULT_IMPL has been passed to us, that is an order to
6835 use the default MRR implementation (we need it for UPDATE/DELETE).
6836 Otherwise, make a choice based on cost and mrr* flags of
6837 @@optimizer_switch.
6838 */
6839 if ((*flags & HA_MRR_USE_DEFAULT_IMPL) ||
6840 choose_mrr_impl(keyno, rows, flags, bufsz, cost)) {
6841 DBUG_PRINT("info", ("Default MRR implementation choosen"));
6842 *flags = def_flags;
6843 *bufsz = def_bufsz;
6844 DBUG_ASSERT(*flags & HA_MRR_USE_DEFAULT_IMPL);
6845 } else {
6846 /* *flags and *bufsz were set by choose_mrr_impl */
6847 DBUG_PRINT("info", ("DS-MRR implementation choosen"));
6848 }
6849 return rows;
6850 }
6851
6852 /**
6853 DS-MRR Internals: Choose between Default MRR implementation and DS-MRR
6854
6855 Make the choice between using Default MRR implementation and DS-MRR.
6856 This function contains common functionality factored out of dsmrr_info()
6857 and dsmrr_info_const(). The function assumes that the default MRR
6858 implementation's applicability requirements are satisfied.
6859
6860 @param keyno Index number
6861 @param rows E(full rows to be retrieved)
  @param flags   IN  MRR flags provided by the MRR user
                 OUT If DS-MRR is chosen, flags of DS-MRR implementation
                     else the value is not modified
  @param bufsz   IN  If DS-MRR is chosen, buffer use of DS-MRR implementation
                 else the value is not modified
  @param cost    IN  Cost of default MRR implementation
                 OUT If DS-MRR is chosen, cost of DS-MRR scan
                     else the value is not modified
6870
6871 @retval true Default MRR implementation should be used
6872 @retval false DS-MRR implementation should be used
6873 */
6874
choose_mrr_impl(uint keyno,ha_rows rows,uint * flags,uint * bufsz,Cost_estimate * cost)6875 bool DsMrr_impl::choose_mrr_impl(uint keyno, ha_rows rows, uint *flags,
6876 uint *bufsz, Cost_estimate *cost) {
6877 bool res;
6878 THD *thd = current_thd;
6879 TABLE_LIST *tl = table->pos_in_table_list;
6880 const bool mrr_on =
6881 hint_key_state(thd, tl, keyno, MRR_HINT_ENUM, OPTIMIZER_SWITCH_MRR);
6882 const bool force_dsmrr_by_hints =
6883 hint_key_state(thd, tl, keyno, MRR_HINT_ENUM, 0) ||
6884 hint_table_state(thd, tl, BKA_HINT_ENUM, 0);
6885
6886 if (!(mrr_on || force_dsmrr_by_hints) ||
6887 *flags & (HA_MRR_INDEX_ONLY | HA_MRR_SORTED) || // Unsupported by DS-MRR
6888 (keyno == table->s->primary_key && h->primary_key_is_clustered()) ||
6889 key_uses_partial_cols(table, keyno) ||
6890 table->s->tmp_table != NO_TMP_TABLE) {
6891 /* Use the default implementation, don't modify args: See comments */
6892 return true;
6893 }
6894
6895 /*
6896 If @@optimizer_switch has "mrr_cost_based" on, we should avoid
6897 using DS-MRR for queries where it is likely that the records are
6898 stored in memory. Since there is currently no way to determine
6899 this, we use a heuristic:
6900 a) if the storage engine has a memory buffer, DS-MRR is only
6901 considered if the table size is bigger than the buffer.
6902 b) if the storage engine does not have a memory buffer, DS-MRR is
6903 only considered if the table size is bigger than 100MB.
6904 c) Since there is an initial setup cost of DS-MRR, so it is only
6905 considered if at least 50 records will be read.
6906 */
6907 if (thd->optimizer_switch_flag(OPTIMIZER_SWITCH_MRR_COST_BASED) &&
6908 !force_dsmrr_by_hints) {
6909 /*
6910 If the storage engine has a database buffer we use this as the
6911 minimum size the table should have before considering DS-MRR.
6912 */
6913 longlong min_file_size = table->file->get_memory_buffer_size();
6914 if (min_file_size == -1) {
6915 // No estimate for database buffer
6916 min_file_size = 100 * 1024 * 1024; // 100 MB
6917 }
6918
6919 if (table->file->stats.data_file_length <
6920 static_cast<ulonglong>(min_file_size) ||
6921 rows <= 50)
6922 return true; // Use the default implementation
6923 }
6924
6925 Cost_estimate dsmrr_cost;
6926 if (get_disk_sweep_mrr_cost(keyno, rows, *flags, bufsz, &dsmrr_cost))
6927 return true;
6928
6929 /*
6930 If @@optimizer_switch has "mrr" on and "mrr_cost_based" off, then set cost
6931 of DS-MRR to be minimum of DS-MRR and Default implementations cost. This
6932 allows one to force use of DS-MRR whenever it is applicable without
6933 affecting other cost-based choices. Note that if MRR or BKA hint is
6934 specified, DS-MRR will be used regardless of cost.
6935 */
6936 const bool force_dsmrr =
6937 (force_dsmrr_by_hints ||
6938 !thd->optimizer_switch_flag(OPTIMIZER_SWITCH_MRR_COST_BASED));
6939
6940 if (force_dsmrr && dsmrr_cost.total_cost() > cost->total_cost())
6941 dsmrr_cost = *cost;
6942
6943 if (force_dsmrr || (dsmrr_cost.total_cost() <= cost->total_cost())) {
6944 *flags &= ~HA_MRR_USE_DEFAULT_IMPL; /* Use the DS-MRR implementation */
6945 *flags &= ~HA_MRR_SUPPORT_SORTED; /* We can't provide ordered output */
6946 *cost = dsmrr_cost;
6947 res = false;
6948 } else {
6949 /* Use the default MRR implementation */
6950 res = true;
6951 }
6952 return res;
6953 }
6954
6955 static void get_sort_and_sweep_cost(TABLE *table, ha_rows nrows,
6956 Cost_estimate *cost);
6957
/**
  Get cost of DS-MRR scan

  @param keynr              Index to be used
  @param rows               E(Number of rows to be scanned)
  @param flags              Scan parameters (HA_MRR_* flags)
  @param buffer_size INOUT  Buffer size
  @param cost        OUT    The cost

  @retval false   OK
  @retval true    Error, DS-MRR cannot be used (the buffer is too small
                  for even 1 rowid)
*/

bool DsMrr_impl::get_disk_sweep_mrr_cost(uint keynr, ha_rows rows, uint flags,
                                         uint *buffer_size,
                                         Cost_estimate *cost) {
  ha_rows rows_in_last_step;
  uint n_full_steps;

  /*
    Size of one buffer element: the rowid itself plus, unless
    HA_MRR_NO_ASSOCIATION is set, a pointer that associates the rowid
    with its source range.
  */
  const uint elem_size =
      h->ref_length + sizeof(void *) * !(flags & HA_MRR_NO_ASSOCIATION);
  const ha_rows max_buff_entries = *buffer_size / elem_size;

  if (!max_buff_entries)
    return true; /* Buffer has not enough space for even 1 rowid */

  /* Number of iterations we'll make with full buffer */
  n_full_steps = (uint)floor(rows2double(rows) / max_buff_entries);

  /*
    Get numbers of rows we'll be processing in last iteration, with
    non-full buffer. Note this is zero when 'rows' is an exact multiple
    of max_buff_entries; get_sort_and_sweep_cost() handles nrows == 0.
  */
  rows_in_last_step = rows % max_buff_entries;

  DBUG_ASSERT(cost->is_zero());

  if (n_full_steps) {
    /* Cost of one full-buffer sort+sweep, repeated for every full step. */
    get_sort_and_sweep_cost(table, max_buff_entries, cost);
    cost->multiply(n_full_steps);
  } else {
    /*
      Adjust buffer size since only parts of the buffer will be used:
      1. Adjust record estimate for the last scan to reduce likelyhood
         of needing more than one scan by adding 20 percent to the
         record estimate and by ensuring this is at least 100 records.
      2. If the estimated needed buffer size is lower than suggested by
         the caller then set it to the estimated buffer size.
    */
    const ha_rows keys_in_buffer =
        max<ha_rows>(static_cast<ha_rows>(1.2 * rows_in_last_step), 100);
    *buffer_size = min<ulong>(*buffer_size,
                              static_cast<ulong>(keys_in_buffer) * elem_size);
  }

  /* Cost of the final (possibly partial) sort+sweep step. */
  Cost_estimate last_step_cost;
  get_sort_and_sweep_cost(table, rows_in_last_step, &last_step_cost);
  (*cost) += last_step_cost;

  /*
    Cost of memory is not included in the total_cost() function and
    thus will not be considered when comparing costs. Still, we
    record it in the cost estimate object for future use.
  */
  cost->add_mem(*buffer_size);

  /* Total cost of all index accesses */
  (*cost) += h->index_scan_cost(keynr, 1, static_cast<double>(rows));

  /*
    Add CPU cost for processing records (see
    @handler::multi_range_read_info_const()).
  */
  cost->add_cpu(
      table->cost_model()->row_evaluate_cost(static_cast<double>(rows)));
  return false;
}
7036
7037 /*
7038 Get cost of one sort-and-sweep step
7039
7040 SYNOPSIS
7041 get_sort_and_sweep_cost()
7042 table Table being accessed
7043 nrows Number of rows to be sorted and retrieved
7044 cost OUT The cost
7045
7046 DESCRIPTION
7047 Get cost of these operations:
7048 - sort an array of #nrows ROWIDs using qsort
7049 - read #nrows records from table in a sweep.
7050 */
7051
get_sort_and_sweep_cost(TABLE * table,ha_rows nrows,Cost_estimate * cost)7052 static void get_sort_and_sweep_cost(TABLE *table, ha_rows nrows,
7053 Cost_estimate *cost) {
7054 DBUG_ASSERT(cost->is_zero());
7055 if (nrows) {
7056 get_sweep_read_cost(table, nrows, false, cost);
7057
7058 /*
7059 @todo CostModel: For the old version of the cost model the
7060 following code should be used. For the new version of the cost
7061 model Cost_model::key_compare_cost() should be used. When
7062 removing support for the old cost model this code should be
7063 removed. The reason for this is that we should get rid of the
7064 ROWID_COMPARE_SORT_COST and use key_compare_cost() instead. For
7065 the current value returned by key_compare_cost() this would
7066 overestimate the cost for sorting.
7067 */
7068
7069 /*
7070 Constant for the cost of doing one key compare operation in the
7071 sort operation. We should have used the value returned by
7072 key_compare_cost() here but this would make the cost
7073 estimate of sorting very high for queries accessing many
7074 records. Until this constant is adjusted we introduce a constant
7075 that is more realistic. @todo: Replace this with
7076 key_compare_cost() when this has been given a realistic value.
7077 */
7078 const double ROWID_COMPARE_SORT_COST =
7079 table->cost_model()->key_compare_cost(1.0) / 10;
7080
7081 /* Add cost of qsort call: n * log2(n) * cost(rowid_comparison) */
7082
7083 // For the old version of the cost model this cost calculations should
7084 // be used....
7085 const double cpu_sort = nrows * log2(nrows) * ROWID_COMPARE_SORT_COST;
7086 // .... For the new cost model something like this should be used...
7087 // cpu_sort= nrows * log2(nrows) *
7088 // table->cost_model()->rowid_compare_cost();
7089 cost->add_cpu(cpu_sort);
7090 }
7091 }
7092
/**
  Get cost of reading nrows table records in a "disk sweep"

  A disk sweep read is a sequence of handler->rnd_pos(rowid) calls that made
  for an ordered sequence of rowids.

  We take into account that some of the records might be in a memory
  buffer while others need to be read from a secondary storage
  device. The model for this assumes hard disk IO. A disk read is
  performed as follows:

   1. The disk head is moved to the needed cylinder
   2. The controller waits for the plate to rotate
   3. The data is transferred

  Time to do #3 is insignificant compared to #2+#1.

  Time to move the disk head is proportional to head travel distance.

  Time to wait for the plate to rotate depends on whether the disk head
  was moved or not.

  If disk head wasn't moved, the wait time is proportional to distance
  between the previous block and the block we're reading.

  If the head was moved, we don't know how much we'll need to wait for the
  plate to rotate. We assume the wait time to be a variate with a mean of
  0.5 of full rotation time.

  Our cost units are "random disk seeks". The cost of a random disk seek is
  actually not a constant, it depends on the range of cylinders we're going
  to access. We make it constant by introducing a fuzzy concept of "typical
  datafile length" (it's fuzzy as it's hard to tell whether it should
  include index file, temp.tables etc). Then random seek cost is:

    1 = half_rotation_cost + move_cost * 1/3 * typical_data_file_length

  We define half_rotation_cost as disk_seek_base_cost() (see
  Cost_model_server::disk_seek_base_cost()).

  @param      table       Table to be accessed
  @param      nrows       Number of rows to retrieve
  @param      interrupted true <=> Assume that the disk sweep will be
                          interrupted by other disk IO. false - otherwise.
  @param[out] cost        the cost
*/

void get_sweep_read_cost(TABLE *table, ha_rows nrows, bool interrupted,
                         Cost_estimate *cost) {
  DBUG_TRACE;

  DBUG_ASSERT(cost->is_zero());
  if (nrows > 0) {
    const Cost_model_table *const cost_model = table->cost_model();

    // The total number of blocks used by this table
    double n_blocks =
        ceil(ulonglong2double(table->file->stats.data_file_length) / IO_SIZE);
    if (n_blocks < 1.0)  // When data_file_length is 0
      n_blocks = 1.0;

    /*
      The number of blocks that on average need to be read, given that
      the records are uniformly distributed over the table
      (standard "balls into bins" occupancy estimate).
    */
    double busy_blocks =
        n_blocks * (1.0 - pow(1.0 - 1.0 / n_blocks, rows2double(nrows)));
    if (busy_blocks < 1.0) busy_blocks = 1.0;

    DBUG_PRINT("info",
               ("sweep: nblocks=%g, busy_blocks=%g", n_blocks, busy_blocks));
    /*
      The random access cost for reading the data pages will be the upper
      limit for the sweep_cost.
    */
    cost->add_io(cost_model->page_read_cost(busy_blocks));
    if (!interrupted) {
      Cost_estimate sweep_cost;
      /*
        Assume reading pages from disk is done in one 'sweep'.

        The cost model and cost estimate for pages already in a memory
        buffer will be different from pages that needed to be read from
        disk. Calculate the number of blocks that likely already are
        in memory and the number of blocks that need to be read from
        disk.
      */
      const double busy_blocks_mem =
          busy_blocks * table->file->table_in_memory_estimate();
      const double busy_blocks_disk = busy_blocks - busy_blocks_mem;
      // table_in_memory_estimate() is a fraction, so this cannot go negative
      DBUG_ASSERT(busy_blocks_disk >= 0.0);

      // Cost of accessing blocks in main memory buffer
      sweep_cost.add_io(cost_model->buffer_block_read_cost(busy_blocks_mem));

      // Cost of reading blocks from disk in a 'sweep': average gap
      // between two consecutive blocks that must be fetched from disk
      const double seek_distance =
          (busy_blocks_disk > 1.0) ? n_blocks / busy_blocks_disk : n_blocks;

      const double disk_cost =
          busy_blocks_disk * cost_model->disk_seek_cost(seek_distance);
      sweep_cost.add_io(disk_cost);

      /*
        For some cases, ex: when only few blocks need to be read and the
        seek distance becomes very large, the sweep cost model can produce
        a cost estimate that is larger than the cost of random access.
        To handle this case, we use the sweep cost only when it is less
        than the random access cost.
      */
      if (sweep_cost < *cost) *cost = sweep_cost;
    }
  }
  DBUG_PRINT("info", ("returning cost=%g", cost->total_cost()));
}
7208
7209 /****************************************************************************
7210 * DS-MRR implementation ends
7211 ***************************************************************************/
7212
7213 /** @brief
7214 Read first row between two ranges.
7215 Store ranges for future calls to read_range_next.
7216
7217 @param start_key Start key. Is 0 if no min range
7218 @param end_key End key. Is 0 if no max range
7219 @param eq_range_arg Set to 1 if start_key == end_key
7220 @param sorted Set to 1 if result should be sorted per key
7221
7222 @note
7223 Record is read into table->record[0]
7224
7225 @retval
7226 0 Found row
7227 @retval
7228 HA_ERR_END_OF_FILE No rows in range
7229 */
read_range_first(const key_range * start_key,const key_range * end_key,bool eq_range_arg,bool sorted MY_ATTRIBUTE ((unused)))7230 int handler::read_range_first(const key_range *start_key,
7231 const key_range *end_key, bool eq_range_arg,
7232 bool sorted MY_ATTRIBUTE((unused))) {
7233 int result;
7234 DBUG_TRACE;
7235
7236 eq_range = eq_range_arg;
7237 set_end_range(end_key, RANGE_SCAN_ASC);
7238
7239 range_key_part = table->key_info[active_index].key_part;
7240
7241 if (!start_key) // Read first record
7242 result = ha_index_first(table->record[0]);
7243 else
7244 result = ha_index_read_map(table->record[0], start_key->key,
7245 start_key->keypart_map, start_key->flag);
7246 if (result)
7247 return (result == HA_ERR_KEY_NOT_FOUND) ? HA_ERR_END_OF_FILE : result;
7248
7249 if (compare_key(end_range) > 0) {
7250 /*
7251 The last read row does not fall in the range. So request
7252 storage engine to release row lock if possible.
7253 */
7254 unlock_row();
7255 result = HA_ERR_END_OF_FILE;
7256 }
7257 return result;
7258 }
7259
ha_read_range_first(const key_range * start_key,const key_range * end_key,bool eq_range,bool sorted)7260 int handler::ha_read_range_first(const key_range *start_key,
7261 const key_range *end_key, bool eq_range,
7262 bool sorted) {
7263 int result;
7264 DBUG_TRACE;
7265
7266 // Set status for the need to update generated fields
7267 m_update_generated_read_fields = table->has_gcol();
7268
7269 result = read_range_first(start_key, end_key, eq_range, sorted);
7270 if (!result && m_update_generated_read_fields) {
7271 result =
7272 update_generated_read_fields(table->record[0], table, active_index);
7273 m_update_generated_read_fields = false;
7274 }
7275 table->set_row_status_from_handler(result);
7276 return result;
7277 }
7278
ha_read_range_next()7279 int handler::ha_read_range_next() {
7280 int result;
7281 DBUG_TRACE;
7282
7283 // Set status for the need to update generated fields
7284 m_update_generated_read_fields = table->has_gcol();
7285
7286 result = read_range_next();
7287 if (!result && m_update_generated_read_fields) {
7288 result =
7289 update_generated_read_fields(table->record[0], table, active_index);
7290 m_update_generated_read_fields = false;
7291 }
7292 table->set_row_status_from_handler(result);
7293 return result;
7294 }
7295
7296 /** @brief
7297 Read next row between two endpoints.
7298
7299 @note
7300 Record is read into table->record[0]
7301
7302 @retval
7303 0 Found row
7304 @retval
7305 HA_ERR_END_OF_FILE No rows in range
7306 */
read_range_next()7307 int handler::read_range_next() {
7308 DBUG_TRACE;
7309
7310 int result;
7311 if (eq_range) {
7312 /* We trust that index_next_same always gives a row in range */
7313 result =
7314 ha_index_next_same(table->record[0], end_range->key, end_range->length);
7315 } else {
7316 result = ha_index_next(table->record[0]);
7317 if (result) return result;
7318
7319 if (compare_key(end_range) > 0) {
7320 /*
7321 The last read row does not fall in the range. So request
7322 storage engine to release row lock if possible.
7323 */
7324 unlock_row();
7325 result = HA_ERR_END_OF_FILE;
7326 }
7327 }
7328 return result;
7329 }
7330
7331 /**
7332 Check if one of the columns in a key is a virtual generated column.
7333
7334 @param part the first part of the key to check
7335 @param length the length of the key
7336 @retval true if the key contains a virtual generated column
7337 @retval false if the key does not contain a virtual generated column
7338 */
key_has_vcol(const KEY_PART_INFO * part,uint length)7339 static bool key_has_vcol(const KEY_PART_INFO *part, uint length) {
7340 for (uint len = 0; len < length; len += part->store_length, ++part)
7341 if (part->field->is_virtual_gcol()) return true;
7342 return false;
7343 }
7344
set_end_range(const key_range * range,enum_range_scan_direction direction)7345 void handler::set_end_range(const key_range *range,
7346 enum_range_scan_direction direction) {
7347 if (range) {
7348 save_end_range = *range;
7349 end_range = &save_end_range;
7350 range_key_part = table->key_info[active_index].key_part;
7351 key_compare_result_on_equal =
7352 ((range->flag == HA_READ_BEFORE_KEY)
7353 ? 1
7354 : (range->flag == HA_READ_AFTER_KEY) ? -1 : 0);
7355 m_virt_gcol_in_end_range = key_has_vcol(range_key_part, range->length);
7356 } else
7357 end_range = nullptr;
7358
7359 /*
7360 Clear the out-of-range flag in the record buffer when a new range is
7361 started. Also set the in_range_check_pushed_down flag, since the
7362 storage engine needs to do the evaluation of the end-range to avoid
7363 filling the record buffer with out-of-range records.
7364 */
7365 if (m_record_buffer != nullptr) {
7366 m_record_buffer->set_out_of_range(false);
7367 in_range_check_pushed_down = true;
7368 }
7369
7370 range_scan_direction = direction;
7371 }
7372
7373 /**
7374 Compare if found key (in row) is over max-value.
7375
7376 @param range range to compare to row. May be 0 for no range
7377
7378 @sa
7379 key.cc::key_cmp()
7380
7381 @return
7382 The return value is SIGN(key_in_row - range_key):
7383
7384 - 0 : Key is equal to range or 'range' == 0 (no range)
7385 - -1 : Key is less than range
7386 - 1 : Key is larger than range
7387 */
compare_key(key_range * range)7388 int handler::compare_key(key_range *range) {
7389 int cmp;
7390 if (!range || in_range_check_pushed_down) return 0; // No max range
7391 cmp = key_cmp(range_key_part, range->key, range->length);
7392 if (!cmp) cmp = key_compare_result_on_equal;
7393 return cmp;
7394 }
7395
7396 /*
7397 Compare if a found key (in row) is within the range.
7398
7399 This function is similar to compare_key() but checks the range scan
7400 direction to determine if this is a descending scan. This function
7401 is used by the index condition pushdown implementation to determine
7402 if the read record is within the range scan.
7403
7404 @param range Range to compare to row. May be NULL for no range.
7405
7406 @seealso
7407 handler::compare_key()
7408
7409 @return Returns whether the key is within the range
7410
7411 - 0 : Key is equal to range or 'range' == 0 (no range)
7412 - -1 : Key is within the current range
7413 - 1 : Key is outside the current range
7414 */
7415
compare_key_icp(const key_range * range) const7416 int handler::compare_key_icp(const key_range *range) const {
7417 int cmp;
7418 if (!range) return 0; // no max range
7419 cmp = key_cmp(range_key_part, range->key, range->length);
7420 if (!cmp) cmp = key_compare_result_on_equal;
7421 if (range_scan_direction == RANGE_SCAN_DESC) cmp = -cmp;
7422 return cmp;
7423 }
7424
7425 /**
7426 Change the offsets of all the fields in a key range.
7427
7428 @param range the key range
7429 @param key_part the first key part
7430 @param diff how much to change the offsets with
7431 */
move_key_field_offsets(const key_range * range,const KEY_PART_INFO * key_part,ptrdiff_t diff)7432 static inline void move_key_field_offsets(const key_range *range,
7433 const KEY_PART_INFO *key_part,
7434 ptrdiff_t diff) {
7435 for (size_t len = 0; len < range->length;
7436 len += key_part->store_length, ++key_part)
7437 key_part->field->move_field_offset(diff);
7438 }
7439
7440 /**
7441 Check if the key in the given buffer (which is not necessarily
7442 TABLE::record[0]) is within range. Called by the storage engine to
7443 avoid reading too many rows.
7444
7445 @param buf the buffer that holds the key
7446 @retval -1 if the key is within the range
7447 @retval 0 if the key is equal to the end_range key, and
7448 key_compare_result_on_equal is 0
7449 @retval 1 if the key is outside the range
7450 */
compare_key_in_buffer(const uchar * buf) const7451 int handler::compare_key_in_buffer(const uchar *buf) const {
7452 DBUG_ASSERT(end_range != nullptr && (m_record_buffer == nullptr ||
7453 !m_record_buffer->is_out_of_range()));
7454
7455 /*
7456 End range on descending scans is only checked with ICP for now, and then we
7457 check it with compare_key_icp() instead of this function.
7458 */
7459 DBUG_ASSERT(range_scan_direction == RANGE_SCAN_ASC);
7460
7461 // Make the fields in the key point into the buffer instead of record[0].
7462 const ptrdiff_t diff = buf - table->record[0];
7463 if (diff != 0) move_key_field_offsets(end_range, range_key_part, diff);
7464
7465 // Compare the key in buf against end_range.
7466 int cmp = key_cmp(range_key_part, end_range->key, end_range->length);
7467 if (cmp == 0) cmp = key_compare_result_on_equal;
7468
7469 // Reset the field offsets.
7470 if (diff != 0) move_key_field_offsets(end_range, range_key_part, -diff);
7471
7472 return cmp;
7473 }
7474
index_read_idx_map(uchar * buf,uint index,const uchar * key,key_part_map keypart_map,enum ha_rkey_function find_flag)7475 int handler::index_read_idx_map(uchar *buf, uint index, const uchar *key,
7476 key_part_map keypart_map,
7477 enum ha_rkey_function find_flag) {
7478 int error, error1 = 0;
7479 error = index_init(index, false);
7480 if (!error) {
7481 error = index_read_map(buf, key, keypart_map, find_flag);
7482 error1 = index_end();
7483 }
7484 return error ? error : error1;
7485 }
7486
calculate_key_len(TABLE * table,uint key,key_part_map keypart_map)7487 uint calculate_key_len(TABLE *table, uint key, key_part_map keypart_map) {
7488 /* works only with key prefixes */
7489 DBUG_ASSERT(((keypart_map + 1) & keypart_map) == 0);
7490
7491 KEY *key_info = table->key_info + key;
7492 KEY_PART_INFO *key_part = key_info->key_part;
7493 KEY_PART_INFO *end_key_part = key_part + actual_key_parts(key_info);
7494 uint length = 0;
7495
7496 while (key_part < end_key_part && keypart_map) {
7497 length += key_part->store_length;
7498 keypart_map >>= 1;
7499 key_part++;
7500 }
7501 return length;
7502 }
7503
7504 /**
7505 Returns a list of all known extensions.
7506
7507 No mutexes, worst case race is a minor surplus memory allocation
7508 We have to recreate the extension map if mysqld is restarted (for example
7509 within libmysqld)
7510
7511 @retval
7512 pointer pointer to TYPELIB structure
7513 */
exts_handlerton(THD *,plugin_ref plugin,void * arg)7514 static bool exts_handlerton(THD *, plugin_ref plugin, void *arg) {
7515 List<const char> *found_exts = static_cast<List<const char> *>(arg);
7516 handlerton *hton = plugin_data<handlerton *>(plugin);
7517 if (hton->state == SHOW_OPTION_YES && hton->file_extensions) {
7518 List_iterator_fast<const char> it(*found_exts);
7519 const char **ext, *old_ext;
7520
7521 for (ext = hton->file_extensions; *ext; ext++) {
7522 while ((old_ext = it++)) {
7523 if (!strcmp(old_ext, *ext)) break;
7524 }
7525 if (!old_ext) found_exts->push_back(*ext);
7526
7527 it.rewind();
7528 }
7529 }
7530 return false;
7531 }
7532
ha_known_exts()7533 TYPELIB *ha_known_exts() {
7534 TYPELIB *known_extensions = (TYPELIB *)(*THR_MALLOC)->Alloc(sizeof(TYPELIB));
7535 known_extensions->name = "known_exts";
7536 known_extensions->type_lengths = nullptr;
7537
7538 List<const char> found_exts;
7539 const char **ext, *old_ext;
7540
7541 plugin_foreach(nullptr, exts_handlerton, MYSQL_STORAGE_ENGINE_PLUGIN,
7542 &found_exts);
7543
7544 size_t arr_length = sizeof(char *) * (found_exts.elements + 1);
7545 ext = (const char **)(*THR_MALLOC)->Alloc(arr_length);
7546
7547 DBUG_ASSERT(nullptr != ext);
7548 known_extensions->count = found_exts.elements;
7549 known_extensions->type_names = ext;
7550
7551 List_iterator_fast<const char> it(found_exts);
7552 while ((old_ext = it++)) *ext++ = old_ext;
7553 *ext = nullptr;
7554 return known_extensions;
7555 }
7556
stat_print(THD * thd,const char * type,size_t type_len,const char * file,size_t file_len,const char * status,size_t status_len)7557 static bool stat_print(THD *thd, const char *type, size_t type_len,
7558 const char *file, size_t file_len, const char *status,
7559 size_t status_len) {
7560 Protocol *protocol = thd->get_protocol();
7561 protocol->start_row();
7562 protocol->store_string(type, type_len, system_charset_info);
7563 protocol->store_string(file, file_len, system_charset_info);
7564 protocol->store_string(status, status_len, system_charset_info);
7565 if (protocol->end_row()) return true;
7566 return false;
7567 }
7568
showstat_handlerton(THD * thd,plugin_ref plugin,void * arg)7569 static bool showstat_handlerton(THD *thd, plugin_ref plugin, void *arg) {
7570 enum ha_stat_type stat = *(enum ha_stat_type *)arg;
7571 handlerton *hton = plugin_data<handlerton *>(plugin);
7572 if (hton->state == SHOW_OPTION_YES && hton->show_status &&
7573 hton->show_status(hton, thd, stat_print, stat))
7574 return true;
7575 return false;
7576 }
7577
ha_show_status(THD * thd,handlerton * db_type,enum ha_stat_type stat)7578 bool ha_show_status(THD *thd, handlerton *db_type, enum ha_stat_type stat) {
7579 List<Item> field_list;
7580 bool result;
7581
7582 field_list.push_back(new Item_empty_string("Type", 10));
7583 field_list.push_back(new Item_empty_string("Name", FN_REFLEN));
7584 field_list.push_back(new Item_empty_string("Status", 10));
7585
7586 if (thd->send_result_metadata(&field_list,
7587 Protocol::SEND_NUM_ROWS | Protocol::SEND_EOF))
7588 return true;
7589
7590 if (db_type == nullptr) {
7591 result = plugin_foreach(thd, showstat_handlerton,
7592 MYSQL_STORAGE_ENGINE_PLUGIN, &stat);
7593 } else {
7594 if (db_type->state != SHOW_OPTION_YES) {
7595 const LEX_CSTRING *name = &se_plugin_array[db_type->slot]->name;
7596 result = stat_print(thd, name->str, name->length, "", 0, "DISABLED", 8)
7597 ? true
7598 : false;
7599 } else {
7600 DBUG_EXECUTE_IF("simulate_show_status_failure",
7601 DBUG_SET("+d,simulate_net_write_failure"););
7602 result = db_type->show_status &&
7603 db_type->show_status(db_type, thd, stat_print, stat)
7604 ? true
7605 : false;
7606 DBUG_EXECUTE_IF("simulate_show_status_failure",
7607 DBUG_SET("-d,simulate_net_write_failure"););
7608 }
7609 }
7610
7611 if (!result) my_eof(thd);
7612 return result;
7613 }
7614
7615 /*
7616 Function to check if the conditions for row-based binlogging is
7617 correct for the table.
7618
7619 A row in the given table should be replicated if:
7620 - Row-based replication is enabled in the current thread
7621 - The binlog is enabled
7622 - It is not a temporary table
7623 - The binary log is open
7624 - The database the table resides in shall be binlogged (binlog_*_db rules)
7625 - table is not mysql.event
7626 */
7627
check_table_binlog_row_based(THD * thd,TABLE * table)7628 static bool check_table_binlog_row_based(THD *thd, TABLE *table) {
7629 if (table->s->cached_row_logging_check == -1) {
7630 int const check(table->s->tmp_table == NO_TMP_TABLE &&
7631 !table->no_replicate &&
7632 binlog_filter->db_ok(table->s->db.str));
7633 table->s->cached_row_logging_check = check;
7634 }
7635
7636 DBUG_ASSERT(table->s->cached_row_logging_check == 0 ||
7637 table->s->cached_row_logging_check == 1);
7638
7639 return (thd->is_current_stmt_binlog_format_row() &&
7640 table->s->cached_row_logging_check &&
7641 (thd->variables.option_bits & OPTION_BIN_LOG) &&
7642 mysql_bin_log.is_open());
7643 }
7644
/** @brief
  Write table maps for all (manually or automatically) locked tables
  to the binary log.

  SYNOPSIS
    write_locked_table_maps()
      thd     Pointer to THD structure

  DESCRIPTION
    This function will generate and write table maps for all tables
    that are locked by the thread 'thd'.

  RETURN VALUE
    0   All OK
    1   Failed to write all table maps

  SEE ALSO
    THD::lock
*/

static int write_locked_table_maps(THD *thd) {
  DBUG_TRACE;
  DBUG_PRINT("enter", ("thd: %p  thd->lock: %p "
                       "thd->extra_lock: %p",
                       thd, thd->lock, thd->extra_lock));

  DBUG_PRINT("debug",
             ("get_binlog_table_maps(): %d", thd->get_binlog_table_maps()));

  // Only act when no table maps have been written for this statement yet;
  // the first logged row triggers maps for all locked tables at once.
  if (thd->get_binlog_table_maps() == 0) {
    // Visit both lock sets the thread may hold.
    for (MYSQL_LOCK *lock : {thd->extra_lock, thd->lock}) {
      if (lock == nullptr) continue;

      bool need_binlog_rows_query = thd->variables.binlog_rows_query_log_events;
      TABLE **const end_ptr = lock->table + lock->table_count;
      for (TABLE **table_ptr = lock->table; table_ptr != end_ptr; ++table_ptr) {
        TABLE *const table = *table_ptr;
        DBUG_PRINT("info", ("Checking table %s", table->s->table_name.str));
        // Only write-locked tables that qualify for row-based logging
        // get a table map event.
        if (table->current_lock == F_WRLCK &&
            check_table_binlog_row_based(thd, table)) {
          /*
            We need to have a transactional behavior for SQLCOM_CREATE_TABLE
            (e.g. CREATE TABLE... SELECT * FROM TABLE) in order to keep a
            compatible behavior with the STMT based replication even when
            the table is not transactional. In other words, if the operation
            fails while executing the insert phase nothing is written to the
            binlog.

            Note that at this point, we check the type of a set of tables to
            create the table map events. In the function binlog_log_row(),
            which calls the current function, we check the type of the table
            of the current row.
          */
          bool const has_trans = thd->lex->sql_command == SQLCOM_CREATE_TABLE ||
                                 table->file->has_transactions();
          int const error = thd->binlog_write_table_map(table, has_trans,
                                                        need_binlog_rows_query);
          /* Binlog Rows_query log event once for one statement which updates
             two or more tables.*/
          if (need_binlog_rows_query) need_binlog_rows_query = false;
          /*
            If an error occurs, it is the responsibility of the caller to
            roll back the transaction.
          */
          if (unlikely(error)) return 1;
        }
      }
    }
  }
  return 0;
}
7716
// Log one row operation (write/update/delete) to the binary log in
// row format, writing table map events first if needed.
// Returns 0 on success, HA_ERR_RBR_LOGGING_FAILED on logging failure.
int binlog_log_row(TABLE *table, const uchar *before_record,
                   const uchar *after_record, Log_func *log_func) {
  bool error = false;
  THD *const thd = table->in_use;

  if (check_table_binlog_row_based(thd, table)) {
    // Extract write-set information (PKE = primary key equivalent),
    // used by transaction write-set extraction.
    if (thd->variables.transaction_write_set_extraction != HASH_ALGORITHM_OFF) {
      if (before_record && after_record) {
        /* capture both images pke */
        add_pke(table, thd, table->record[0]);
        add_pke(table, thd, table->record[1]);
      } else {
        add_pke(table, thd, table->record[0]);
      }
    }
    /*
      NOTE(review): 'error' is still false at this point, so this
      statement always returns 0 when the THD has an error flagged
      (presumably raised inside add_pke()). The caller then has to
      notice thd->is_error() itself — confirm this is intentional and
      not a dropped HA_ERR_RBR_LOGGING_FAILED.
    */
    if (table->in_use->is_error()) return error ? HA_ERR_RBR_LOGGING_FAILED : 0;

    DBUG_DUMP("read_set 10", (uchar *)table->read_set->bitmap,
              (table->s->fields + 7) / 8);

    /*
      If there are no table maps written to the binary log, this is
      the first row handled in this statement. In that case, we need
      to write table maps for all locked tables to the binary log.
    */
    if (likely(!(error = write_locked_table_maps(thd)))) {
      /*
        We need to have a transactional behavior for SQLCOM_CREATE_TABLE
        (i.e. CREATE TABLE... SELECT * FROM TABLE) in order to keep a
        compatible behavior with the STMT based replication even when
        the table is not transactional. In other words, if the operation
        fails while executing the insert phase nothing is written to the
        binlog.
      */
      bool const has_trans = thd->lex->sql_command == SQLCOM_CREATE_TABLE ||
                             table->file->has_transactions();
      error = (*log_func)(thd, table, has_trans, before_record, after_record);
    }
  }

  return error ? HA_ERR_RBR_LOGGING_FAILED : 0;
}
7759
ha_external_lock(THD * thd,int lock_type)7760 int handler::ha_external_lock(THD *thd, int lock_type) {
7761 int error;
7762 DBUG_TRACE;
7763 /*
7764 Whether this is lock or unlock, this should be true, and is to verify that
7765 if get_auto_increment() was called (thus may have reserved intervals or
7766 taken a table lock), ha_release_auto_increment() was too.
7767 */
7768 DBUG_ASSERT(next_insert_id == 0);
7769 /* Consecutive calls for lock without unlocking in between is not allowed */
7770 DBUG_ASSERT(table_share->tmp_table != NO_TMP_TABLE ||
7771 ((lock_type != F_UNLCK && m_lock_type == F_UNLCK) ||
7772 lock_type == F_UNLCK));
7773 /* SQL HANDLER call locks/unlock while scanning (RND/INDEX). */
7774 DBUG_ASSERT(inited == NONE || table->open_by_handler);
7775
7776 ha_statistic_increment(&System_status_var::ha_external_lock_count);
7777
7778 MYSQL_TABLE_LOCK_WAIT(PSI_TABLE_EXTERNAL_LOCK, lock_type,
7779 { error = external_lock(thd, lock_type); })
7780
7781 /*
7782 We cache the table flags if the locking succeeded. Otherwise, we
7783 keep them as they were when they were fetched in ha_open().
7784 */
7785
7786 if (error == 0) {
7787 /*
7788 The lock type is needed by MRR when creating a clone of this handler
7789 object.
7790 */
7791 m_lock_type = lock_type;
7792 cached_table_flags = table_flags();
7793 }
7794
7795 return error;
7796 }
7797
7798 /** @brief
7799 Check handler usage and reset state of file to after 'open'
7800
7801 @note can be called regardless of it is locked or not.
7802 */
int handler::ha_reset() {
  DBUG_TRACE;
  /* Check that we have called all proper deallocation functions */
  // The two default bitmaps are allocated back-to-back; if the pointers no
  // longer line up, some code swapped in a custom bitmap and never restored.
  DBUG_ASSERT((uchar *)table->def_read_set.bitmap +
                  table->s->column_bitmap_size ==
              (uchar *)table->def_write_set.bitmap);
  DBUG_ASSERT(bitmap_is_set_all(&table->s->all_set));
  DBUG_ASSERT(table->key_read == 0);
  /* ensure that ha_index_end / ha_rnd_end has been called */
  DBUG_ASSERT(inited == NONE);
  /* Free cache used by filesort */
  free_io_cache(table);
  /* reset the bitmaps to point to defaults */
  table->default_column_bitmaps();
  /* Reset the handler flags used for duplicate record handling */
  table->file->extra(HA_EXTRA_NO_IGNORE_DUP_KEY);
  table->file->extra(HA_EXTRA_WRITE_CANNOT_REPLACE);
  /* Reset information about pushed engine conditions */
  pushed_cond = nullptr;
  /* Reset information about pushed index conditions */
  cancel_pushed_idx_cond();
  // Forget the record buffer.
  m_record_buffer = nullptr;
  m_unique = nullptr;

  // Finally let the storage engine reset its own per-handler state.
  const int retval = reset();
  return retval;
}
7831
/**
  Write a row to the storage engine and, on success, log it to the binlog
  in row format (if applicable).

  @param buf  Record to insert (usually table->record[0]).

  @return 0 on success, a handler error code otherwise.
*/
int handler::ha_write_row(uchar *buf) {
  int error;
  Log_func *log_func = Write_rows_log_event::binlog_row_logging_function;
  // Writes require a write lock, except on temporary tables.
  DBUG_ASSERT(table_share->tmp_table != NO_TMP_TABLE || m_lock_type == F_WRLCK);

  DBUG_TRACE;
  DBUG_EXECUTE_IF("inject_error_ha_write_row", return HA_ERR_INTERNAL_ERROR;);
  DBUG_EXECUTE_IF("simulate_storage_engine_out_of_memory",
                  return HA_ERR_SE_OUT_OF_MEMORY;);
  // Mark the transaction as read-write before touching the engine.
  mark_trx_read_write();

  DBUG_EXECUTE_IF(
      "handler_crashed_table_on_usage",
      my_error(HA_ERR_CRASHED, MYF(ME_ERRORLOG), table_share->table_name.str);
      set_my_errno(HA_ERR_CRASHED); return HA_ERR_CRASHED;);

  /* The engine call is wrapped so performance schema can time the write. */
  MYSQL_TABLE_IO_WAIT(PSI_TABLE_WRITE_ROW, MAX_KEY, error,
                      { error = write_row(buf); })

  if (unlikely(error)) return error;

  // Row-based binlogging of the insert; only the after-image is needed.
  if (unlikely((error = binlog_log_row(table, nullptr, buf, log_func))))
    return error; /* purecov: inspected */

  DEBUG_SYNC_C("ha_write_row_end");
  return 0;
}
7859
/**
  Update a row in the storage engine and, on success, log the change to
  the binlog in row format (if applicable).

  @param old_data  Old record (must be table->record[1]).
  @param new_data  New record (must be table->record[0]).

  @return 0 on success, a handler error code otherwise.
*/
int handler::ha_update_row(const uchar *old_data, uchar *new_data) {
  int error;
  // Updates require a write lock, except on temporary tables.
  DBUG_ASSERT(table_share->tmp_table != NO_TMP_TABLE || m_lock_type == F_WRLCK);
  Log_func *log_func = Update_rows_log_event::binlog_row_logging_function;

  /*
    Some storage engines require that the new record is in record[0]
    (and the old record is in record[1]).
  */
  DBUG_ASSERT(new_data == table->record[0]);
  DBUG_ASSERT(old_data == table->record[1]);

  mark_trx_read_write();

  DBUG_EXECUTE_IF(
      "handler_crashed_table_on_usage",
      my_error(HA_ERR_CRASHED, MYF(ME_ERRORLOG), table_share->table_name.str);
      set_my_errno(HA_ERR_CRASHED); return (HA_ERR_CRASHED););

  /* The engine call is wrapped so performance schema can time the update. */
  MYSQL_TABLE_IO_WAIT(PSI_TABLE_UPDATE_ROW, active_index, error,
                      { error = update_row(old_data, new_data); })

  if (unlikely(error)) return error;
  // Row-based binlogging of the update: both before- and after-image.
  if (unlikely((error = binlog_log_row(table, old_data, new_data, log_func))))
    return error;
  return 0;
}
7887
/**
  Delete a row from the storage engine and, on success, log the delete to
  the binlog in row format (if applicable).

  @param buf  Record being deleted (table->record[0] or table->record[1]).

  @return 0 on success, a handler error code otherwise.
*/
int handler::ha_delete_row(const uchar *buf) {
  int error;
  // Deletes require a write lock, except on temporary tables.
  DBUG_ASSERT(table_share->tmp_table != NO_TMP_TABLE || m_lock_type == F_WRLCK);
  Log_func *log_func = Delete_rows_log_event::binlog_row_logging_function;
  /*
    Normally table->record[0] is used, but sometimes table->record[1] is used.
  */
  DBUG_ASSERT(buf == table->record[0] || buf == table->record[1]);
  DBUG_EXECUTE_IF("inject_error_ha_delete_row", return HA_ERR_INTERNAL_ERROR;);

  DBUG_EXECUTE_IF(
      "handler_crashed_table_on_usage",
      my_error(HA_ERR_CRASHED, MYF(ME_ERRORLOG), table_share->table_name.str);
      set_my_errno(HA_ERR_CRASHED); return (HA_ERR_CRASHED););

  mark_trx_read_write();

  /* The engine call is wrapped so performance schema can time the delete. */
  MYSQL_TABLE_IO_WAIT(PSI_TABLE_DELETE_ROW, active_index, error,
                      { error = delete_row(buf); })

  if (unlikely(error)) return error;
  // Row-based binlogging of the delete; only the before-image is needed.
  if (unlikely((error = binlog_log_row(table, buf, nullptr, log_func))))
    return error;
  return 0;
}
7913
7914 /** @brief
7915 use_hidden_primary_key() is called in case of an update/delete when
7916 (table_flags() and HA_PRIMARY_KEY_REQUIRED_FOR_DELETE) is defined
7917 but we don't have a primary key
7918 */
void handler::use_hidden_primary_key() {
  /* fallback to use all columns in the table to identify row */
  // Without a primary key, the full row image is the only reliable way to
  // find the row again for update/delete.
  table->use_all_columns();
}
7923
7924 /**
7925 Get an initialized ha_share.
7926
7927 @return Initialized ha_share
7928 @retval NULL ha_share is not yet initialized.
7929 @retval != NULL previous initialized ha_share.
7930
7931 @note
7932 If not a temp table, then LOCK_ha_data must be held.
7933 */
7934
Handler_share *handler::get_ha_share_ptr() {
  DBUG_TRACE;
  DBUG_ASSERT(ha_share && table_share);

#ifndef DBUG_OFF
  // For non-temporary tables the caller must already hold LOCK_ha_data;
  // temporary tables are single-threaded and need no lock.
  if (table_share->tmp_table == NO_TMP_TABLE)
    mysql_mutex_assert_owner(&table_share->LOCK_ha_data);
#endif

  return *ha_share;
}
7946
7947 /**
7948 Set ha_share to be used by all instances of the same table/partition.
7949
7950 @param arg_ha_share Handler_share to be shared.
7951
7952 @note
7953 If not a temp table, then LOCK_ha_data must be held.
7954 */
7955
void handler::set_ha_share_ptr(Handler_share *arg_ha_share) {
  DBUG_TRACE;
  DBUG_ASSERT(ha_share);
#ifndef DBUG_OFF
  // For non-temporary tables the caller must already hold LOCK_ha_data;
  // temporary tables are single-threaded and need no lock.
  if (table_share->tmp_table == NO_TMP_TABLE)
    mysql_mutex_assert_owner(&table_share->LOCK_ha_data);
#endif

  *ha_share = arg_ha_share;
}
7966
7967 /**
7968 Take a lock for protecting shared handler data.
7969 */
7970
lock_shared_ha_data()7971 void handler::lock_shared_ha_data() {
7972 DBUG_ASSERT(table_share);
7973 if (table_share->tmp_table == NO_TMP_TABLE)
7974 mysql_mutex_lock(&table_share->LOCK_ha_data);
7975 }
7976
7977 /**
7978 Release lock for protecting ha_share.
7979 */
7980
unlock_shared_ha_data()7981 void handler::unlock_shared_ha_data() {
7982 DBUG_ASSERT(table_share);
7983 if (table_share->tmp_table == NO_TMP_TABLE)
7984 mysql_mutex_unlock(&table_share->LOCK_ha_data);
7985 }
7986
7987 /**
7988 This structure is a helper structure for passing the length and pointer of
7989 blob space allocated by storage engine.
7990 */
struct blob_len_ptr {
  uint length;  // length of the blob (bytes allocated by the engine)
  uchar *ptr;   // pointer of the value (engine-allocated buffer)
};
7995
7996 /**
7997 Get the blob length and pointer of allocated space from the record buffer.
7998
7999 During evaluating the blob virtual generated columns, the blob space will
8000 be allocated by server. In order to keep the blob data after the table is
8001 closed, we need write the data into a specified space allocated by storage
8002 engine. Here, we have to extract the space pointer and length from the
8003 record buffer.
8004 After we get the value of virtual generated columns, copy the data into
8005 the specified space and store it in the record buffer (@see copy_blob_data()).
8006
8007 @param table the pointer of table
8008 @param fields bitmap of field index of evaluated
8009 generated column
8010 @param[out] blob_len_ptr_array an array to record the length and pointer
8011 of allocated space by storage engine.
8012 @note The caller should provide the blob_len_ptr_array with a size of
8013 MAX_FIELDS.
8014 */
8015
extract_blob_space_and_length_from_record_buff(const TABLE * table,const MY_BITMAP * const fields,blob_len_ptr * blob_len_ptr_array)8016 static void extract_blob_space_and_length_from_record_buff(
8017 const TABLE *table, const MY_BITMAP *const fields,
8018 blob_len_ptr *blob_len_ptr_array) {
8019 int num = 0;
8020 for (Field **vfield = table->vfield; *vfield; vfield++) {
8021 // Check if this field should be included
8022 if (bitmap_is_set(fields, (*vfield)->field_index()) &&
8023 (*vfield)->is_virtual_gcol() && (*vfield)->type() == MYSQL_TYPE_BLOB) {
8024 auto field = down_cast<Field_blob *>(*vfield);
8025 blob_len_ptr_array[num].length = field->data_length();
8026 // TODO: The following check is only for Innodb.
8027 DBUG_ASSERT(blob_len_ptr_array[num].length == 255 ||
8028 blob_len_ptr_array[num].length == 768 ||
8029 blob_len_ptr_array[num].length == 3073);
8030
8031 blob_len_ptr_array[num].ptr = field->get_blob_data();
8032
8033 // Let server allocate the space for BLOB virtual generated columns
8034 field->reset();
8035
8036 num++;
8037 DBUG_ASSERT(num <= MAX_FIELDS);
8038 }
8039 }
8040 }
8041
8042 /**
8043 Copy the value of BLOB virtual generated columns into the space allocated
8044 by storage engine.
8045
8046 This is because the table is closed after evaluating the value. In order to
8047 keep the BLOB value after the table is closed, we have to copy the value into
8048 the place where storage engine prepares for.
8049
8050 @param table pointer of the table to be operated on
8051 @param fields bitmap of field index of evaluated generated column
8052 @param blob_len_ptr_array array of length and pointer of allocated space by
8053 storage engine.
8054 */
8055
copy_blob_data(const TABLE * table,const MY_BITMAP * const fields,blob_len_ptr * blob_len_ptr_array)8056 static void copy_blob_data(const TABLE *table, const MY_BITMAP *const fields,
8057 blob_len_ptr *blob_len_ptr_array) {
8058 uint num = 0;
8059 for (Field **vfield = table->vfield; *vfield; vfield++) {
8060 // Check if this field should be included
8061 if (bitmap_is_set(fields, (*vfield)->field_index()) &&
8062 (*vfield)->is_virtual_gcol() && (*vfield)->type() == MYSQL_TYPE_BLOB) {
8063 DBUG_ASSERT(blob_len_ptr_array[num].length > 0);
8064 DBUG_ASSERT(blob_len_ptr_array[num].ptr != nullptr);
8065
8066 /*
8067 Only copy as much of the blob as the storage engine has
8068 allocated space for. This is sufficient since the only use of the
8069 blob in the storage engine is for using a prefix of it in a
8070 secondary index.
8071 */
8072 uint length = (*vfield)->data_length();
8073 const uint alloc_len = blob_len_ptr_array[num].length;
8074 length = length > alloc_len ? alloc_len : length;
8075
8076 Field_blob *blob_field = down_cast<Field_blob *>(*vfield);
8077 memcpy(blob_len_ptr_array[num].ptr, blob_field->get_blob_data(), length);
8078 blob_field->store_in_allocated_space(
8079 pointer_cast<char *>(blob_len_ptr_array[num].ptr), length);
8080 num++;
8081 DBUG_ASSERT(num <= MAX_FIELDS);
8082 }
8083 }
8084 }
8085
8086 /*
8087 Evaluate generated column's value. This is an internal helper reserved for
8088 handler::my_eval_gcolumn_expr().
8089
8090 @param thd pointer of THD
  @param table The pointer of the table where the evaluated generated
8092 columns are in
8093 @param fields bitmap of field index of evaluated generated column
8094 @param[in,out] record record buff of base columns generated column depends.
8095 After calling this function, it will be used to return
8096 the value of generated column.
8097 @param in_purge whether the function is called by purge thread
8098
8099 @return true in case of error, false otherwise.
8100 */
8101
static bool my_eval_gcolumn_expr_helper(THD *thd, TABLE *table,
                                        const MY_BITMAP *const fields,
                                        uchar *record, bool in_purge,
                                        const char **mv_data_ptr,
                                        ulong *mv_length) {
  DBUG_TRACE;
  DBUG_ASSERT(table && table->vfield);
  DBUG_ASSERT(!thd->is_error());

  // Temporarily repoint the table's fields into the caller-provided record
  // buffer; the original buffer is restored before returning.
  uchar *old_buf = table->record[0];
  repoint_field_to_record(table, old_buf, record);

  blob_len_ptr blob_len_ptr_array[MAX_FIELDS];

  /*
    If it's purge thread, we need get the space allocated by storage engine
    for blob.
  */
  if (in_purge)
    extract_blob_space_and_length_from_record_buff(table, fields,
                                                   blob_len_ptr_array);

  bool res = false;
  Field *mv_field = nullptr;
  MY_BITMAP fields_to_evaluate;
  my_bitmap_map bitbuf[bitmap_buffer_size(MAX_FIELDS) / sizeof(my_bitmap_map)];
  bitmap_init(&fields_to_evaluate, bitbuf, table->s->fields);
  bitmap_set_all(&fields_to_evaluate);
  bitmap_intersect(&fields_to_evaluate, fields);
  /*
    In addition to evaluating the value for the columns requested by
    the caller we also need to evaluate any virtual columns that these
    depend on.
    This loop goes through the columns that should be evaluated and
    adds all the base columns. If the base column is virtual, it has
    to be evaluated.
  */
  for (Field **vfield_ptr = table->vfield; *vfield_ptr; vfield_ptr++) {
    Field *field = *vfield_ptr;
    // Validate that the field number is less than the bit map size
    DBUG_ASSERT(field->field_index() < fields->n_bits);

    if (bitmap_is_set(fields, field->field_index())) {
      bitmap_union(&fields_to_evaluate, &field->gcol_info->base_columns_map);
      if (field->is_array()) {
        // Remember the typed-array (multi-valued) field so its computed
        // value can be handed back via mv_data_ptr/mv_length below.
        mv_field = field;
        // Backup current value and use dedicated temporary buffer
        if ((down_cast<Field_blob *>(field))->backup_blob_field()) return true;
      }
    }
  }

  /*
    Evaluate all requested columns and all base columns these depends
    on that are virtual.

    This function is called by the storage engine, which may request to
    evaluate more generated columns than read_set/write_set says.
    For example, InnoDB's row_sel_sec_rec_is_for_clust_rec() reads the full
    record from the clustered index and asks us to compute generated columns
    that match key fields in the used secondary index. So we trust that the
    engine has filled all base columns necessary to requested computations,
    and we ignore read_set/write_set.
  */

  my_bitmap_map *old_maps[2];
  dbug_tmp_use_all_columns(table, old_maps, table->read_set, table->write_set);

  for (Field **vfield_ptr = table->vfield; *vfield_ptr; vfield_ptr++) {
    Field *field = *vfield_ptr;

    // Check if we should evaluate this field
    if (bitmap_is_set(&fields_to_evaluate, field->field_index()) &&
        field->is_virtual_gcol()) {
      DBUG_ASSERT(field->gcol_info && field->gcol_info->expr_item->fixed);

      const type_conversion_status save_in_field_status =
          field->gcol_info->expr_item->save_in_field(field, false);
      DBUG_ASSERT(!thd->is_error() || save_in_field_status != TYPE_OK);

      /*
        save_in_field() may return non-zero even if there was no
        error. This happens if a warning is raised, such as an
        out-of-range warning when converting the result to the target
        type of the virtual column. We should stop only if the
        non-zero return value was caused by an actual error.
      */
      if (save_in_field_status != TYPE_OK && thd->is_error()) {
        res = true;
        break;
      }
    }
  }

  dbug_tmp_restore_column_maps(table->read_set, table->write_set, old_maps);

  /*
    If it's a purge thread, we need copy the blob data into specified place
    allocated by storage engine so that the blob data still can be accessed
    after table is closed.
  */
  if (in_purge) copy_blob_data(table, fields, blob_len_ptr_array);

  if (mv_field) {
    DBUG_ASSERT(mv_data_ptr);
    Field_json *fld = down_cast<Field_json *>(mv_field);
    // Save calculated value
    *mv_data_ptr = fld->get_binary();
    *mv_length = fld->data_length();
    // Restore original value
    (fld)->restore_blob_backup();
  }

  // Point the fields back at the table's own record buffer.
  repoint_field_to_record(table, record, old_buf);
  return res;
}
8218
8219 // Set se_private_id and se_private_data during upgrade
// Set se_private_id and se_private_data during upgrade
/**
  @param thd         Thread handle.
  @param dbname      Database name.
  @param table_name  Table name.
  @param dd_table    Data-dictionary table object to fill in.
  @param table_arg   TABLE object; stored in this handler before delegating.

  @return Result of the engine's upgrade_table() (true on error).
*/
bool handler::ha_upgrade_table(THD *thd, const char *dbname,
                               const char *table_name, dd::Table *dd_table,
                               TABLE *table_arg) {
  // The engine's upgrade_table() expects this->table to be set.
  table = table_arg;
  return upgrade_table(thd, dbname, table_name, dd_table);
}
8226
8227 /**
8228 Callback to allow InnoDB to prepare a template for generated
8229 column processing. This function will open the table without
8230 opening in the engine and call the provided function with
8231 the TABLE object made. The function will then close the TABLE.
8232
8233 @param thd Thread handle
8234 @param db_name Name of database containing the table
8235 @param table_name Name of table to open
8236 @param myc InnoDB function to call for processing TABLE
8237 @param ib_table Argument for InnoDB function
8238
8239 @return true in case of error, false otherwise.
8240 */
8241
bool handler::my_prepare_gcolumn_template(THD *thd, const char *db_name,
                                          const char *table_name,
                                          my_gcolumn_template_callback_t myc,
                                          void *ib_table) {
  char path[FN_REFLEN + 1];
  bool was_truncated;
  build_table_filename(path, sizeof(path) - 1 - reg_ext_length, db_name,
                       table_name, "", 0, &was_truncated);
  DBUG_ASSERT(!was_truncated);
  bool rc = true;

  // Shared MDL protects the table definition while it is being opened.
  MDL_ticket *mdl_ticket = nullptr;
  if (dd::acquire_shared_table_mdl(thd, db_name, table_name, false,
                                   &mdl_ticket))
    return true;

  TABLE *table = nullptr;
  {
    dd::cache::Dictionary_client::Auto_releaser releaser(thd->dd_client());
    const dd::Table *tab_obj = nullptr;
    // NOTE(review): this early return skips dd::release_mdl() below —
    // presumably the ticket is cleaned up at statement/transaction end;
    // confirm before relying on it.
    if (thd->dd_client()->acquire(db_name, table_name, &tab_obj)) return true;
    DBUG_ASSERT(tab_obj);

    // Note! The second-to-last argument to open_table_uncached() must be false,
    // since the table already exists in the TDC. Allowing the table to
    // be opened in the SE in this case is dangerous as the two shares
    // could get conflicting SE private data.
    table = open_table_uncached(thd, path, db_name, table_name, false, false,
                                *tab_obj);
  }

  dd::release_mdl(thd, mdl_ticket);

  if (table) {
    // Let InnoDB build its template from the server-side TABLE object.
    myc(table, ib_table);
    intern_close_table(table);
    rc = false;
  }
  return rc;
}
8282
8283 /**
8284 Callback for generated columns processing. Will open the table, in the
8285 server *only*, and call my_eval_gcolumn_expr_helper() to do the actual
8286 processing. This function is a variant of the other
8287 handler::my_eval_gcolumn_expr() but is intended for use when no TABLE
8288 object already exists - e.g. from purge threads.
8289
8290 Note! The call to open_table_uncached() must be made with the second-to-last
8291 argument (open_in_engine) set to false. Failing to do so will cause
8292 deadlocks and incorrect behavior.
8293
8294 @param thd thread handle
8295 @param db_name database containing the table to open
8296 @param table_name name of table to open
8297 @param fields bitmap of field index of evaluated generated column
8298 @param record record buffer
8299 @param[out] mv_data_ptr For a typed array field in this arg the pointer
8300 to its value is returned
8301 @param[out] mv_length Length of the value above
8302
8303 @return true in case of error, false otherwise.
8304 */
8305
bool handler::my_eval_gcolumn_expr_with_open(THD *thd, const char *db_name,
                                             const char *table_name,
                                             const MY_BITMAP *const fields,
                                             uchar *record,
                                             const char **mv_data_ptr,
                                             ulong *mv_length) {
  bool retval = true;

  char path[FN_REFLEN + 1];
  bool was_truncated;
  build_table_filename(path, sizeof(path) - 1 - reg_ext_length, db_name,
                       table_name, "", 0, &was_truncated);
  DBUG_ASSERT(!was_truncated);

  // Shared MDL protects the table definition while it is being opened.
  MDL_ticket *mdl_ticket = nullptr;
  if (dd::acquire_shared_table_mdl(thd, db_name, table_name, false,
                                   &mdl_ticket))
    return true;

  TABLE *table = nullptr;
  {
    dd::cache::Dictionary_client::Auto_releaser releaser(thd->dd_client());
    const dd::Table *tab_obj = nullptr;
    // NOTE(review): this early return skips dd::release_mdl() below —
    // presumably the ticket is cleaned up at statement/transaction end;
    // confirm before relying on it.
    if (thd->dd_client()->acquire(db_name, table_name, &tab_obj)) return true;
    DBUG_ASSERT(tab_obj);

    // open_in_engine must be false here; see the function comment above.
    table = open_table_uncached(thd, path, db_name, table_name, false, false,
                                *tab_obj);
  }

  dd::release_mdl(thd, mdl_ticket);

  if (table) {
    // in_purge == true: blob values must survive the table being closed.
    retval = my_eval_gcolumn_expr_helper(thd, table, fields, record, true,
                                         mv_data_ptr, mv_length);
    intern_close_table(table);
  }

  return retval;
}
8346
my_eval_gcolumn_expr(THD * thd,TABLE * table,const MY_BITMAP * const fields,uchar * record,const char ** mv_data_ptr,ulong * mv_length)8347 bool handler::my_eval_gcolumn_expr(THD *thd, TABLE *table,
8348 const MY_BITMAP *const fields, uchar *record,
8349 const char **mv_data_ptr, ulong *mv_length) {
8350 DBUG_TRACE;
8351
8352 const bool res = my_eval_gcolumn_expr_helper(thd, table, fields, record,
8353 false, mv_data_ptr, mv_length);
8354 return res;
8355 }
8356
/**
  Add the current row's id to the active unique filter.

  @return true if the row id was already present (i.e. a duplicate),
          per Unique_on_insert::unique_add() — TODO confirm return
          convention against its definition.
*/
bool handler::filter_dup_records() {
  // Requires an active index scan and an enabled unique record filter.
  DBUG_ASSERT(inited == INDEX && m_unique);
  position(table->record[0]);
  return m_unique->unique_add(ref);
}
8362
ha_extra(enum ha_extra_function operation)8363 int handler::ha_extra(enum ha_extra_function operation) {
8364 if (operation == HA_EXTRA_ENABLE_UNIQUE_RECORD_FILTER) {
8365 // This operation should be called only for active multi-valued index
8366 DBUG_ASSERT(inited == INDEX &&
8367 (table->key_info[active_index].flags & HA_MULTI_VALUED_KEY));
8368 // This unique filter uses only row id to weed out duplicates. Due to that
8369 // it will work with any active index.
8370 if (!m_unique &&
8371 (!(m_unique = new (*THR_MALLOC) Unique_on_insert(ref_length)) ||
8372 m_unique->init())) {
8373 /* purecov: begin inspected */
8374 destroy(m_unique);
8375 return HA_ERR_OUT_OF_MEM;
8376 /* purecov: end */
8377 }
8378 m_unique->reset(true);
8379 return 0;
8380 } else if (operation == HA_EXTRA_DISABLE_UNIQUE_RECORD_FILTER) {
8381 if (m_unique) {
8382 m_unique->cleanup();
8383 destroy(m_unique);
8384 m_unique = nullptr;
8385 }
8386 }
8387 return extra(operation);
8388 }
8389
8390 /**
8391 Auxiliary structure for passing information to notify_*_helper()
8392 functions.
8393 */
8394
8395 struct HTON_NOTIFY_PARAMS {
HTON_NOTIFY_PARAMSHTON_NOTIFY_PARAMS8396 HTON_NOTIFY_PARAMS(const MDL_key *mdl_key, ha_notification_type mdl_type)
8397 : key(mdl_key),
8398 notification_type(mdl_type),
8399 some_htons_were_notified(false),
8400 victimized(false) {}
8401
8402 const MDL_key *key;
8403 const ha_notification_type notification_type;
8404 bool some_htons_were_notified;
8405 bool victimized;
8406 };
8407
notify_exclusive_mdl_helper(THD * thd,plugin_ref plugin,void * arg)8408 static bool notify_exclusive_mdl_helper(THD *thd, plugin_ref plugin,
8409 void *arg) {
8410 handlerton *hton = plugin_data<handlerton *>(plugin);
8411 if (hton->state == SHOW_OPTION_YES && hton->notify_exclusive_mdl) {
8412 HTON_NOTIFY_PARAMS *params = reinterpret_cast<HTON_NOTIFY_PARAMS *>(arg);
8413
8414 if (hton->notify_exclusive_mdl(thd, params->key, params->notification_type,
8415 ¶ms->victimized)) {
8416 // Ignore failures from post event notification.
8417 if (params->notification_type == HA_NOTIFY_PRE_EVENT) return true;
8418 } else
8419 params->some_htons_were_notified = true;
8420 }
8421 return false;
8422 }
8423
8424 /**
8425 Notify/get permission from all interested storage engines before
8426 acquisition or after release of exclusive metadata lock on object
8427 represented by key.
8428
8429 @param thd Thread context.
8430 @param mdl_key MDL key identifying object on which exclusive
8431 lock is to be acquired/was released.
8432 @param notification_type Indicates whether this is pre-acquire or
8433 post-release notification.
8434 @param victimized 'true' if locking failed as we were selected
8435 as a victim in order to avoid possible deadlocks.
8436
8437 See @sa handlerton::notify_exclusive_mdl for details about
8438 calling convention and error reporting.
8439
8440 @return False - if notification was successful/lock can be acquired,
8441 True - if it has failed/lock should not be acquired.
8442 */
8443
ha_notify_exclusive_mdl(THD * thd,const MDL_key * mdl_key,ha_notification_type notification_type,bool * victimized)8444 bool ha_notify_exclusive_mdl(THD *thd, const MDL_key *mdl_key,
8445 ha_notification_type notification_type,
8446 bool *victimized) {
8447 HTON_NOTIFY_PARAMS params(mdl_key, notification_type);
8448 *victimized = false;
8449 if (plugin_foreach(thd, notify_exclusive_mdl_helper,
8450 MYSQL_STORAGE_ENGINE_PLUGIN, ¶ms)) {
8451 *victimized = params.victimized;
8452 /*
8453 If some SE hasn't given its permission to acquire lock and some SEs
8454 has given their permissions, we need to notify the latter group about
8455 failed lock acquisition. We do this by calling post-release notification
8456 for all interested SEs unconditionally.
8457 */
8458 if (notification_type == HA_NOTIFY_PRE_EVENT &&
8459 params.some_htons_were_notified) {
8460 HTON_NOTIFY_PARAMS rollback_params(mdl_key, HA_NOTIFY_POST_EVENT);
8461 (void)plugin_foreach(thd, notify_exclusive_mdl_helper,
8462 MYSQL_STORAGE_ENGINE_PLUGIN, &rollback_params);
8463 }
8464 return true;
8465 }
8466 return false;
8467 }
8468
notify_alter_table_helper(THD * thd,plugin_ref plugin,void * arg)8469 static bool notify_alter_table_helper(THD *thd, plugin_ref plugin, void *arg) {
8470 handlerton *hton = plugin_data<handlerton *>(plugin);
8471 if (hton->state == SHOW_OPTION_YES && hton->notify_alter_table) {
8472 HTON_NOTIFY_PARAMS *params = reinterpret_cast<HTON_NOTIFY_PARAMS *>(arg);
8473
8474 if (hton->notify_alter_table(thd, params->key, params->notification_type)) {
8475 // Ignore failures from post event notification.
8476 if (params->notification_type == HA_NOTIFY_PRE_EVENT) return true;
8477 } else
8478 params->some_htons_were_notified = true;
8479 }
8480 return false;
8481 }
8482
8483 /**
8484 Notify/get permission from all interested storage engines before
8485 or after executed ALTER TABLE on the table identified by key.
8486
8487 @param thd Thread context.
8488 @param mdl_key MDL key identifying table.
8489 @param notification_type Indicates whether this is pre-ALTER or
8490 post-ALTER notification.
8491
8492 See @sa handlerton::notify_alter_table for rationale,
8493 details about calling convention and error reporting.
8494
8495 @return False - if notification was successful/ALTER TABLE can
8496 proceed.
8497 True - if it has failed/ALTER TABLE should fail.
8498 */
8499
ha_notify_alter_table(THD * thd,const MDL_key * mdl_key,ha_notification_type notification_type)8500 bool ha_notify_alter_table(THD *thd, const MDL_key *mdl_key,
8501 ha_notification_type notification_type) {
8502 HTON_NOTIFY_PARAMS params(mdl_key, notification_type);
8503
8504 if (plugin_foreach(thd, notify_alter_table_helper,
8505 MYSQL_STORAGE_ENGINE_PLUGIN, ¶ms)) {
8506 /*
8507 If some SE hasn't given its permission to do ALTER TABLE and some SEs
8508 has given their permissions, we need to notify the latter group about
8509 failed attemopt. We do this by calling post-ALTER TABLE notification
8510 for all interested SEs unconditionally.
8511 */
8512 if (notification_type == HA_NOTIFY_PRE_EVENT &&
8513 params.some_htons_were_notified) {
8514 HTON_NOTIFY_PARAMS rollback_params(mdl_key, HA_NOTIFY_POST_EVENT);
8515 (void)plugin_foreach(thd, notify_alter_table_helper,
8516 MYSQL_STORAGE_ENGINE_PLUGIN, &rollback_params);
8517 }
8518 return true;
8519 }
8520 return false;
8521 }
8522
8523 /**
8524 Set the transaction isolation level for the next transaction and update
8525 session tracker information about the transaction isolation level.
8526
8527 @param thd THD session setting the tx_isolation.
8528 @param tx_isolation The isolation level to be set.
8529 @param one_shot True if the isolation level should be restored to
8530 session default after finishing the transaction.
8531 */
set_tx_isolation(THD * thd,enum_tx_isolation tx_isolation,bool one_shot)8532 bool set_tx_isolation(THD *thd, enum_tx_isolation tx_isolation, bool one_shot) {
8533 TX_TRACKER_GET(tst);
8534
8535 if (thd->variables.session_track_transaction_info <= TX_TRACK_NONE)
8536 tst = nullptr;
8537
8538 thd->tx_isolation = tx_isolation;
8539
8540 if (one_shot) {
8541 DBUG_ASSERT(!thd->in_active_multi_stmt_transaction());
8542 DBUG_ASSERT(!thd->in_sub_stmt);
8543 enum enum_tx_isol_level l;
8544 switch (thd->tx_isolation) {
8545 case ISO_READ_UNCOMMITTED:
8546 l = TX_ISOL_UNCOMMITTED;
8547 break;
8548 case ISO_READ_COMMITTED:
8549 l = TX_ISOL_COMMITTED;
8550 break;
8551 case ISO_REPEATABLE_READ:
8552 l = TX_ISOL_REPEATABLE;
8553 break;
8554 case ISO_SERIALIZABLE:
8555 l = TX_ISOL_SERIALIZABLE;
8556 break;
8557 default:
8558 DBUG_ASSERT(0);
8559 return true;
8560 }
8561 if (tst) tst->set_isol_level(thd, l);
8562 } else if (tst) {
8563 tst->set_isol_level(thd, TX_ISOL_INHERIT);
8564 }
8565 return false;
8566 }
8567
post_recover_handlerton(THD *,plugin_ref plugin,void *)8568 static bool post_recover_handlerton(THD *, plugin_ref plugin, void *) {
8569 handlerton *hton = plugin_data<handlerton *>(plugin);
8570
8571 if (hton->state == SHOW_OPTION_YES && hton->post_recover)
8572 hton->post_recover();
8573
8574 return false;
8575 }
8576
/** Notify all storage engines that crash recovery has completed. */
void ha_post_recover(void) {
  (void)plugin_foreach(nullptr, post_recover_handlerton,
                       MYSQL_STORAGE_ENGINE_PLUGIN, nullptr);
}
8581
/**
  Remember the primary-engine handler for this secondary-engine handler.

  @param primary_handler  Handler of the primary storage engine; its table
                          must have a secondary engine configured.
*/
void handler::ha_set_primary_handler(handler *primary_handler) {
  // Only valid on a handler belonging to a secondary engine.
  DBUG_ASSERT((ht->flags & HTON_IS_SECONDARY_ENGINE) != 0);
  DBUG_ASSERT(primary_handler->table->s->has_secondary_engine());
  m_primary_handler = primary_handler;
}
8587
8588 /**
8589 Checks if the database name is reserved word used by SE by invoking
8590 the handlerton method.
8591
8592 @param plugin SE plugin.
8593 @param name Database name.
8594
8595 @retval true If the name is reserved word.
8596 @retval false If the name is not reserved word.
8597 */
is_reserved_db_name_handlerton(THD *,plugin_ref plugin,void * name)8598 static bool is_reserved_db_name_handlerton(THD *, plugin_ref plugin,
8599 void *name) {
8600 handlerton *hton = plugin_data<handlerton *>(plugin);
8601 if (hton->state == SHOW_OPTION_YES && hton->is_reserved_db_name)
8602 return (hton->is_reserved_db_name(hton, (const char *)name));
8603 return false;
8604 }
8605
8606 /**
8607 Check if the database name is reserved word used by SE.
8608
8609 @param name Database name.
8610
8611 @retval true If the name is a reserved word.
8612 @retval false If the name is not a reserved word.
8613 */
ha_check_reserved_db_name(const char * name)8614 bool ha_check_reserved_db_name(const char *name) {
8615 return (plugin_foreach(nullptr, is_reserved_db_name_handlerton,
8616 MYSQL_STORAGE_ENGINE_PLUGIN,
8617 const_cast<char *>(name)));
8618 }
8619
8620 /**
8621 Check whether an error is index access error or not
8622 after an index read. Error other than HA_ERR_END_OF_FILE
8623 or HA_ERR_KEY_NOT_FOUND will stop next index read.
8624
8625 @param error Handler error code.
8626
8627 @retval true if error is different from HA_ERR_END_OF_FILE or
8628 HA_ERR_KEY_NOT_FOUND.
8629 @retval false if error is HA_ERR_END_OF_FILE or HA_ERR_KEY_NOT_FOUND.
8630 */
is_index_access_error(int error)8631 bool is_index_access_error(int error) {
8632 return (error != HA_ERR_END_OF_FILE && error != HA_ERR_KEY_NOT_FOUND);
8633 }
8634