1 /* Copyright (c) 2000, 2021, Oracle and/or its affiliates.
2
3 This program is free software; you can redistribute it and/or modify
4 it under the terms of the GNU General Public License, version 2.0,
5 as published by the Free Software Foundation.
6
7 This program is also distributed with certain software (including
8 but not limited to OpenSSL) that is licensed under separate terms,
9 as designated in a particular file or component or in included license
10 documentation. The authors of MySQL hereby grant you an additional
11 permission to link the program and your derivative works with the
12 separately licensed software that they have included with MySQL.
13
14 This program is distributed in the hope that it will be useful,
15 but WITHOUT ANY WARRANTY; without even the implied warranty of
16 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 GNU General Public License, version 2.0, for more details.
18
19 You should have received a copy of the GNU General Public License
20 along with this program; if not, write to the Free Software Foundation,
21 Inc., 51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA */
22
23 /** @file handler.cc
24
25 @brief
26 Handler-calling-functions
27 */
28
29 #include "handler.h"
30
31 #include "my_bit.h" // my_count_bits
32 #include "myisam.h" // TT_FOR_UPGRADE
33 #include "mysql_version.h" // MYSQL_VERSION_ID
34
35 #include "binlog.h" // mysql_bin_log
36 #include "debug_sync.h" // DEBUG_SYNC
37 #include "discover.h" // writefrm
38 #include "log.h" // sql_print_error
39 #include "log_event.h" // Write_rows_log_event
40 #include "my_bitmap.h" // MY_BITMAP
41 #include "probes_mysql.h" // MYSQL_HANDLER_WRLOCK_START
42 #include "opt_costconstantcache.h" // reload_optimizer_cost_constants
43 #include "rpl_handler.h" // RUN_HOOK
44 #include "sql_base.h" // free_io_cache
45 #include "sql_parse.h" // check_stack_overrun
46 #include "sql_plugin.h" // plugin_foreach
47 #include "sql_table.h" // build_table_filename
48 #include "transaction.h" // trans_commit_implicit
49 #include "trigger_def.h" // TRG_EXT
50 #include "sql_select.h" // actual_key_parts
51 #include "rpl_write_set_handler.h" // add_pke
52 #include "auth_common.h" // check_readonly() and SUPER_ACL
53
54
55 #include "pfs_file_provider.h"
56 #include "mysql/psi/mysql_file.h"
57
58 #include <pfs_table_provider.h>
59 #include <mysql/psi/mysql_table.h>
60
61 #include <pfs_transaction_provider.h>
62 #include <mysql/psi/mysql_transaction.h>
63 #include "opt_hints.h"
64
65 #include <list>
66 #include <cstring>
67 #include <string>
68 #include <boost/foreach.hpp>
69 #include <boost/tokenizer.hpp>
70 #include <boost/algorithm/string.hpp>
71
72 /**
73 @def MYSQL_TABLE_IO_WAIT
74 Instrumentation helper for table io_waits.
75 Note that this helper is intended to be used from
76 within the handler class only, as it uses members
77 from @c handler
78 Performance schema events are instrumented as follows:
79 - in non batch mode, one event is generated per call
80 - in batch mode, the number of rows affected is saved
81 in @c m_psi_numrows, so that @c end_psi_batch_mode()
82 generates a single event for the batch.
83 @param OP the table operation to be performed
84 @param INDEX the table index used if any, or MAX_KEY.
85 @param PAYLOAD instrumented code to execute
86 @sa handler::end_psi_batch_mode.
87 */
#ifdef HAVE_PSI_TABLE_INTERFACE
/*
  Instrumented implementation; see the @def documentation above.
  The three switch cases mirror the batch-mode state machine kept in
  handler::m_psi_batch_mode:
  - NONE:     one PSI event per call, using a stack-local locker/state
              so the macro stays reentrancy-safe.
  - STARTING: first row of a batch; the locker is stored in the handler
              (m_psi_locker) and closed later by end_psi_batch_mode().
  - STARTED:  subsequent rows; only the row counter is updated.
*/
#define MYSQL_TABLE_IO_WAIT(OP, INDEX, RESULT, PAYLOAD)       \
  {                                                           \
    if (m_psi != NULL)                                        \
    {                                                         \
      switch (m_psi_batch_mode)                               \
      {                                                       \
        case PSI_BATCH_MODE_NONE:                             \
        {                                                     \
          PSI_table_locker *sub_locker= NULL;                 \
          PSI_table_locker_state reentrant_safe_state;        \
          sub_locker= PSI_TABLE_CALL(start_table_io_wait)     \
            (& reentrant_safe_state, m_psi, OP, INDEX,        \
             __FILE__, __LINE__);                             \
          PAYLOAD                                             \
          if (sub_locker != NULL)                             \
            PSI_TABLE_CALL(end_table_io_wait)                 \
              (sub_locker, 1);                                \
          break;                                              \
        }                                                     \
        case PSI_BATCH_MODE_STARTING:                         \
        {                                                     \
          m_psi_locker= PSI_TABLE_CALL(start_table_io_wait)   \
            (& m_psi_locker_state, m_psi, OP, INDEX,          \
             __FILE__, __LINE__);                             \
          PAYLOAD                                             \
          if (!RESULT)                                        \
            m_psi_numrows++;                                  \
          m_psi_batch_mode= PSI_BATCH_MODE_STARTED;           \
          break;                                              \
        }                                                     \
        case PSI_BATCH_MODE_STARTED:                          \
        default:                                              \
        {                                                     \
          assert(m_psi_batch_mode                             \
                 == PSI_BATCH_MODE_STARTED);                  \
          PAYLOAD                                             \
          if (!RESULT)                                        \
            m_psi_numrows++;                                  \
          break;                                              \
        }                                                     \
      }                                                       \
    }                                                         \
    else                                                      \
    {                                                         \
      PAYLOAD                                                 \
    }                                                         \
  }
#else
/* Performance schema not compiled in: execute the payload directly. */
#define MYSQL_TABLE_IO_WAIT(OP, INDEX, RESULT, PAYLOAD) \
  PAYLOAD
#endif
140
141 /**
142 @def MYSQL_TABLE_LOCK_WAIT
Instrumentation helper for table lock waits.
144 @param OP the table operation to be performed
145 @param FLAGS per table operation flags.
146 @param PAYLOAD the code to instrument.
147 @sa MYSQL_END_TABLE_WAIT.
148 */
#ifdef HAVE_PSI_TABLE_INTERFACE
/*
  Instrumented implementation: brackets PAYLOAD with a table lock wait
  event. Lock waits are never batched, so the locker and its state live
  on the stack.
*/
#define MYSQL_TABLE_LOCK_WAIT(OP, FLAGS, PAYLOAD)        \
  {                                                      \
    if (m_psi != NULL)                                   \
    {                                                    \
      PSI_table_locker *locker;                          \
      PSI_table_locker_state state;                      \
      locker= PSI_TABLE_CALL(start_table_lock_wait)      \
        (& state, m_psi, OP, FLAGS,                      \
         __FILE__, __LINE__);                            \
      PAYLOAD                                            \
      if (locker != NULL)                                \
        PSI_TABLE_CALL(end_table_lock_wait)(locker);     \
    }                                                    \
    else                                                 \
    {                                                    \
      PAYLOAD                                            \
    }                                                    \
  }
#else
/* Performance schema not compiled in: execute the payload directly. */
#define MYSQL_TABLE_LOCK_WAIT(OP, FLAGS, PAYLOAD) \
  PAYLOAD
#endif
172
173 using std::min;
174 using std::max;
175 using std::list;
176
177 // This is a temporary backporting fix.
#ifndef HAVE_LOG2
/*
  This will be slightly slower and perhaps a tiny bit less accurate than
  doing it the IEEE754 way but log2() should be available on C99 systems.

  log2(x) = ln(x) / ln(2); M_LN2 is the natural logarithm of 2.
*/
inline double log2(double x)
{
  return (log(x) / M_LN2);
}
#endif
188
/*
  While we have legacy_db_type, we have this array to
  check for dups and to find handlerton from legacy_db_type.
  Remove when legacy_db_type is finally gone.
  Indexed by handlerton::slot (see ha_initialize_handlerton()).
*/
st_plugin_int *hton2plugin[MAX_HA];

/**
  Array allowing to check if handlerton is builtin without
  acquiring LOCK_plugin. Indexed by handlerton::slot, kept in sync
  with hton2plugin[] by ha_initialize/finalize_handlerton().
*/
static bool builtin_htons[MAX_HA];
201
ha_resolve_storage_engine_name(const handlerton * db_type)202 const char *ha_resolve_storage_engine_name(const handlerton *db_type)
203 {
204 return db_type == NULL ? "UNKNOWN" : hton2plugin[db_type->slot]->name.str;
205 }
206
/* Maps legacy_db_type codes to the registered handlerton, if any. */
static handlerton *installed_htons[128];

#define BITMAP_STACKBUF_SIZE (128/8)

/* Key creation defaults used when no explicit options are given. */
KEY_CREATE_INFO default_key_create_info=
  { HA_KEY_ALG_UNDEF, 0, {NullS, 0}, {NullS, 0}, true };

/* number of entries in handlertons[] */
ulong total_ha= 0;
/* number of storage engines (from handlertons[]) that support 2pc */
ulong total_ha_2pc= 0;
/* size of savepoint storage area (see ha_init) */
ulong savepoint_alloc_size= 0;

/*
  Pairs of engine names, terminated by a {NullS, 0} entry. The table is
  scanned two entries at a time; ha_resolve_by_name() maps the first
  name of each pair to the second.
*/
static const LEX_STRING sys_table_aliases[]=
{
  { C_STRING_WITH_LEN("INNOBASE") },  { C_STRING_WITH_LEN("INNODB") },
  { C_STRING_WITH_LEN("NDB") },       { C_STRING_WITH_LEN("NDBCLUSTER") },
  { C_STRING_WITH_LEN("HEAP") },      { C_STRING_WITH_LEN("MEMORY") },
  { C_STRING_WITH_LEN("MERGE") },     { C_STRING_WITH_LEN("MRG_MYISAM") },
  {NullS, 0}
};

/* Printable row-format names; presumably indexed by enum row_type — confirm. */
const char *ha_row_type[] = {
  "", "FIXED", "DYNAMIC", "COMPRESSED", "REDUNDANT", "COMPACT",
  /* Reserved to be "PAGE" in future versions */ "?",
  "?","?","?"
};

/* Isolation level names, NullS-terminated, plus the matching TYPELIB. */
const char *tx_isolation_names[] =
{ "READ-UNCOMMITTED", "READ-COMMITTED", "REPEATABLE-READ", "SERIALIZABLE",
  NullS};
TYPELIB tx_isolation_typelib= {array_elements(tx_isolation_names)-1,"",
                               tx_isolation_names, NULL};
241
242 #ifndef NDEBUG
243
/**
  Map a legacy_db_type value to its symbolic name, for debug output.
  Unrecognized (dynamically assigned) codes report as "DB_TYPE_DYNAMIC".
  Compiled in debug builds only (see the enclosing #ifndef NDEBUG).
*/
const char *ha_legacy_type_name(legacy_db_type legacy_type)
{
  switch (legacy_type)
  {
  case DB_TYPE_UNKNOWN:
    return "DB_TYPE_UNKNOWN";
  case DB_TYPE_DIAB_ISAM:
    return "DB_TYPE_DIAB_ISAM";
  case DB_TYPE_HASH:
    return "DB_TYPE_HASH";
  case DB_TYPE_MISAM:
    return "DB_TYPE_MISAM";
  case DB_TYPE_PISAM:
    return "DB_TYPE_PISAM";
  case DB_TYPE_RMS_ISAM:
    return "DB_TYPE_RMS_ISAM";
  case DB_TYPE_HEAP:
    return "DB_TYPE_HEAP";
  case DB_TYPE_ISAM:
    return "DB_TYPE_ISAM";
  case DB_TYPE_MRG_ISAM:
    return "DB_TYPE_MRG_ISAM";
  case DB_TYPE_MYISAM:
    return "DB_TYPE_MYISAM";
  case DB_TYPE_MRG_MYISAM:
    return "DB_TYPE_MRG_MYISAM";
  case DB_TYPE_BERKELEY_DB:
    return "DB_TYPE_BERKELEY_DB";
  case DB_TYPE_INNODB:
    return "DB_TYPE_INNODB";
  case DB_TYPE_GEMINI:
    return "DB_TYPE_GEMINI";
  case DB_TYPE_NDBCLUSTER:
    return "DB_TYPE_NDBCLUSTER";
  case DB_TYPE_EXAMPLE_DB:
    return "DB_TYPE_EXAMPLE_DB";
  case DB_TYPE_ARCHIVE_DB:
    return "DB_TYPE_ARCHIVE_DB";
  case DB_TYPE_CSV_DB:
    return "DB_TYPE_CSV_DB";
  case DB_TYPE_FEDERATED_DB:
    return "DB_TYPE_FEDERATED_DB";
  case DB_TYPE_BLACKHOLE_DB:
    return "DB_TYPE_BLACKHOLE_DB";
  case DB_TYPE_PARTITION_DB:
    return "DB_TYPE_PARTITION_DB";
  case DB_TYPE_BINLOG:
    return "DB_TYPE_BINLOG";
  case DB_TYPE_SOLID:
    return "DB_TYPE_SOLID";
  case DB_TYPE_PBXT:
    return "DB_TYPE_PBXT";
  case DB_TYPE_TABLE_FUNCTION:
    return "DB_TYPE_TABLE_FUNCTION";
  case DB_TYPE_MEMCACHE:
    return "DB_TYPE_MEMCACHE";
  case DB_TYPE_FALCON:
    return "DB_TYPE_FALCON";
  case DB_TYPE_MARIA:
    return "DB_TYPE_MARIA";
  case DB_TYPE_PERFORMANCE_SCHEMA:
    return "DB_TYPE_PERFORMANCE_SCHEMA";
  default:
    return "DB_TYPE_DYNAMIC";
  }
}
310 #endif
311
/**
  Database name that hold most of mysqld system tables.
  Current code assumes that, there exists only some
  specific "database name" designated as system database.
*/
const char* mysqld_system_database= "mysql";

/*
  System tables that belong to mysqld_system_database.
  NULL-terminated; consumers scan until the sentinel entry.
*/
st_handler_tablename mysqld_system_tables[]= {
  {mysqld_system_database, "db"},
  {mysqld_system_database, "user"},
  {mysqld_system_database, "host"},
  {mysqld_system_database, "func"},
  {mysqld_system_database, "proc"},
  {mysqld_system_database, "event"},
  {mysqld_system_database, "plugin"},
  {mysqld_system_database, "servers"},
  {mysqld_system_database, "procs_priv"},
  {mysqld_system_database, "tables_priv"},
  {mysqld_system_database, "proxies_priv"},
  {mysqld_system_database, "columns_priv"},
  {mysqld_system_database, "time_zone"},
  {mysqld_system_database, "time_zone_name"},
  {mysqld_system_database, "time_zone_leap_second"},
  {mysqld_system_database, "time_zone_transition"},
  {mysqld_system_database, "time_zone_transition_type"},
  {mysqld_system_database, "help_category"},
  {mysqld_system_database, "help_keyword"},
  {mysqld_system_database, "help_relation"},
  {mysqld_system_database, "help_topic"},
  {mysqld_system_database, "innodb_table_stats"},
  {mysqld_system_database, "innodb_index_stats"},
  {(const char *)NULL, (const char *)NULL} /* This must be at the end */
};

/**
  This static pointer holds list of system databases from SQL layer and
  various SE's. The required memory is allocated once, and never freed.
  Populated from ha_init() via ha_known_system_databases().
*/
static const char **known_system_databases= NULL;
static const char **ha_known_system_databases();

// Called for each SE to get SE specific system database.
static my_bool system_databases_handlerton(THD *unused, plugin_ref plugin,
                                           void *arg);

// Called for each SE to check if given db.table_name is a system table.
static my_bool check_engine_system_table_handlerton(THD *unused,
                                                    plugin_ref plugin,
                                                    void *arg);
/**
  Structure used by SE during check for system table.
  This structure is passed to each SE handlerton and the status (OUT param)
  is collected. The IN params identify the table being checked and the
  engine it belongs to.
*/
struct st_sys_tbl_chk_params
{
  const char *db;                  // IN param
  const char *table_name;          // IN param
  bool is_sql_layer_system_table;  // IN param
  legacy_db_type db_type;          // IN param

  enum enum_status
  {
    // db.table_name is user table.
    USER_TABLE,
    /*
      db.table_name is a system table,
      but may not be supported by SE.
    */
    SYSTEM_TABLE,
    /*
      db.table_name is a system table,
      and is supported by SE.
    */
    SE_SUPPORTED_SYSTEM_TABLE
  } status; // OUT param
};
390
391
ha_default_plugin(THD * thd)392 static plugin_ref ha_default_plugin(THD *thd)
393 {
394 if (thd->variables.table_plugin)
395 return thd->variables.table_plugin;
396 return my_plugin_lock(thd, &global_system_variables.table_plugin);
397 }
398
399
400 /** @brief
401 Return the default storage engine handlerton used for non-temp tables
402 for thread
403
404 SYNOPSIS
405 ha_default_handlerton(thd)
406 thd current thread
407
408 RETURN
409 pointer to handlerton
410 */
ha_default_handlerton(THD * thd)411 handlerton *ha_default_handlerton(THD *thd)
412 {
413 plugin_ref plugin= ha_default_plugin(thd);
414 assert(plugin);
415 handlerton *hton= plugin_data<handlerton*>(plugin);
416 assert(hton);
417 return hton;
418 }
419
420
ha_default_temp_plugin(THD * thd)421 static plugin_ref ha_default_temp_plugin(THD *thd)
422 {
423 if (thd->variables.temp_table_plugin)
424 return thd->variables.temp_table_plugin;
425 return my_plugin_lock(thd, &global_system_variables.temp_table_plugin);
426 }
427
428
429 /** @brief
430 Return the default storage engine handlerton used for explicitly
431 created temp tables for a thread
432
433 SYNOPSIS
434 ha_default_temp_handlerton(thd)
435 thd current thread
436
437 RETURN
438 pointer to handlerton
439 */
ha_default_temp_handlerton(THD * thd)440 handlerton *ha_default_temp_handlerton(THD *thd)
441 {
442 plugin_ref plugin= ha_default_temp_plugin(thd);
443 assert(plugin);
444 handlerton *hton= plugin_data<handlerton*>(plugin);
445 assert(hton);
446 return hton;
447 }
448
449
450 /**
451 Resolve handlerton plugin by name, without checking for "DEFAULT" or
452 HTON_NOT_USER_SELECTABLE.
453
454 @param thd Thread context.
455 @param name Plugin name.
456
457 @return plugin or NULL if not found.
458 */
plugin_ref ha_resolve_by_name_raw(THD *thd, const LEX_CSTRING &name)
{
  /* Straight registry lookup: no "DEFAULT" keyword or alias handling. */
  return plugin_lock_by_name(thd, name, MYSQL_STORAGE_ENGINE_PLUGIN);
}
463
464 /** @brief
465 Return the storage engine handlerton for the supplied name
466
467 SYNOPSIS
468 ha_resolve_by_name(thd, name)
469 thd current thread
470 name name of storage engine
471
472 RETURN
473 pointer to storage engine plugin handle
474 */
ha_resolve_by_name(THD * thd,const LEX_STRING * name,bool is_temp_table)475 plugin_ref ha_resolve_by_name(THD *thd, const LEX_STRING *name,
476 bool is_temp_table)
477 {
478 const LEX_STRING *table_alias;
479 plugin_ref plugin;
480
481 redo:
482 /* my_strnncoll is a macro and gcc doesn't do early expansion of macro */
483 if (thd && !my_charset_latin1.coll->strnncoll(&my_charset_latin1,
484 (const uchar *)name->str, name->length,
485 (const uchar *)STRING_WITH_LEN("DEFAULT"), 0))
486 return is_temp_table ?
487 ha_default_plugin(thd) : ha_default_temp_plugin(thd);
488
489 LEX_CSTRING cstring_name= {name->str, name->length};
490 if ((plugin= ha_resolve_by_name_raw(thd, cstring_name)))
491 {
492 handlerton *hton= plugin_data<handlerton*>(plugin);
493 if (hton && !(hton->flags & HTON_NOT_USER_SELECTABLE))
494 return plugin;
495
496 /*
497 unlocking plugin immediately after locking is relatively low cost.
498 */
499 plugin_unlock(thd, plugin);
500 }
501
502 /*
503 We check for the historical aliases.
504 */
505 for (table_alias= sys_table_aliases; table_alias->str; table_alias+= 2)
506 {
507 if (!my_strnncoll(&my_charset_latin1,
508 (const uchar *)name->str, name->length,
509 (const uchar *)table_alias->str, table_alias->length))
510 {
511 name= table_alias + 1;
512 goto redo;
513 }
514 }
515
516 return NULL;
517 }
518
/*
  Normalized, comma-bracketed form of the disabled-engine list;
  presumably populated from the disabled_storage_engines server
  option — confirm against the caller of the function below.
*/
std::string normalized_se_str= "";

/*
  Parse comma separated list of disabled storage engine names
  and create a normalized string by appending storage names that
  have aliases. This normalized string is used to disallow
  table/tablespace creation under the storage engines specified.
*/
ha_set_normalized_disabled_se_str(const std::string & disabled_se)527 void ha_set_normalized_disabled_se_str(const std::string &disabled_se)
528 {
529 boost::char_separator<char> sep(",");
530 boost::tokenizer< boost::char_separator<char> > tokens(disabled_se, sep);
531 normalized_se_str.append(",");
532 BOOST_FOREACH (std::string se_name, tokens)
533 {
534 const LEX_STRING *table_alias;
535 boost::algorithm::to_upper(se_name);
536 for (table_alias= sys_table_aliases; table_alias->str; table_alias+= 2)
537 {
538 if (!strcasecmp(se_name.c_str(), table_alias->str) ||
539 !strcasecmp(se_name.c_str(), (table_alias+1)->str))
540 {
541 normalized_se_str.append(std::string(table_alias->str) + "," +
542 std::string((table_alias+1)->str) + ",");
543 break;
544 }
545 }
546
547 if (table_alias->str == NULL)
548 normalized_se_str.append(se_name+",");
549 }
550 }
551
552 // Check if storage engine is disabled for table/tablespace creation.
ha_is_storage_engine_disabled(handlerton * se_handle)553 bool ha_is_storage_engine_disabled(handlerton *se_handle)
554 {
555 if (normalized_se_str.size())
556 {
557 std::string se_name(",");
558 se_name.append(ha_resolve_storage_engine_name(se_handle));
559 se_name.append(",");
560 boost::algorithm::to_upper(se_name);
561 if(strstr(normalized_se_str.c_str(), se_name.c_str()))
562 return true;
563 }
564 return false;
565 }
566
567
/**
  Lock the plugin that registered the given handlerton, so the engine
  cannot be unloaded while the caller uses it. Returns NULL when hton
  is NULL.
*/
plugin_ref ha_lock_engine(THD *thd, const handlerton *hton)
{
  if (hton)
  {
    st_plugin_int **plugin= hton2plugin + hton->slot;

#ifdef NDEBUG
    /*
      Take a shortcut for builtin engines -- return pointer to plugin
      without acquiring LOCK_plugin mutex. This is safe since such
      plugins are not deleted until shutdown and we don't do reference
      counting in non-debug builds for them.

      Since we have reference to handlerton on our hands, this method
      can't be called concurrently to non-builtin handlerton initialization/
      deinitialization. So it is safe to access builtin_htons[] without
      additional locking.
    */
    if (builtin_htons[hton->slot])
      return *plugin;

    return my_plugin_lock(thd, plugin);
#else
    /*
      We can't take shortcut in debug builds.
      At least assert that builtin_htons[slot] is set correctly.

      NOTE(review): the extra indirection here (&plugin vs plugin in the
      release branch) presumably matches plugin_ref carrying one more
      level of indirection in debug builds -- confirm against
      sql_plugin.h before touching.
    */
    assert(builtin_htons[hton->slot] == (plugin[0]->plugin_dl == NULL));
    return my_plugin_lock(thd, &plugin);
#endif
  }
  return NULL;
}
601
602
ha_resolve_by_legacy_type(THD * thd,enum legacy_db_type db_type)603 handlerton *ha_resolve_by_legacy_type(THD *thd, enum legacy_db_type db_type)
604 {
605 plugin_ref plugin;
606 switch (db_type) {
607 case DB_TYPE_DEFAULT:
608 return ha_default_handlerton(thd);
609 default:
610 if (db_type > DB_TYPE_UNKNOWN && db_type < DB_TYPE_DEFAULT &&
611 (plugin= ha_lock_engine(thd, installed_htons[db_type])))
612 return plugin_data<handlerton*>(plugin);
613 /* fall through */
614 case DB_TYPE_UNKNOWN:
615 return NULL;
616 }
617 }
618
619
620 /**
621 Use other database handler if databasehandler is not compiled in.
622 */
/**
  Return the handlerton for database_type if that engine is enabled;
  otherwise either report an error (no_substitute) or substitute an
  enabled engine (legacy MRG_ISAM maps to MRG_MYISAM, anything else
  maps to the session default engine).
*/
handlerton *ha_checktype(THD *thd, enum legacy_db_type database_type,
                         bool no_substitute, bool report_error)
{
  handlerton *hton= ha_resolve_by_legacy_type(thd, database_type);
  if (ha_storage_engine_is_enabled(hton))
    return hton;

  if (no_substitute)
  {
    if (report_error)
    {
      const char *engine_name= ha_resolve_storage_engine_name(hton);
      my_error(ER_FEATURE_DISABLED,MYF(0),engine_name,engine_name);
    }
    return NULL;
  }

  /*
    NOTE(review): firing the after_rollback hook from a type-check
    helper looks out of place -- confirm why substitution requires it
    before changing.
  */
  (void) RUN_HOOK(transaction, after_rollback, (thd, FALSE));

  switch (database_type) {
  case DB_TYPE_MRG_ISAM:
    /* Legacy merge-of-ISAM maps onto merge-of-MyISAM. */
    return ha_resolve_by_legacy_type(thd, DB_TYPE_MRG_MYISAM);
  default:
    break;
  }

  /* Fall back to the (always enabled) session default engine. */
  return ha_default_handlerton(thd);
} /* ha_checktype */
651
652
get_new_handler(TABLE_SHARE * share,MEM_ROOT * alloc,handlerton * db_type)653 handler *get_new_handler(TABLE_SHARE *share, MEM_ROOT *alloc,
654 handlerton *db_type)
655 {
656 handler *file;
657 DBUG_ENTER("get_new_handler");
658 DBUG_PRINT("enter", ("alloc: 0x%lx", (long) alloc));
659
660 if (db_type && db_type->state == SHOW_OPTION_YES && db_type->create)
661 {
662 if ((file= db_type->create(db_type, share, alloc)))
663 file->init();
664 DBUG_RETURN(file);
665 }
666 /*
667 Try the default table type
668 Here the call to current_thd() is ok as we call this function a lot of
669 times but we enter this branch very seldom.
670 */
671 DBUG_RETURN(get_new_handler(share, alloc, ha_default_handlerton(current_thd)));
672 }
673
674
/* Handler error message table, indexed by (errcode - HA_ERR_FIRST). */
static const char **handler_errmsgs;

C_MODE_START
/* my_error() callback: map a handler error number to its message. */
static const char *get_handler_errmsg(int nr)
{
  return handler_errmsgs[nr - HA_ERR_FIRST];
}
C_MODE_END
683
684
685 /**
686 Register handler error messages for use with my_error().
687
688 @retval
689 0 OK
690 @retval
691 !=0 Error
692 */
693
ha_init_errors(void)694 int ha_init_errors(void)
695 {
696 #define SETMSG(nr, msg) handler_errmsgs[(nr) - HA_ERR_FIRST]= (msg)
697
698 /* Allocate a pointer array for the error message strings. */
699 /* Zerofill it to avoid uninitialized gaps. */
700 if (! (handler_errmsgs= (const char**) my_malloc(key_memory_handler_errmsgs,
701 HA_ERR_ERRORS * sizeof(char*),
702 MYF(MY_WME | MY_ZEROFILL))))
703 return 1;
704
705 /* Set the dedicated error messages. */
706 SETMSG(HA_ERR_KEY_NOT_FOUND, ER_DEFAULT(ER_KEY_NOT_FOUND));
707 SETMSG(HA_ERR_FOUND_DUPP_KEY, ER_DEFAULT(ER_DUP_KEY));
708 SETMSG(HA_ERR_RECORD_CHANGED, "Update wich is recoverable");
709 SETMSG(HA_ERR_WRONG_INDEX, "Wrong index given to function");
710 SETMSG(HA_ERR_CRASHED, ER_DEFAULT(ER_NOT_KEYFILE));
711 SETMSG(HA_ERR_WRONG_IN_RECORD, ER_DEFAULT(ER_CRASHED_ON_USAGE));
712 SETMSG(HA_ERR_OUT_OF_MEM, "Table handler out of memory");
713 SETMSG(HA_ERR_NOT_A_TABLE, "Incorrect file format '%.64s'");
714 SETMSG(HA_ERR_WRONG_COMMAND, "Command not supported");
715 SETMSG(HA_ERR_OLD_FILE, ER_DEFAULT(ER_OLD_KEYFILE));
716 SETMSG(HA_ERR_NO_ACTIVE_RECORD, "No record read in update");
717 SETMSG(HA_ERR_RECORD_DELETED, "Intern record deleted");
718 SETMSG(HA_ERR_RECORD_FILE_FULL, ER_DEFAULT(ER_RECORD_FILE_FULL));
719 SETMSG(HA_ERR_INDEX_FILE_FULL, "No more room in index file '%.64s'");
720 SETMSG(HA_ERR_END_OF_FILE, "End in next/prev/first/last");
721 SETMSG(HA_ERR_UNSUPPORTED, ER_DEFAULT(ER_ILLEGAL_HA));
722 SETMSG(HA_ERR_TOO_BIG_ROW, "Too big row");
723 SETMSG(HA_WRONG_CREATE_OPTION, "Wrong create option");
724 SETMSG(HA_ERR_FOUND_DUPP_UNIQUE, ER_DEFAULT(ER_DUP_UNIQUE));
725 SETMSG(HA_ERR_UNKNOWN_CHARSET, "Can't open charset");
726 SETMSG(HA_ERR_WRONG_MRG_TABLE_DEF, ER_DEFAULT(ER_WRONG_MRG_TABLE));
727 SETMSG(HA_ERR_CRASHED_ON_REPAIR, ER_DEFAULT(ER_CRASHED_ON_REPAIR));
728 SETMSG(HA_ERR_CRASHED_ON_USAGE, ER_DEFAULT(ER_CRASHED_ON_USAGE));
729 SETMSG(HA_ERR_LOCK_WAIT_TIMEOUT, ER_DEFAULT(ER_LOCK_WAIT_TIMEOUT));
730 SETMSG(HA_ERR_LOCK_TABLE_FULL, ER_DEFAULT(ER_LOCK_TABLE_FULL));
731 SETMSG(HA_ERR_READ_ONLY_TRANSACTION, ER_DEFAULT(ER_READ_ONLY_TRANSACTION));
732 SETMSG(HA_ERR_LOCK_DEADLOCK, ER_DEFAULT(ER_LOCK_DEADLOCK));
733 SETMSG(HA_ERR_CANNOT_ADD_FOREIGN, ER_DEFAULT(ER_CANNOT_ADD_FOREIGN));
734 SETMSG(HA_ERR_NO_REFERENCED_ROW, ER_DEFAULT(ER_NO_REFERENCED_ROW_2));
735 SETMSG(HA_ERR_ROW_IS_REFERENCED, ER_DEFAULT(ER_ROW_IS_REFERENCED_2));
736 SETMSG(HA_ERR_NO_SAVEPOINT, "No savepoint with that name");
737 SETMSG(HA_ERR_NON_UNIQUE_BLOCK_SIZE, "Non unique key block size");
738 SETMSG(HA_ERR_NO_SUCH_TABLE, "No such table: '%.64s'");
739 SETMSG(HA_ERR_TABLE_EXIST, ER_DEFAULT(ER_TABLE_EXISTS_ERROR));
740 SETMSG(HA_ERR_NO_CONNECTION, "Could not connect to storage engine");
741 SETMSG(HA_ERR_TABLE_DEF_CHANGED, ER_DEFAULT(ER_TABLE_DEF_CHANGED));
742 SETMSG(HA_ERR_FOREIGN_DUPLICATE_KEY, "FK constraint would lead to duplicate key");
743 SETMSG(HA_ERR_TABLE_NEEDS_UPGRADE, ER_DEFAULT(ER_TABLE_NEEDS_UPGRADE));
744 SETMSG(HA_ERR_TABLE_READONLY, ER_DEFAULT(ER_OPEN_AS_READONLY));
745 SETMSG(HA_ERR_AUTOINC_READ_FAILED, ER_DEFAULT(ER_AUTOINC_READ_FAILED));
746 SETMSG(HA_ERR_AUTOINC_ERANGE, ER_DEFAULT(ER_WARN_DATA_OUT_OF_RANGE));
747 SETMSG(HA_ERR_TOO_MANY_CONCURRENT_TRXS, ER_DEFAULT(ER_TOO_MANY_CONCURRENT_TRXS));
748 SETMSG(HA_ERR_INDEX_COL_TOO_LONG, ER_DEFAULT(ER_INDEX_COLUMN_TOO_LONG));
749 SETMSG(HA_ERR_INDEX_CORRUPT, ER_DEFAULT(ER_INDEX_CORRUPT));
750 SETMSG(HA_FTS_INVALID_DOCID, "Invalid InnoDB FTS Doc ID");
751 SETMSG(HA_ERR_TABLE_IN_FK_CHECK, ER_DEFAULT(ER_TABLE_IN_FK_CHECK));
752 SETMSG(HA_ERR_TABLESPACE_EXISTS, "Tablespace already exists");
753 SETMSG(HA_ERR_TABLESPACE_MISSING, ER_DEFAULT(ER_TABLESPACE_MISSING));
754 SETMSG(HA_ERR_FTS_EXCEED_RESULT_CACHE_LIMIT, "FTS query exceeds result cache limit");
755 SETMSG(HA_ERR_TEMP_FILE_WRITE_FAILURE, ER_DEFAULT(ER_TEMP_FILE_WRITE_FAILURE));
756 SETMSG(HA_ERR_INNODB_FORCED_RECOVERY, ER_DEFAULT(ER_INNODB_FORCED_RECOVERY));
757 SETMSG(HA_ERR_FTS_TOO_MANY_WORDS_IN_PHRASE, "Too many words in a FTS phrase or proximity search");
758 SETMSG(HA_ERR_TABLE_CORRUPT, ER_DEFAULT(ER_TABLE_CORRUPT));
759 SETMSG(HA_ERR_TABLESPACE_MISSING, ER_DEFAULT(ER_TABLESPACE_MISSING));
760 SETMSG(HA_ERR_TABLESPACE_IS_NOT_EMPTY, ER_DEFAULT(ER_TABLESPACE_IS_NOT_EMPTY));
761 SETMSG(HA_ERR_WRONG_FILE_NAME, ER_DEFAULT(ER_WRONG_FILE_NAME));
762 SETMSG(HA_ERR_NOT_ALLOWED_COMMAND, ER_DEFAULT(ER_NOT_ALLOWED_COMMAND));
763 SETMSG(HA_ERR_COMPUTE_FAILED, "Compute virtual column value failed");
764 SETMSG(HA_ERR_FTS_TOO_MANY_NESTED_EXP, "Too many nested sub-expressions in a full-text search");
765 /* Register the error messages for use with my_error(). */
766 return my_error_register(get_handler_errmsg, HA_ERR_FIRST, HA_ERR_LAST);
767 }
768
769
/**
  Tear down a storage engine plugin: drop it from installed_htons[],
  ask the engine to close (panic), run the plugin's deinit hook, and
  release its slot in hton2plugin[]/builtin_htons[]. Always returns 0.
*/
int ha_finalize_handlerton(st_plugin_int *plugin)
{
  handlerton *hton= (handlerton *)plugin->data;
  DBUG_ENTER("ha_finalize_handlerton");

  /* hton can be NULL here, if ha_initialize_handlerton() failed. */
  if (!hton)
    goto end;

  /* Only enabled engines occupy an installed_htons[] entry. */
  switch (hton->state)
  {
  case SHOW_OPTION_NO:
  case SHOW_OPTION_DISABLED:
    break;
  case SHOW_OPTION_YES:
    if (installed_htons[hton->db_type] == hton)
      installed_htons[hton->db_type]= NULL;
    break;
  };

  /* Give the engine a chance to flush and close its resources. */
  if (hton->panic)
    hton->panic(hton, HA_PANIC_CLOSE);

  if (plugin->plugin->deinit)
  {
    /*
      Today we have no defined/special behavior for uninstalling
      engine plugins.
    */
    DBUG_PRINT("info", ("Deinitializing plugin: '%s'", plugin->name.str));
    if (plugin->plugin->deinit(NULL))
    {
      DBUG_PRINT("warning", ("Plugin '%s' deinit function returned error.",
                             plugin->name.str));
    }
  }

  /*
    In case a plugin is uninstalled and re-installed later, it should
    reuse an array slot. Otherwise the number of uninstall/install
    cycles would be limited.
  */
  if (hton->slot != HA_SLOT_UNDEF)
  {
    /* Make sure we are not unplugging another plugin */
    assert(hton2plugin[hton->slot] == plugin);
    assert(hton->slot < MAX_HA);
    hton2plugin[hton->slot]= NULL;
    builtin_htons[hton->slot]= false; /* Extra correctness. */
  }

  my_free(hton);

end:
  DBUG_RETURN(0);
}
826
827
/**
  Initialize a storage engine plugin: allocate its handlerton, run the
  plugin init hook, assign a legacy db_type code and a hton2plugin[]
  slot, reserve savepoint storage, and refresh the optimizer cost
  constants. Returns 0 on success, 1 on failure (plugin->data reset to
  NULL).
*/
int ha_initialize_handlerton(st_plugin_int *plugin)
{
  handlerton *hton;
  DBUG_ENTER("ha_initialize_handlerton");
  DBUG_PRINT("plugin", ("initialize plugin: '%s'", plugin->name.str));

  hton= (handlerton *)my_malloc(key_memory_handlerton,
                                sizeof(handlerton),
                                MYF(MY_WME | MY_ZEROFILL));

  if (hton == NULL)
  {
    sql_print_error("Unable to allocate memory for plugin '%s' handlerton.",
                    plugin->name.str);
    goto err_no_hton_memory;
  }

  hton->slot= HA_SLOT_UNDEF;
  /* Historical Requirement */
  plugin->data= hton; // shortcut for the future
  if (plugin->plugin->init && plugin->plugin->init(hton))
  {
    sql_print_error("Plugin '%s' init function returned error.",
                    plugin->name.str);
    goto err;
  }

  /*
    the switch below and hton->state should be removed when
    command-line options for plugins will be implemented
  */
  DBUG_PRINT("info", ("hton->state=%d", hton->state));
  switch (hton->state) {
  case SHOW_OPTION_NO:
    break;
  case SHOW_OPTION_YES:
    {
      uint tmp;
      ulong fslot;
      /* now check the db_type for conflict */
      if (hton->db_type <= DB_TYPE_UNKNOWN ||
          hton->db_type >= DB_TYPE_DEFAULT ||
          installed_htons[hton->db_type])
      {
        /* Conflict or no fixed code: assign a free dynamic code. */
        int idx= (int) DB_TYPE_FIRST_DYNAMIC;

        while (idx < (int) DB_TYPE_DEFAULT && installed_htons[idx])
          idx++;

        if (idx == (int) DB_TYPE_DEFAULT)
        {
          sql_print_warning("Too many storage engines!");
          goto err_deinit;
        }
        if (hton->db_type != DB_TYPE_UNKNOWN)
          sql_print_warning("Storage engine '%s' has conflicting typecode. "
                            "Assigning value %d.", plugin->plugin->name, idx);
        hton->db_type= (enum legacy_db_type) idx;
      }

      /*
        In case a plugin is uninstalled and re-installed later, it should
        reuse an array slot. Otherwise the number of uninstall/install
        cycles would be limited. So look for a free slot.
      */
      DBUG_PRINT("plugin", ("total_ha: %lu", total_ha));
      for (fslot= 0; fslot < total_ha; fslot++)
      {
        if (!hton2plugin[fslot])
          break;
      }
      if (fslot < total_ha)
        hton->slot= fslot;
      else
      {
        if (total_ha >= MAX_HA)
        {
          sql_print_error("Too many plugins loaded. Limit is %lu. "
                          "Failed on '%s'", (ulong) MAX_HA, plugin->name.str);
          goto err_deinit;
        }
        hton->slot= total_ha++;
      }
      installed_htons[hton->db_type]= hton;
      /* Reserve this engine's slice of the per-transaction savepoint area. */
      tmp= hton->savepoint_offset;
      hton->savepoint_offset= savepoint_alloc_size;
      savepoint_alloc_size+= tmp;
      hton2plugin[hton->slot]=plugin;
      builtin_htons[hton->slot]= (plugin->plugin_dl == NULL);
      if (hton->prepare)
        total_ha_2pc++;
      break;
    }
    /* Any other state value is treated as disabled. */
  default:
    hton->state= SHOW_OPTION_DISABLED;
    break;
  }

  /*
    This is entirely for legacy. We will create a new "disk based" hton and a
    "memory" hton which will be configurable longterm. We should be able to
    remove partition and myisammrg.
  */
  switch (hton->db_type) {
  case DB_TYPE_HEAP:
    heap_hton= hton;
    break;
  case DB_TYPE_MYISAM:
    myisam_hton= hton;
    break;
  case DB_TYPE_INNODB:
    innodb_hton= hton;
    break;
  default:
    break;
  };

  /*
    Re-load the optimizer cost constants since this storage engine can
    have non-default cost constants.
  */
  reload_optimizer_cost_constants();

  DBUG_RETURN(0);

err_deinit:
  /*
    Let plugin do its inner deinitialization as plugin->init()
    was successfully called before.
  */
  if (plugin->plugin->deinit)
    (void) plugin->plugin->deinit(NULL);

err:
  my_free(hton);
err_no_hton_memory:
  plugin->data= NULL;
  DBUG_RETURN(1);
}
968
ha_init()969 int ha_init()
970 {
971 int error= 0;
972 DBUG_ENTER("ha_init");
973
974 assert(total_ha < MAX_HA);
975 /*
976 Check if there is a transaction-capable storage engine besides the
977 binary log (which is considered a transaction-capable storage engine in
978 counting total_ha)
979 */
980 opt_using_transactions= total_ha>(ulong)opt_bin_log;
981 savepoint_alloc_size+= sizeof(SAVEPOINT);
982
983 /*
984 Initialize system database name cache.
985 This cache is used to do a quick check if a given
986 db.tablename is a system table.
987 */
988 known_system_databases= ha_known_system_databases();
989
990 DBUG_RETURN(error);
991 }
992
/* Global handler-layer shutdown: release the error-message table. */
void ha_end()
{
  // Unregister handler error messages.
  my_error_unregister(HA_ERR_FIRST, HA_ERR_LAST);
  my_free(handler_errmsgs);
}
999
dropdb_handlerton(THD * unused1,plugin_ref plugin,void * path)1000 static my_bool dropdb_handlerton(THD *unused1, plugin_ref plugin,
1001 void *path)
1002 {
1003 handlerton *hton= plugin_data<handlerton*>(plugin);
1004 if (hton->state == SHOW_OPTION_YES && hton->drop_database)
1005 hton->drop_database(hton, (char *)path);
1006 return FALSE;
1007 }
1008
1009
ha_drop_database(char * path)1010 void ha_drop_database(char* path)
1011 {
1012 plugin_foreach(NULL, dropdb_handlerton, MYSQL_STORAGE_ENGINE_PLUGIN, path);
1013 }
1014
1015
closecon_handlerton(THD * thd,plugin_ref plugin,void * unused)1016 static my_bool closecon_handlerton(THD *thd, plugin_ref plugin,
1017 void *unused)
1018 {
1019 handlerton *hton= plugin_data<handlerton*>(plugin);
1020 /*
1021 there's no need to rollback here as all transactions must
1022 be rolled back already
1023 */
1024 if (hton->state == SHOW_OPTION_YES && thd_get_ha_data(thd, hton))
1025 {
1026 if (hton->close_connection)
1027 hton->close_connection(hton, thd);
1028 /* make sure ha_data is reset and ha_data_lock is released */
1029 thd_set_ha_data(thd, hton, NULL);
1030 }
1031 return FALSE;
1032 }
1033
1034
1035 /**
1036 @note
1037 don't bother to rollback here, it's done already
1038 */
ha_close_connection(THD * thd)1039 void ha_close_connection(THD* thd)
1040 {
1041 plugin_foreach(thd, closecon_handlerton, MYSQL_STORAGE_ENGINE_PLUGIN, 0);
1042 }
1043
1044
kill_handlerton(THD * thd,plugin_ref plugin,void *)1045 static my_bool kill_handlerton(THD *thd, plugin_ref plugin, void *)
1046 {
1047 handlerton *hton= plugin_data<handlerton*>(plugin);
1048
1049 if (hton->state == SHOW_OPTION_YES && hton->kill_connection)
1050 {
1051 if (thd_get_ha_data(thd, hton))
1052 hton->kill_connection(hton, thd);
1053 }
1054
1055 return FALSE;
1056 }
1057
ha_kill_connection(THD * thd)1058 void ha_kill_connection(THD *thd)
1059 {
1060 plugin_foreach(thd, kill_handlerton, MYSQL_STORAGE_ENGINE_PLUGIN, 0);
1061 }
1062
1063
1064 /* ========================================================================
1065 ======================= TRANSACTIONS ===================================*/
1066
1067 /**
1068 Transaction handling in the server
1069 ==================================
1070
1071 In each client connection, MySQL maintains two transactional
1072 states:
1073 - a statement transaction,
1074 - a standard, also called normal transaction.
1075
1076 Historical note
1077 ---------------
1078 "Statement transaction" is a non-standard term that comes
1079 from the times when MySQL supported BerkeleyDB storage engine.
1080
1081 First of all, it should be said that in BerkeleyDB auto-commit
1082 mode auto-commits operations that are atomic to the storage
1083 engine itself, such as a write of a record, and are too
1084 high-granular to be atomic from the application perspective
1085 (MySQL). One SQL statement could involve many BerkeleyDB
1086 auto-committed operations and thus BerkeleyDB auto-commit was of
1087 little use to MySQL.
1088
1089 Secondly, instead of SQL standard savepoints, BerkeleyDB
1090 provided the concept of "nested transactions". In a nutshell,
1091 transactions could be arbitrarily nested, but when the parent
1092 transaction was committed or aborted, all its child (nested)
  transactions were implicitly committed or aborted as well.
1094 Commit of a nested transaction, in turn, made its changes
1095 visible, but not durable: it destroyed the nested transaction,
1096 all its changes would become available to the parent and
1097 currently active nested transactions of this parent.
1098
1099 So the mechanism of nested transactions was employed to
1100 provide "all or nothing" guarantee of SQL statements
1101 required by the standard.
1102 A nested transaction would be created at start of each SQL
1103 statement, and destroyed (committed or aborted) at statement
1104 end. Such nested transaction was internally referred to as
1105 a "statement transaction" and gave birth to the term.
1106
1107 (Historical note ends)
1108
1109 Since then a statement transaction is started for each statement
1110 that accesses transactional tables or uses the binary log. If
1111 the statement succeeds, the statement transaction is committed.
1112 If the statement fails, the transaction is rolled back. Commits
1113 of statement transactions are not durable -- each such
1114 transaction is nested in the normal transaction, and if the
1115 normal transaction is rolled back, the effects of all enclosed
1116 statement transactions are undone as well. Technically,
1117 a statement transaction can be viewed as a savepoint which is
1118 maintained automatically in order to make effects of one
1119 statement atomic.
1120
1121 The normal transaction is started by the user and is ended
1122 usually upon a user request as well. The normal transaction
1123 encloses transactions of all statements issued between
1124 its beginning and its end.
1125 In autocommit mode, the normal transaction is equivalent
1126 to the statement transaction.
1127
1128 Since MySQL supports PSEA (pluggable storage engine
1129 architecture), more than one transactional engine can be
1130 active at a time. Hence transactions, from the server
1131 point of view, are always distributed. In particular,
1132 transactional state is maintained independently for each
1133 engine. In order to commit a transaction the two phase
1134 commit protocol is employed.
1135
1136 Not all statements are executed in context of a transaction.
1137 Administrative and status information statements do not modify
1138 engine data, and thus do not start a statement transaction and
1139 also have no effect on the normal transaction. Examples of such
1140 statements are SHOW STATUS and RESET SLAVE.
1141
1142 Similarly DDL statements are not transactional,
1143 and therefore a transaction is [almost] never started for a DDL
1144 statement. The difference between a DDL statement and a purely
1145 administrative statement though is that a DDL statement always
1146 commits the current transaction before proceeding, if there is
1147 any.
1148
1149 At last, SQL statements that work with non-transactional
1150 engines also have no effect on the transaction state of the
1151 connection. Even though they are written to the binary log,
1152 and the binary log is, overall, transactional, the writes
1153 are done in "write-through" mode, directly to the binlog
1154 file, followed with a OS cache sync, in other words,
1155 bypassing the binlog undo log (translog).
1156 They do not commit the current normal transaction.
1157 A failure of a statement that uses non-transactional tables
  would cause a rollback of the statement transaction, but
  in case no non-transactional tables are used,
  no statement transaction is started.
1161
1162 Data layout
1163 -----------
1164
1165 The server stores its transaction-related data in
1166 thd->transaction. This structure has two members of type
1167 THD_TRANS. These members correspond to the statement and
1168 normal transactions respectively:
1169
1170 - thd->transaction.stmt contains a list of engines
1171 that are participating in the given statement
1172 - thd->transaction.all contains a list of engines that
1173 have participated in any of the statement transactions started
1174 within the context of the normal transaction.
1175 Each element of the list contains a pointer to the storage
1176 engine, engine-specific transactional data, and engine-specific
1177 transaction flags.
1178
1179 In autocommit mode thd->transaction.all is empty.
1180 Instead, data of thd->transaction.stmt is
1181 used to commit/rollback the normal transaction.
1182
1183 The list of registered engines has a few important properties:
1184 - no engine is registered in the list twice
  - engines are present in the list in reverse temporal order --
    new participants are always added to the beginning of the list.
1187
1188 Transaction life cycle
1189 ----------------------
1190
1191 When a new connection is established, thd->transaction
1192 members are initialized to an empty state.
1193 If a statement uses any tables, all affected engines
1194 are registered in the statement engine list. In
1195 non-autocommit mode, the same engines are registered in
1196 the normal transaction list.
1197 At the end of the statement, the server issues a commit
1198 or a roll back for all engines in the statement list.
1199 At this point transaction flags of an engine, if any, are
1200 propagated from the statement list to the list of the normal
1201 transaction.
1202 When commit/rollback is finished, the statement list is
1203 cleared. It will be filled in again by the next statement,
1204 and emptied again at the next statement's end.
1205
1206 The normal transaction is committed in a similar way
1207 (by going over all engines in thd->transaction.all list)
1208 but at different times:
1209 - upon COMMIT SQL statement is issued by the user
1210 - implicitly, by the server, at the beginning of a DDL statement
1211 or SET AUTOCOMMIT={0|1} statement.
1212
1213 The normal transaction can be rolled back as well:
1214 - if the user has requested so, by issuing ROLLBACK SQL
1215 statement
1216 - if one of the storage engines requested a rollback
1217 by setting thd->transaction_rollback_request. This may
1218 happen in case, e.g., when the transaction in the engine was
1219 chosen a victim of the internal deadlock resolution algorithm
1220 and rolled back internally. When such a situation happens, there
1221 is little the server can do and the only option is to rollback
1222 transactions in all other participating engines. In this case
1223 the rollback is accompanied by an error sent to the user.
1224
1225 As follows from the use cases above, the normal transaction
1226 is never committed when there is an outstanding statement
1227 transaction. In most cases there is no conflict, since
1228 commits of the normal transaction are issued by a stand-alone
1229 administrative or DDL statement, thus no outstanding statement
1230 transaction of the previous statement exists. Besides,
1231 all statements that manipulate with the normal transaction
1232 are prohibited in stored functions and triggers, therefore
1233 no conflicting situation can occur in a sub-statement either.
1234 The remaining rare cases when the server explicitly has
1235 to commit the statement transaction prior to committing the normal
1236 one cover error-handling scenarios (see for example
1237 SQLCOM_LOCK_TABLES).
1238
1239 When committing a statement or a normal transaction, the server
1240 either uses the two-phase commit protocol, or issues a commit
1241 in each engine independently. The two-phase commit protocol
1242 is used only if:
1243 - all participating engines support two-phase commit (provide
1244 handlerton::prepare PSEA API call) and
1245 - transactions in at least two engines modify data (i.e. are
1246 not read-only).
1247
1248 Note that the two phase commit is used for
1249 statement transactions, even though they are not durable anyway.
1250 This is done to ensure logical consistency of data in a multiple-
1251 engine transaction.
1252 For example, imagine that some day MySQL supports unique
1253 constraint checks deferred till the end of statement. In such
1254 case a commit in one of the engines may yield ER_DUP_KEY,
1255 and MySQL should be able to gracefully abort statement
1256 transactions of other participants.
1257
1258 After the normal transaction has been committed,
1259 thd->transaction.all list is cleared.
1260
1261 When a connection is closed, the current normal transaction, if
1262 any, is rolled back.
1263
1264 Roles and responsibilities
1265 --------------------------
1266
1267 The server has no way to know that an engine participates in
1268 the statement and a transaction has been started
1269 in it unless the engine says so. Thus, in order to be
1270 a part of a transaction, the engine must "register" itself.
1271 This is done by invoking trans_register_ha() server call.
1272 Normally the engine registers itself whenever handler::external_lock()
1273 is called. trans_register_ha() can be invoked many times: if
1274 an engine is already registered, the call does nothing.
1275 In case autocommit is not set, the engine must register itself
1276 twice -- both in the statement list and in the normal transaction
1277 list.
1278 In which list to register is a parameter of trans_register_ha().
1279
1280 Note, that although the registration interface in itself is
1281 fairly clear, the current usage practice often leads to undesired
1282 effects. E.g. since a call to trans_register_ha() in most engines
1283 is embedded into implementation of handler::external_lock(), some
1284 DDL statements start a transaction (at least from the server
1285 point of view) even though they are not expected to. E.g.
1286 CREATE TABLE does not start a transaction, since
1287 handler::external_lock() is never called during CREATE TABLE. But
1288 CREATE TABLE ... SELECT does, since handler::external_lock() is
1289 called for the table that is being selected from. This has no
1290 practical effects currently, but must be kept in mind
1291 nevertheless.
1292
1293 Once an engine is registered, the server will do the rest
1294 of the work.
1295
1296 During statement execution, whenever any of data-modifying
1297 PSEA API methods is used, e.g. handler::write_row() or
1298 handler::update_row(), the read-write flag is raised in the
1299 statement transaction for the involved engine.
  Currently all PSEA calls are "traced", and the data cannot be
  changed in a way other than issuing a PSEA call. Important:
1302 unless this invariant is preserved the server will not know that
1303 a transaction in a given engine is read-write and will not
1304 involve the two-phase commit protocol!
1305
  At the end of a statement, the server call trans_commit_stmt is
  invoked. This call in turn invokes handlerton::prepare()
  for every involved engine. Prepare is followed by a call
  to handlerton::commit_one_phase(). If a one-phase commit
  will suffice, handlerton::prepare() is not invoked and
  the server only calls handlerton::commit_one_phase().
1312 At statement commit, the statement-related read-write
1313 engine flag is propagated to the corresponding flag in the
1314 normal transaction. When the commit is complete, the list
1315 of registered engines is cleared.
1316
1317 Rollback is handled in a similar fashion.
1318
1319 Additional notes on DDL and the normal transaction.
1320 ---------------------------------------------------
1321
1322 DDLs and operations with non-transactional engines
1323 do not "register" in thd->transaction lists, and thus do not
1324 modify the transaction state. Besides, each DDL in
1325 MySQL is prefixed with an implicit normal transaction commit
1326 (a call to trans_commit_implicit()), and thus leaves nothing
1327 to modify.
1328 However, as it has been pointed out with CREATE TABLE .. SELECT,
1329 some DDL statements can start a *new* transaction.
1330
1331 Behaviour of the server in this case is currently badly
1332 defined.
1333 DDL statements use a form of "semantic" logging
1334 to maintain atomicity: if CREATE TABLE .. SELECT failed,
1335 the newly created table is deleted.
1336 In addition, some DDL statements issue interim transaction
1337 commits: e.g. ALTER TABLE issues a commit after data is copied
1338 from the original table to the internal temporary table. Other
1339 statements, e.g. CREATE TABLE ... SELECT do not always commit
1340 after itself.
1341 And finally there is a group of DDL statements such as
1342 RENAME/DROP TABLE that doesn't start a new transaction
1343 and doesn't commit.
1344
1345 This diversity makes it hard to say what will happen if
1346 by chance a stored function is invoked during a DDL --
1347 whether any modifications it makes will be committed or not
1348 is not clear. Fortunately, SQL grammar of few DDLs allows
1349 invocation of a stored function.
1350
1351 A consistent behaviour is perhaps to always commit the normal
1352 transaction after all DDLs, just like the statement transaction
1353 is always committed at the end of all statements.
1354 */
1355
1356 /**
1357 Register a storage engine for a transaction.
1358
1359 Every storage engine MUST call this function when it starts
1360 a transaction or a statement (that is it must be called both for the
1361 "beginning of transaction" and "beginning of statement").
1362 Only storage engines registered for the transaction/statement
1363 will know when to commit/rollback it.
1364
1365 @note
1366 trans_register_ha is idempotent - storage engine may register many
1367 times per transaction.
1368
1369 */
trans_register_ha(THD * thd,bool all,handlerton * ht_arg,const ulonglong * trxid)1370 void trans_register_ha(THD *thd, bool all, handlerton *ht_arg,
1371 const ulonglong *trxid)
1372 {
1373 Ha_trx_info *ha_info;
1374 Transaction_ctx *trn_ctx= thd->get_transaction();
1375 Transaction_ctx::enum_trx_scope trx_scope=
1376 all ? Transaction_ctx::SESSION : Transaction_ctx::STMT;
1377
1378 DBUG_ENTER("trans_register_ha");
1379 DBUG_PRINT("enter",("%s", all ? "all" : "stmt"));
1380
1381 Ha_trx_info *knownn_trans= trn_ctx->ha_trx_info(trx_scope);
1382 if (all)
1383 {
1384 /*
1385 Ensure no active backup engine data exists, unless the current transaction
1386 is from replication and in active xa state.
1387 */
1388 assert(thd->ha_data[ht_arg->slot].ha_ptr_backup == NULL ||
1389 (thd->get_transaction()->xid_state()->
1390 has_state(XID_STATE::XA_ACTIVE)));
1391 assert(thd->ha_data[ht_arg->slot].ha_ptr_backup == NULL ||
1392 (thd->is_binlog_applier() || thd->slave_thread));
1393
1394 thd->server_status|= SERVER_STATUS_IN_TRANS;
1395 if (thd->tx_read_only)
1396 thd->server_status|= SERVER_STATUS_IN_TRANS_READONLY;
1397 DBUG_PRINT("info", ("setting SERVER_STATUS_IN_TRANS"));
1398 }
1399
1400 ha_info= thd->ha_data[ht_arg->slot].ha_info + (all ? 1 : 0);
1401
1402 if (ha_info->is_started())
1403 DBUG_VOID_RETURN; /* already registered, return */
1404
1405 ha_info->register_ha(knownn_trans, ht_arg);
1406 trn_ctx->set_ha_trx_info(trx_scope, ha_info);
1407
1408 if (ht_arg->prepare == 0)
1409 trn_ctx->set_no_2pc(trx_scope, true);
1410
1411 trn_ctx->xid_state()->set_query_id(thd->query_id);
1412 /*
1413 Register transaction start in performance schema if not done already.
1414 By doing this, we handle cases when the transaction is started implicitly in
1415 autocommit=0 mode, and cases when we are in normal autocommit=1 mode and the
1416 executed statement is a single-statement transaction.
1417
1418 Explicitly started transactions are handled in trans_begin().
1419
1420 Do not register transactions in which binary log is the only participating
1421 transactional storage engine.
1422 */
1423 #ifdef HAVE_PSI_TRANSACTION_INTERFACE
1424 if (thd->m_transaction_psi == NULL &&
1425 ht_arg->db_type != DB_TYPE_BINLOG)
1426 {
1427 const XID *xid= trn_ctx->xid_state()->get_xid();
1428 my_bool autocommit= !thd->in_multi_stmt_transaction_mode();
1429 thd->m_transaction_psi= MYSQL_START_TRANSACTION(&thd->m_transaction_state,
1430 xid, trxid, thd->tx_isolation,
1431 thd->tx_read_only, autocommit);
1432 DEBUG_SYNC(thd, "after_set_transaction_psi_before_set_transaction_gtid");
1433 gtid_set_performance_schema_values(thd);
1434 }
1435 #endif
1436 DBUG_VOID_RETURN;
1437 }
1438
1439 /**
1440 @retval
1441 0 ok
1442 @retval
1443 1 error, transaction was rolled back
1444 */
int ha_prepare(THD *thd)
{
  int error=0;
  Transaction_ctx *trn_ctx= thd->get_transaction();
  DBUG_ENTER("ha_prepare");

  /* Nothing to prepare unless a session-scope transaction is active. */
  if (trn_ctx->is_active(Transaction_ctx::SESSION))
  {
    const Ha_trx_info *ha_info= trn_ctx->ha_trx_info(
        Transaction_ctx::SESSION);
    bool gtid_error= false, need_clear_owned_gtid= false;

    /*
      Persist the owned GTID first (needed when the statement will not
      reach the binary log); a failure here aborts the whole transaction.
    */
    if ((gtid_error=
         MY_TEST(commit_owned_gtids(thd, true, &need_clear_owned_gtid))))
    {
      assert(need_clear_owned_gtid);

      ha_rollback_trans(thd, true);
      error= 1;
      goto err;
    }

    /* Walk every engine registered in the session transaction. */
    while (ha_info)
    {
      handlerton *ht= ha_info->ht();
      assert(!thd->status_var_aggregated);
      thd->status_var.ha_prepare_count++;
      if (ht->prepare)
      {
        DBUG_EXECUTE_IF("simulate_xa_failure_prepare", {
          ha_rollback_trans(thd, true);
          DBUG_RETURN(1);
        });
        /* Engine-level prepare failure rolls back the transaction. */
        if (ht->prepare(ht, thd, true))
        {
          ha_rollback_trans(thd, true);
          error=1;
          break;
        }
      }
      else
      {
        /* Engine cannot take part in two-phase commit: warn, continue. */
        push_warning_printf(thd, Sql_condition::SL_WARNING,
                            ER_ILLEGAL_HA, ER(ER_ILLEGAL_HA),
                            ha_resolve_storage_engine_name(ht));
      }
      ha_info= ha_info->next();
    }

    assert(thd->get_transaction()->xid_state()->
           has_state(XID_STATE::XA_IDLE));

err:
    /* Commit or roll back the owned GTID state to match the outcome. */
    gtid_state_commit_or_rollback(thd, need_clear_owned_gtid, !gtid_error);
  }

  DBUG_RETURN(error);
}
1503
1504 /**
1505 Check if we can skip the two-phase commit.
1506
1507 A helper function to evaluate if two-phase commit is mandatory.
1508 As a side effect, propagates the read-only/read-write flags
1509 of the statement transaction to its enclosing normal transaction.
1510
1511 If we have at least two engines with read-write changes we must
1512 run a two-phase commit. Otherwise we can run several independent
1513 commits as the only transactional engine has read-write changes
1514 and others are read-only.
1515
1516 @retval 0 All engines are read-only.
1517 @retval 1 We have the only engine with read-write changes.
1518 @retval >1 More than one engine have read-write changes.
1519 Note: return value might NOT be the exact number of
1520 engines with read-write changes.
1521 */
1522
1523 static
1524 uint
ha_check_and_coalesce_trx_read_only(THD * thd,Ha_trx_info * ha_list,bool all)1525 ha_check_and_coalesce_trx_read_only(THD *thd, Ha_trx_info *ha_list,
1526 bool all)
1527 {
1528 /* The number of storage engines that have actual changes. */
1529 unsigned rw_ha_count= 0;
1530 Ha_trx_info *ha_info;
1531
1532 for (ha_info= ha_list; ha_info; ha_info= ha_info->next())
1533 {
1534 if (ha_info->is_trx_read_write())
1535 ++rw_ha_count;
1536
1537 if (! all)
1538 {
1539 Ha_trx_info *ha_info_all= &thd->ha_data[ha_info->ht()->slot].ha_info[1];
1540 assert(ha_info != ha_info_all);
1541 /*
1542 Merge read-only/read-write information about statement
1543 transaction to its enclosing normal transaction. Do this
1544 only if in a real transaction -- that is, if we know
1545 that ha_info_all is registered in thd->transaction.all.
1546 Since otherwise we only clutter the normal transaction flags.
1547 */
1548 if (ha_info_all->is_started()) /* FALSE if autocommit. */
1549 ha_info_all->coalesce_trx_with(ha_info);
1550 }
1551 else if (rw_ha_count > 1)
1552 {
1553 /*
1554 It is a normal transaction, so we don't need to merge read/write
1555 information up, and the need for two-phase commit has been
1556 already established. Break the loop prematurely.
1557 */
1558 break;
1559 }
1560 }
1561 return rw_ha_count;
1562 }
1563
1564
1565 /**
1566 The function computes condition to call gtid persistor wrapper,
1567 and executes it.
1568 It is invoked at committing a statement or transaction, including XA,
1569 and also at XA prepare handling.
1570
1571 @param thd Thread context.
1572 @param all The execution scope, true for the transaction one, false
1573 for the statement one.
1574 @param[out] need_clear_owned_gtid_ptr
1575 A pointer to bool variable to return the computed decision
1576 value.
1577 @return zero as no error indication, non-zero otherwise
1578 */
1579
commit_owned_gtids(THD * thd,bool all,bool * need_clear_owned_gtid_ptr)1580 int commit_owned_gtids(THD *thd, bool all, bool *need_clear_owned_gtid_ptr)
1581 {
1582 DBUG_ENTER("commit_owned_gtids(...)");
1583 int error= 0;
1584
1585 if ((!opt_bin_log || (thd->slave_thread && !opt_log_slave_updates)) &&
1586 (all || !thd->in_multi_stmt_transaction_mode()) &&
1587 !thd->is_operating_gtid_table_implicitly &&
1588 !thd->is_operating_substatement_implicitly)
1589 {
1590 /*
1591 If the binary log is disabled for this thread (either by
1592 log_bin=0 or sql_log_bin=0 or by log_slave_updates=0 for a
1593 slave thread), then the statement will not be written to
1594 the binary log. In this case, we should save its GTID into
1595 mysql.gtid_executed table and @@GLOBAL.GTID_EXECUTED as it
1596 did when binlog is enabled.
1597 */
1598 if (thd->owned_gtid.sidno > 0)
1599 {
1600 error= gtid_state->save(thd);
1601 *need_clear_owned_gtid_ptr= true;
1602 }
1603 else if (thd->owned_gtid.sidno == THD::OWNED_SIDNO_ANONYMOUS)
1604 *need_clear_owned_gtid_ptr= true;
1605 }
1606 else
1607 {
1608 *need_clear_owned_gtid_ptr= false;
1609 }
1610
1611 DBUG_RETURN(error);
1612 }
1613
1614
1615 /**
1616 The function is a wrapper of commit_owned_gtids(...). It is invoked
1617 at committing a partially failed statement or transaction.
1618
1619 @param thd Thread context.
1620
1621 @retval -1 if error when persisting owned gtid.
1622 @retval 0 if succeed to commit owned gtid.
1623 @retval 1 if do not meet conditions to commit owned gtid.
1624 */
commit_owned_gtid_by_partial_command(THD * thd)1625 int commit_owned_gtid_by_partial_command(THD *thd)
1626 {
1627 DBUG_ENTER("commit_owned_gtid_by_partial_command(THD *thd)");
1628 bool need_clear_owned_gtid_ptr= false;
1629 int ret= 0;
1630
1631 if (commit_owned_gtids(thd, true, &need_clear_owned_gtid_ptr))
1632 {
1633 /* Error when saving gtid into mysql.gtid_executed table. */
1634 gtid_state->update_on_rollback(thd);
1635 ret= -1;
1636 }
1637 else if (need_clear_owned_gtid_ptr)
1638 {
1639 gtid_state->update_on_commit(thd);
1640 ret= 0;
1641 }
1642 else
1643 {
1644 ret= 1;
1645 }
1646
1647 DBUG_RETURN(ret);
1648 }
1649
1650
1651 /**
1652 @param[in] ignore_global_read_lock Allow commit to complete even if a
1653 global read lock is active. This can be
1654 used to allow changes to internal tables
1655 (e.g. slave status tables).
1656
1657 @retval
1658 0 ok
1659 @retval
1660 1 transaction was rolled back
1661 @retval
1662 2 error during commit, data may be inconsistent
1663
1664 @todo
1665 Since we don't support nested statement transactions in 5.0,
1666 we can't commit or rollback stmt transactions while we are inside
1667 stored functions or triggers. So we simply do nothing now.
1668 TODO: This should be fixed in later ( >= 5.1) releases.
1669 */
1670
int ha_commit_trans(THD *thd, bool all, bool ignore_global_read_lock)
{
  int error= 0;
  bool need_clear_owned_gtid= false;
  /*
    Save transaction owned gtid into table before transaction prepare
    if binlog is disabled, or binlog is enabled and log_slave_updates
    is disabled with slave SQL thread or slave worker thread.
  */
  error= commit_owned_gtids(thd, all, &need_clear_owned_gtid);

  /*
    'all' means that this is either an explicit commit issued by
    user, or an implicit commit issued by a DDL.
  */
  Transaction_ctx *trn_ctx= thd->get_transaction();
  Transaction_ctx::enum_trx_scope trx_scope=
    all ? Transaction_ctx::SESSION : Transaction_ctx::STMT;

  /*
    "real" is a nick name for a transaction for which a commit will
    make persistent changes. E.g. a 'stmt' transaction inside an 'all'
    transaction is not 'real': even though it's possible to commit it,
    the changes are not durable as they might be rolled back if the
    enclosing 'all' transaction is rolled back.
  */
  bool is_real_trans=
    all || !trn_ctx->is_active(Transaction_ctx::SESSION);

  Ha_trx_info *ha_info= trn_ctx->ha_trx_info(trx_scope);
  XID_STATE *xid_state= trn_ctx->xid_state();

  DBUG_ENTER("ha_commit_trans");

  DBUG_PRINT("info", ("all=%d thd->in_sub_stmt=%d ha_info=%p is_real_trans=%d",
                      all, thd->in_sub_stmt, ha_info, is_real_trans));
  /*
    We must not commit the normal transaction if a statement
    transaction is pending. Otherwise statement transaction
    flags will not get propagated to its normal transaction's
    counterpart.
  */
  assert(!trn_ctx->is_active(Transaction_ctx::STMT) ||
         !all);

  if (thd->in_sub_stmt)
  {
    assert(0);
    /*
      Since we don't support nested statement transactions in 5.0,
      we can't commit or rollback stmt transactions while we are inside
      stored functions or triggers. So we simply do nothing now.
      TODO: This should be fixed in later ( >= 5.1) releases.
    */
    if (!all)
      DBUG_RETURN(0);
    /*
      We assume that all statements which commit or rollback main transaction
      are prohibited inside of stored functions or triggers. So they should
      bail out with error even before ha_commit_trans() call. To be 100% safe
      let us throw error in non-debug builds.
    */
    my_error(ER_COMMIT_NOT_ALLOWED_IN_SF_OR_TRG, MYF(0));
    DBUG_RETURN(2);
  }

  MDL_request mdl_request;
  bool release_mdl= false;
  if (ha_info)
  {
    uint rw_ha_count;
    bool rw_trans;

    DBUG_EXECUTE_IF("crash_commit_before", DBUG_SUICIDE(););

    /*
      Count read-write participants and propagate the statement
      transaction's read-write flags up to the session transaction.
    */
    rw_ha_count= ha_check_and_coalesce_trx_read_only(thd, ha_info, all);
    trn_ctx->set_rw_ha_count(trx_scope, rw_ha_count);
    /* rw_trans is TRUE when we in a transaction changing data */
    rw_trans= is_real_trans && (rw_ha_count > 0);

    DBUG_EXECUTE_IF("dbug.enabled_commit",
                    {
                      const char act[]= "now signal Reached wait_for signal.commit_continue";
                      assert(!debug_sync_set_action(current_thd,
                                                    STRING_WITH_LEN(act)));
                    };);
    if (rw_trans && !ignore_global_read_lock)
    {
      /*
        Acquire a metadata lock which will ensure that COMMIT is blocked
        by an active FLUSH TABLES WITH READ LOCK (and vice versa:
        COMMIT in progress blocks FTWRL).

        We allow the owner of FTWRL to COMMIT; we assume that it knows
        what it does.
      */
      MDL_REQUEST_INIT(&mdl_request,
                       MDL_key::COMMIT, "", "", MDL_INTENTION_EXCLUSIVE,
                       MDL_EXPLICIT);

      DBUG_PRINT("debug", ("Acquire MDL commit lock"));
      if (thd->mdl_context.acquire_lock(&mdl_request,
                                        thd->variables.lock_wait_timeout))
      {
        /* Lock wait failed or timed out: abort the transaction. */
        ha_rollback_trans(thd, all);
        DBUG_RETURN(1);
      }
      release_mdl= true;

      DEBUG_SYNC(thd, "ha_commit_trans_after_acquire_commit_lock");
    }

    /* A read-write commit is refused while the server is read-only. */
    if (rw_trans && (stmt_has_updated_trans_table(ha_info)
                     || trans_has_noop_dml(ha_info)) && check_readonly(thd, true))
    {
      ha_rollback_trans(thd, all);
      error= 1;
      goto end;
    }

    /*
      Two-phase commit: run the prepare phase only when 2PC is possible
      and more than one read-write engine participates.
    */
    if (!trn_ctx->no_2pc(trx_scope) && (trn_ctx->rw_ha_count(trx_scope) > 1))
      error= tc_log->prepare(thd, all);
  }
  /*
    The state of XA transaction is changed to Prepared, intermediately.
    It's going to change to the regular NOTR at the end.
    The fact of the Prepared state is of interest to binary logger.
  */
  if (!error && all && xid_state->has_state(XID_STATE::XA_IDLE))
  {
    assert(thd->lex->sql_command == SQLCOM_XA_COMMIT &&
           static_cast<Sql_cmd_xa_commit*>(thd->lex->m_sql_cmd)->
           get_xa_opt() == XA_ONE_PHASE);

    xid_state->set_state(XID_STATE::XA_PREPARED);
  }
  if (error || (error= tc_log->commit(thd, all)))
  {
    ha_rollback_trans(thd, all);
    error= 1;
    goto end;
  }
  /*
    Mark multi-statement (any autocommit mode) or single-statement
    (autocommit=1) transaction as committed in the performance schema.
  */
#ifdef HAVE_PSI_TRANSACTION_INTERFACE
  if (is_real_trans && thd->m_transaction_psi != NULL)
  {
    MYSQL_COMMIT_TRANSACTION(thd->m_transaction_psi);
    thd->m_transaction_psi= NULL;
  }
#endif
  DBUG_EXECUTE_IF("crash_commit_after",
                  if (!thd->is_operating_gtid_table_implicitly)
                    DBUG_SUICIDE(););
end:
  if (release_mdl && mdl_request.ticket)
  {
    /*
      We do not always immediately release transactional locks
      after ha_commit_trans() (see uses of ha_enable_transaction()),
      thus we release the commit blocker lock as soon as it's
      not needed.
    */
    DBUG_PRINT("debug", ("Releasing MDL commit lock"));
    thd->mdl_context.release_lock(mdl_request.ticket);
  }
  /* Free resources and perform other cleanup even for 'empty' transactions. */
  if (is_real_trans)
  {
    trn_ctx->cleanup();
    thd->tx_priority= 0;
  }

  if (need_clear_owned_gtid)
  {
    thd->server_status&= ~SERVER_STATUS_IN_TRANS;
    /*
      Release the owned GTID when binlog is disabled, or binlog is
      enabled and log_slave_updates is disabled with slave SQL thread
      or slave worker thread.
    */
    if (error)
      gtid_state->update_on_rollback(thd);
    else
      gtid_state->update_on_commit(thd);
  }

  DBUG_RETURN(error);
}
1862
1863 /**
1864 Commit the sessions outstanding transaction.
1865
1866 @pre thd->transaction.flags.commit_low == true
1867 @post thd->transaction.flags.commit_low == false
1868
1869 @note This function does not care about global read lock; the caller
1870 should.
1871
1872 @param[in] all Is set in case of explicit commit
1873 (COMMIT statement), or implicit commit
1874 issued by DDL. Is not set when called
1875 at the end of statement, even if
1876 autocommit=1.
1877 @param[in] run_after_commit
1878 True by default, otherwise, does not execute
1879 the after_commit hook in the function.
1880 */
1881
ha_commit_low(THD * thd,bool all,bool run_after_commit)1882 int ha_commit_low(THD *thd, bool all, bool run_after_commit)
1883 {
1884 int error=0;
1885 Transaction_ctx *trn_ctx= thd->get_transaction();
1886 Transaction_ctx::enum_trx_scope trx_scope=
1887 all ? Transaction_ctx::SESSION : Transaction_ctx::STMT;
1888 Ha_trx_info *ha_info= trn_ctx->ha_trx_info(trx_scope), *ha_info_next;
1889
1890 DBUG_ENTER("ha_commit_low");
1891
1892 if (ha_info)
1893 {
1894 bool restore_backup_ha_data= false;
1895 /*
1896 At execution of XA COMMIT ONE PHASE binlog or slave applier
1897 reattaches the engine ha_data to THD, previously saved at XA START.
1898 */
1899 if (all && thd->rpl_unflag_detached_engine_ha_data())
1900 {
1901 assert(thd->lex->sql_command == SQLCOM_XA_COMMIT);
1902 assert(static_cast<Sql_cmd_xa_commit*>(thd->lex->m_sql_cmd)->
1903 get_xa_opt() == XA_ONE_PHASE);
1904 restore_backup_ha_data= true;
1905 }
1906
1907 for (; ha_info; ha_info= ha_info_next)
1908 {
1909 int err;
1910 handlerton *ht= ha_info->ht();
1911 if ((err= ht->commit(ht, thd, all)))
1912 {
1913 my_error(ER_ERROR_DURING_COMMIT, MYF(0), err);
1914 error=1;
1915 }
1916 assert(!thd->status_var_aggregated);
1917 thd->status_var.ha_commit_count++;
1918 ha_info_next= ha_info->next();
1919 if (restore_backup_ha_data)
1920 reattach_engine_ha_data_to_thd(thd, ht);
1921 ha_info->reset(); /* keep it conveniently zero-filled */
1922 }
1923 trn_ctx->reset_scope(trx_scope);
1924 if (all)
1925 {
1926 trn_ctx->invalidate_changed_tables_in_cache();
1927 }
1928 }
1929 /* Free resources and perform other cleanup even for 'empty' transactions. */
1930 if (all)
1931 trn_ctx->cleanup();
1932 /*
1933 When the transaction has been committed, we clear the commit_low
1934 flag. This allow other parts of the system to check if commit_low
1935 was called.
1936 */
1937 trn_ctx->m_flags.commit_low= false;
1938 if (run_after_commit && thd->get_transaction()->m_flags.run_hooks)
1939 {
1940 /*
1941 If commit succeeded, we call the after_commit hook.
1942
1943 TODO: Investigate if this can be refactored so that there is
1944 only one invocation of this hook in the code (in
1945 MYSQL_LOG_BIN::finish_commit).
1946 */
1947 if (!error)
1948 (void) RUN_HOOK(transaction, after_commit, (thd, all));
1949 trn_ctx->m_flags.run_hooks= false;
1950 }
1951 DBUG_RETURN(error);
1952 }
1953
1954
ha_rollback_low(THD * thd,bool all)1955 int ha_rollback_low(THD *thd, bool all)
1956 {
1957 Transaction_ctx *trn_ctx= thd->get_transaction();
1958 int error= 0;
1959 Transaction_ctx::enum_trx_scope trx_scope=
1960 all ? Transaction_ctx::SESSION : Transaction_ctx::STMT;
1961 Ha_trx_info *ha_info= trn_ctx->ha_trx_info(trx_scope), *ha_info_next;
1962
1963 (void) RUN_HOOK(transaction, before_rollback, (thd, all));
1964
1965 if (ha_info)
1966 {
1967 bool restore_backup_ha_data= false;
1968 /*
1969 Similarly to the commit case, the binlog or slave applier
1970 reattaches the engine ha_data to THD.
1971 */
1972 if (all && thd->rpl_unflag_detached_engine_ha_data())
1973 {
1974 assert(trn_ctx->xid_state()->get_state() != XID_STATE::XA_NOTR ||
1975 thd->killed == THD::KILL_CONNECTION);
1976
1977 restore_backup_ha_data= true;
1978 }
1979
1980 for (; ha_info; ha_info= ha_info_next)
1981 {
1982 int err;
1983 handlerton *ht= ha_info->ht();
1984 if ((err= ht->rollback(ht, thd, all)))
1985 { // cannot happen
1986 my_error(ER_ERROR_DURING_ROLLBACK, MYF(0), err);
1987 error= 1;
1988 }
1989 assert(!thd->status_var_aggregated);
1990 thd->status_var.ha_rollback_count++;
1991 ha_info_next= ha_info->next();
1992 if (restore_backup_ha_data)
1993 reattach_engine_ha_data_to_thd(thd, ht);
1994 ha_info->reset(); /* keep it conveniently zero-filled */
1995 }
1996 trn_ctx->reset_scope(trx_scope);
1997 }
1998
1999 /*
2000 Thanks to possibility of MDL deadlock rollback request can come even if
2001 transaction hasn't been started in any transactional storage engine.
2002
2003 It is possible to have a call of ha_rollback_low() while handling
2004 failure from ha_prepare() and an error in Daignostics_area still
2005 wasn't set. Therefore it is required to check that an error in
2006 Diagnostics_area is set before calling the method XID_STATE::set_error().
2007
2008 If it wasn't done it would lead to failure of the assertion
2009 assert(m_status == DA_ERROR)
2010 in the method Diagnostics_area::mysql_errno().
2011
2012 In case ha_prepare is failed and an error wasn't set in Diagnostics_area
2013 the error ER_XA_RBROLLBACK is set in the Diagnostics_area from
2014 the method Sql_cmd_xa_prepare::trans_xa_prepare() when non-zero result code
2015 returned by ha_prepare() is handled.
2016 */
2017 if (all && thd->transaction_rollback_request && thd->is_error())
2018 trn_ctx->xid_state()->set_error(thd);
2019
2020 (void) RUN_HOOK(transaction, after_rollback, (thd, all));
2021 return error;
2022 }
2023
2024
ha_rollback_trans(THD * thd,bool all)2025 int ha_rollback_trans(THD *thd, bool all)
2026 {
2027 int error=0;
2028 Transaction_ctx *trn_ctx= thd->get_transaction();
2029 bool is_xa_rollback= trn_ctx->xid_state()->has_state(XID_STATE::XA_PREPARED);
2030
2031 /*
2032 "real" is a nick name for a transaction for which a commit will
2033 make persistent changes. E.g. a 'stmt' transaction inside a 'all'
2034 transaction is not 'real': even though it's possible to commit it,
2035 the changes are not durable as they might be rolled back if the
2036 enclosing 'all' transaction is rolled back.
2037 We establish the value of 'is_real_trans' by checking
2038 if it's an explicit COMMIT or BEGIN statement, or implicit
2039 commit issued by DDL (in these cases all == TRUE),
2040 or if we're running in autocommit mode (it's only in the autocommit mode
2041 ha_commit_one_phase() is called with an empty
2042 transaction.all.ha_list, see why in trans_register_ha()).
2043 */
2044 bool is_real_trans=
2045 all || !trn_ctx->is_active(Transaction_ctx::SESSION);
2046
2047 DBUG_ENTER("ha_rollback_trans");
2048
2049 /*
2050 We must not rollback the normal transaction if a statement
2051 transaction is pending.
2052 */
2053 assert(!trn_ctx->is_active(Transaction_ctx::STMT) ||
2054 !all);
2055
2056 if (thd->in_sub_stmt)
2057 {
2058 assert(0);
2059 /*
2060 If we are inside stored function or trigger we should not commit or
2061 rollback current statement transaction. See comment in ha_commit_trans()
2062 call for more information.
2063 */
2064 if (!all)
2065 DBUG_RETURN(0);
2066 my_error(ER_COMMIT_NOT_ALLOWED_IN_SF_OR_TRG, MYF(0));
2067 DBUG_RETURN(1);
2068 }
2069
2070 if (tc_log)
2071 error= tc_log->rollback(thd, all);
2072 /*
2073 Mark multi-statement (any autocommit mode) or single-statement
2074 (autocommit=1) transaction as rolled back
2075 */
2076 #ifdef HAVE_PSI_TRANSACTION_INTERFACE
2077 if (all || !thd->in_active_multi_stmt_transaction())
2078 {
2079 MYSQL_ROLLBACK_TRANSACTION(thd->m_transaction_psi);
2080 thd->m_transaction_psi= NULL;
2081 }
2082 #endif
2083
2084 /* Always cleanup. Even if nht==0. There may be savepoints. */
2085 if (is_real_trans)
2086 {
2087 trn_ctx->cleanup();
2088 thd->tx_priority= 0;
2089 }
2090
2091 if (all)
2092 thd->transaction_rollback_request= FALSE;
2093
2094 /*
2095 Only call gtid_rollback(THD*), which will purge thd->owned_gtid, if
2096 complete transaction is being rollback or autocommit=1.
2097 Notice, XA rollback has just invoked update_on_commit() through
2098 tc_log->*rollback* stack.
2099 */
2100 if (is_real_trans && !is_xa_rollback)
2101 gtid_state->update_on_rollback(thd);
2102
2103 /*
2104 If the transaction cannot be rolled back safely, warn; don't warn if this
2105 is a slave thread (because when a slave thread executes a ROLLBACK, it has
2106 been read from the binary log, so it's 100% sure and normal to produce
2107 error ER_WARNING_NOT_COMPLETE_ROLLBACK. If we sent the warning to the
2108 slave SQL thread, it would not stop the thread but just be printed in
2109 the error log; but we don't want users to wonder why they have this
2110 message in the error log, so we don't send it.
2111 */
2112 if (is_real_trans &&
2113 trn_ctx->cannot_safely_rollback(
2114 Transaction_ctx::SESSION) &&
2115 !thd->slave_thread && thd->killed != THD::KILL_CONNECTION)
2116 trn_ctx->push_unsafe_rollback_warnings(thd);
2117
2118 DBUG_RETURN(error);
2119 }
2120
2121
2122 /**
2123 Commit the attachable transaction in storage engines.
2124
2125 @note This is slimmed down version of ha_commit_trans()/ha_commit_low()
2126 which commits attachable transaction but skips code which is
2127 unnecessary and unsafe for them (like dealing with GTIDs).
2128 Since attachable transactions are read-only their commit only
2129 needs to release resources and cleanup state in SE.
2130
2131 @param thd Current thread
2132
2133 @retval 0 - Success
2134 @retval non-0 - Failure
2135 */
ha_commit_attachable(THD * thd)2136 int ha_commit_attachable(THD *thd)
2137 {
2138 int error= 0;
2139 Transaction_ctx *trn_ctx= thd->get_transaction();
2140 Ha_trx_info *ha_info= trn_ctx->ha_trx_info(Transaction_ctx::STMT);
2141 Ha_trx_info *ha_info_next;
2142
2143 /* This function only handles attachable transactions. */
2144 assert(thd->is_attachable_ro_transaction_active());
2145 /*
2146 Since the attachable transaction is AUTOCOMMIT we only need
2147 to care about statement transaction.
2148 */
2149 assert(! trn_ctx->is_active(Transaction_ctx::SESSION));
2150
2151 if (ha_info)
2152 {
2153 for (; ha_info; ha_info= ha_info_next)
2154 {
2155 /* Attachable transaction is not supposed to modify anything. */
2156 assert(! ha_info->is_trx_read_write());
2157
2158 handlerton *ht= ha_info->ht();
2159 if (ht->commit(ht, thd, false))
2160 {
2161 /*
2162 In theory this should not happen since attachable transactions
2163 are read only and therefore commit is supposed to only release
2164 resources/cleanup state. Even if this happens we will simply
2165 continue committing attachable transaction in other SEs.
2166 */
2167 assert(false);
2168 error= 1;
2169 }
2170 assert(!thd->status_var_aggregated);
2171 thd->status_var.ha_commit_count++;
2172 ha_info_next= ha_info->next();
2173
2174 ha_info->reset(); /* keep it conveniently zero-filled */
2175 }
2176 trn_ctx->reset_scope(Transaction_ctx::STMT);
2177 }
2178
2179 /*
2180 Mark transaction as commited in PSI.
2181 */
2182 #ifdef HAVE_PSI_TRANSACTION_INTERFACE
2183 if (thd->m_transaction_psi != NULL)
2184 {
2185 MYSQL_COMMIT_TRANSACTION(thd->m_transaction_psi);
2186 thd->m_transaction_psi= NULL;
2187 }
2188 #endif
2189
2190 /* Free resources and perform other cleanup even for 'empty' transactions. */
2191 trn_ctx->cleanup();
2192
2193 return (error);
2194 }
2195
2196
2197 /**
2198 @details
2199 This function should be called when MySQL sends rows of a SELECT result set
2200 or the EOF mark to the client. It releases a possible adaptive hash index
2201 S-latch held by thd in InnoDB and also releases a possible InnoDB query
2202 FIFO ticket to enter InnoDB. To save CPU time, InnoDB allows a thd to
2203 keep them over several calls of the InnoDB handler interface when a join
2204 is executed. But when we let the control to pass to the client they have
2205 to be released because if the application program uses mysql_use_result(),
2206 it may deadlock on the S-latch if the application on another connection
2207 performs another SQL query. In MySQL-4.1 this is even more important because
2208 there a connection can have several SELECT queries open at the same time.
2209
2210 @param thd the thread handle of the current connection
2211
2212 @return
2213 always 0
2214 */
2215
ha_release_temporary_latches(THD * thd)2216 int ha_release_temporary_latches(THD *thd)
2217 {
2218 const Ha_trx_info *info;
2219 Transaction_ctx *trn_ctx= thd->get_transaction();
2220
2221 /*
2222 Note that below we assume that only transactional storage engines
2223 may need release_temporary_latches(). If this will ever become false,
2224 we could iterate on thd->open_tables instead (and remove duplicates
2225 as if (!seen[hton->slot]) { seen[hton->slot]=1; ... }).
2226 */
2227 for (info= trn_ctx->ha_trx_info(Transaction_ctx::STMT);
2228 info; info= info->next())
2229 {
2230 handlerton *hton= info->ht();
2231 if (hton && hton->release_temporary_latches)
2232 hton->release_temporary_latches(hton, thd);
2233 }
2234 return 0;
2235 }
2236
2237 /**
2238 Check if all storage engines used in transaction agree that after
2239 rollback to savepoint it is safe to release MDL locks acquired after
2240 savepoint creation.
2241
2242 @param thd The client thread that executes the transaction.
2243
2244 @return true - It is safe to release MDL locks.
2245 false - If it is not.
2246 */
ha_rollback_to_savepoint_can_release_mdl(THD * thd)2247 bool ha_rollback_to_savepoint_can_release_mdl(THD *thd)
2248 {
2249 Ha_trx_info *ha_info;
2250 Transaction_ctx *trn_ctx= thd->get_transaction();
2251 Transaction_ctx::enum_trx_scope trx_scope=
2252 thd->in_sub_stmt ? Transaction_ctx::STMT : Transaction_ctx::SESSION;
2253
2254 DBUG_ENTER("ha_rollback_to_savepoint_can_release_mdl");
2255
2256 /**
2257 Checking whether it is safe to release metadata locks after rollback to
2258 savepoint in all the storage engines that are part of the transaction.
2259 */
2260 for (ha_info= trn_ctx->ha_trx_info(trx_scope);
2261 ha_info; ha_info= ha_info->next())
2262 {
2263 handlerton *ht= ha_info->ht();
2264 assert(ht);
2265
2266 if (ht->savepoint_rollback_can_release_mdl == 0 ||
2267 ht->savepoint_rollback_can_release_mdl(ht, thd) == false)
2268 DBUG_RETURN(false);
2269 }
2270
2271 DBUG_RETURN(true);
2272 }
2273
ha_rollback_to_savepoint(THD * thd,SAVEPOINT * sv)2274 int ha_rollback_to_savepoint(THD *thd, SAVEPOINT *sv)
2275 {
2276 int error=0;
2277 Transaction_ctx *trn_ctx= thd->get_transaction();
2278 Transaction_ctx::enum_trx_scope trx_scope=
2279 !thd->in_sub_stmt ? Transaction_ctx::SESSION : Transaction_ctx::STMT;
2280
2281 Ha_trx_info *ha_info, *ha_info_next;
2282
2283 DBUG_ENTER("ha_rollback_to_savepoint");
2284
2285 trn_ctx->set_rw_ha_count(trx_scope, 0);
2286 trn_ctx->set_no_2pc(trx_scope, 0);
2287 /*
2288 rolling back to savepoint in all storage engines that were part of the
2289 transaction when the savepoint was set
2290 */
2291 for (ha_info= sv->ha_list; ha_info; ha_info= ha_info->next())
2292 {
2293 int err;
2294 handlerton *ht= ha_info->ht();
2295 assert(ht);
2296 assert(ht->savepoint_set != 0);
2297 if ((err= ht->savepoint_rollback(ht, thd,
2298 (uchar *)(sv+1)+ht->savepoint_offset)))
2299 { // cannot happen
2300 my_error(ER_ERROR_DURING_ROLLBACK, MYF(0), err);
2301 error=1;
2302 }
2303 assert(!thd->status_var_aggregated);
2304 thd->status_var.ha_savepoint_rollback_count++;
2305 if (ht->prepare == 0)
2306 trn_ctx->set_no_2pc(trx_scope, true);
2307 }
2308
2309 /*
2310 rolling back the transaction in all storage engines that were not part of
2311 the transaction when the savepoint was set
2312 */
2313 for (ha_info= trn_ctx->ha_trx_info(trx_scope); ha_info != sv->ha_list;
2314 ha_info= ha_info_next)
2315 {
2316 int err;
2317 handlerton *ht= ha_info->ht();
2318 if ((err= ht->rollback(ht, thd, !thd->in_sub_stmt)))
2319 { // cannot happen
2320 my_error(ER_ERROR_DURING_ROLLBACK, MYF(0), err);
2321 error=1;
2322 }
2323 assert(!thd->status_var_aggregated);
2324 thd->status_var.ha_rollback_count++;
2325 ha_info_next= ha_info->next();
2326 ha_info->reset(); /* keep it conveniently zero-filled */
2327 }
2328 trn_ctx->set_ha_trx_info(trx_scope, sv->ha_list);
2329
2330 #ifdef HAVE_PSI_TRANSACTION_INTERFACE
2331 if (thd->m_transaction_psi != NULL)
2332 MYSQL_INC_TRANSACTION_ROLLBACK_TO_SAVEPOINT(thd->m_transaction_psi, 1);
2333 #endif
2334
2335 DBUG_RETURN(error);
2336 }
2337
ha_prepare_low(THD * thd,bool all)2338 int ha_prepare_low(THD *thd, bool all)
2339 {
2340 int error= 0;
2341 Transaction_ctx::enum_trx_scope trx_scope=
2342 all ? Transaction_ctx::SESSION : Transaction_ctx::STMT;
2343 Ha_trx_info *ha_info= thd->get_transaction()->ha_trx_info(trx_scope);
2344
2345 DBUG_ENTER("ha_prepare_low");
2346
2347 if (ha_info)
2348 {
2349 for (; ha_info && !error; ha_info= ha_info->next())
2350 {
2351 int err= 0;
2352 handlerton *ht= ha_info->ht();
2353 /*
2354 Do not call two-phase commit if this particular
2355 transaction is read-only. This allows for simpler
2356 implementation in engines that are always read-only.
2357 */
2358 if (!ha_info->is_trx_read_write())
2359 continue;
2360 if ((err= ht->prepare(ht, thd, all)))
2361 {
2362 my_error(ER_ERROR_DURING_COMMIT, MYF(0), err);
2363 error= 1;
2364 }
2365 assert(!thd->status_var_aggregated);
2366 thd->status_var.ha_prepare_count++;
2367 }
2368 DBUG_EXECUTE_IF("crash_commit_after_prepare", DBUG_SUICIDE(););
2369 }
2370
2371 DBUG_RETURN(error);
2372 }
2373
2374 /**
2375 @note
2376 according to the sql standard (ISO/IEC 9075-2:2003)
2377 section "4.33.4 SQL-statements and transaction states",
2378 SAVEPOINT is *not* transaction-initiating SQL-statement
2379 */
ha_savepoint(THD * thd,SAVEPOINT * sv)2380 int ha_savepoint(THD *thd, SAVEPOINT *sv)
2381 {
2382 int error=0;
2383 Transaction_ctx::enum_trx_scope trx_scope=
2384 !thd->in_sub_stmt ? Transaction_ctx::SESSION : Transaction_ctx::STMT;
2385 Ha_trx_info *ha_info= thd->get_transaction()->ha_trx_info(trx_scope);
2386 Ha_trx_info *begin_ha_info= ha_info;
2387
2388 DBUG_ENTER("ha_savepoint");
2389
2390 for (; ha_info; ha_info= ha_info->next())
2391 {
2392 int err;
2393 handlerton *ht= ha_info->ht();
2394 assert(ht);
2395 if (! ht->savepoint_set)
2396 {
2397 my_error(ER_CHECK_NOT_IMPLEMENTED, MYF(0), "SAVEPOINT");
2398 error=1;
2399 break;
2400 }
2401 if ((err= ht->savepoint_set(ht, thd, (uchar *)(sv+1)+ht->savepoint_offset)))
2402 { // cannot happen
2403 my_error(ER_GET_ERRNO, MYF(0), err);
2404 error=1;
2405 }
2406 assert(!thd->status_var_aggregated);
2407 thd->status_var.ha_savepoint_count++;
2408 }
2409 /*
2410 Remember the list of registered storage engines. All new
2411 engines are prepended to the beginning of the list.
2412 */
2413 sv->ha_list= begin_ha_info;
2414
2415 #ifdef HAVE_PSI_TRANSACTION_INTERFACE
2416 if (!error && thd->m_transaction_psi != NULL)
2417 MYSQL_INC_TRANSACTION_SAVEPOINTS(thd->m_transaction_psi, 1);
2418 #endif
2419
2420 DBUG_RETURN(error);
2421 }
2422
ha_release_savepoint(THD * thd,SAVEPOINT * sv)2423 int ha_release_savepoint(THD *thd, SAVEPOINT *sv)
2424 {
2425 int error=0;
2426 Ha_trx_info *ha_info= sv->ha_list;
2427 DBUG_ENTER("ha_release_savepoint");
2428
2429 for (; ha_info; ha_info= ha_info->next())
2430 {
2431 int err;
2432 handlerton *ht= ha_info->ht();
2433 /* Savepoint life time is enclosed into transaction life time. */
2434 assert(ht);
2435 if (!ht->savepoint_release)
2436 continue;
2437 if ((err= ht->savepoint_release(ht, thd,
2438 (uchar *)(sv+1) + ht->savepoint_offset)))
2439 { // cannot happen
2440 my_error(ER_GET_ERRNO, MYF(0), err);
2441 error=1;
2442 }
2443 }
2444
2445 #ifdef HAVE_PSI_TRANSACTION_INTERFACE
2446 if (thd->m_transaction_psi != NULL)
2447 MYSQL_INC_TRANSACTION_RELEASE_SAVEPOINT(thd->m_transaction_psi, 1);
2448 #endif
2449 DBUG_RETURN(error);
2450 }
2451
2452
snapshot_handlerton(THD * thd,plugin_ref plugin,void * arg)2453 static my_bool snapshot_handlerton(THD *thd, plugin_ref plugin,
2454 void *arg)
2455 {
2456 handlerton *hton= plugin_data<handlerton*>(plugin);
2457 if (hton->state == SHOW_OPTION_YES &&
2458 hton->start_consistent_snapshot)
2459 {
2460 hton->start_consistent_snapshot(hton, thd);
2461 *((bool *)arg)= false;
2462 }
2463 return FALSE;
2464 }
2465
ha_start_consistent_snapshot(THD * thd)2466 int ha_start_consistent_snapshot(THD *thd)
2467 {
2468 bool warn= true;
2469
2470 plugin_foreach(thd, snapshot_handlerton, MYSQL_STORAGE_ENGINE_PLUGIN, &warn);
2471
2472 /*
2473 Same idea as when one wants to CREATE TABLE in one engine which does not
2474 exist:
2475 */
2476 if (warn)
2477 push_warning(thd, Sql_condition::SL_WARNING, ER_UNKNOWN_ERROR,
2478 "This MySQL server does not support any "
2479 "consistent-read capable storage engine");
2480 return 0;
2481 }
2482
2483
flush_handlerton(THD * thd,plugin_ref plugin,void * arg)2484 static my_bool flush_handlerton(THD *thd, plugin_ref plugin,
2485 void *arg)
2486 {
2487 handlerton *hton= plugin_data<handlerton*>(plugin);
2488 if (hton->state == SHOW_OPTION_YES && hton->flush_logs &&
2489 hton->flush_logs(hton, *(static_cast<bool *>(arg))))
2490 return TRUE;
2491 return FALSE;
2492 }
2493
2494
ha_flush_logs(handlerton * db_type,bool binlog_group_flush)2495 bool ha_flush_logs(handlerton *db_type, bool binlog_group_flush)
2496 {
2497 if (db_type == NULL)
2498 {
2499 if (plugin_foreach(NULL, flush_handlerton,
2500 MYSQL_STORAGE_ENGINE_PLUGIN,
2501 static_cast<void *>(&binlog_group_flush)))
2502 return TRUE;
2503 }
2504 else
2505 {
2506 if (db_type->state != SHOW_OPTION_YES ||
2507 (db_type->flush_logs &&
2508 db_type->flush_logs(db_type, binlog_group_flush)))
2509 return TRUE;
2510 }
2511 return FALSE;
2512 }
2513
2514
2515 /**
2516 @brief make canonical filename
2517
2518 @param[in] file table handler
2519 @param[in] path original path
2520 @param[out] tmp_path buffer for canonized path
2521
2522 @details Lower case db name and table name path parts for
2523 non file based tables when lower_case_table_names
2524 is 2 (store as is, compare in lower case).
2525 Filesystem path prefix (mysql_data_home or tmpdir)
2526 is left intact.
2527
2528 @note tmp_path may be left intact if no conversion was
2529 performed.
2530
2531 @retval canonized path
2532
2533 @todo This may be done more efficiently when table path
2534 gets built. Convert this function to something like
2535 ASSERT_CANONICAL_FILENAME.
2536 */
get_canonical_filename(handler * file,const char * path,char * tmp_path)2537 const char *get_canonical_filename(handler *file, const char *path,
2538 char *tmp_path)
2539 {
2540 uint i;
2541 if (lower_case_table_names != 2 || (file->ha_table_flags() & HA_FILE_BASED))
2542 return path;
2543
2544 for (i= 0; i <= mysql_tmpdir_list.max; i++)
2545 {
2546 if (is_prefix(path, mysql_tmpdir_list.list[i]))
2547 return path;
2548 }
2549
2550 /* Ensure that table handler get path in lower case */
2551 if (tmp_path != path)
2552 my_stpcpy(tmp_path, path);
2553
2554 /*
2555 we only should turn into lowercase database/table part
2556 so start the process after homedirectory
2557 */
2558 my_casedn_str(files_charset_info, tmp_path + mysql_data_home_len);
2559 return tmp_path;
2560 }
2561
2562
2563 class Ha_delete_table_error_handler: public Internal_error_handler
2564 {
2565 public:
handle_condition(THD * thd,uint sql_errno,const char * sqlstate,Sql_condition::enum_severity_level * level,const char * msg)2566 virtual bool handle_condition(THD *thd,
2567 uint sql_errno,
2568 const char* sqlstate,
2569 Sql_condition::enum_severity_level *level,
2570 const char* msg)
2571 {
2572 /* Downgrade errors to warnings. */
2573 if (*level == Sql_condition::SL_ERROR)
2574 *level= Sql_condition::SL_WARNING;
2575 return false;
2576 }
2577 };
2578
2579
2580 /** @brief
2581 This should return ENOENT if the file doesn't exists.
2582 The .frm file will be deleted only if we return 0 or ENOENT
2583 */
ha_delete_table(THD * thd,handlerton * table_type,const char * path,const char * db,const char * alias,bool generate_warning)2584 int ha_delete_table(THD *thd, handlerton *table_type, const char *path,
2585 const char *db, const char *alias, bool generate_warning)
2586 {
2587 handler *file;
2588 char tmp_path[FN_REFLEN];
2589 int error;
2590 TABLE dummy_table;
2591 TABLE_SHARE dummy_share;
2592 DBUG_ENTER("ha_delete_table");
2593
2594 dummy_table.s= &dummy_share;
2595
2596 /* DB_TYPE_UNKNOWN is used in ALTER TABLE when renaming only .frm files */
2597 if (table_type == NULL ||
2598 ! (file=get_new_handler((TABLE_SHARE*)0, thd->mem_root, table_type)))
2599 DBUG_RETURN(ENOENT);
2600
2601 path= get_canonical_filename(file, path, tmp_path);
2602 if ((error= file->ha_delete_table(path)) && generate_warning)
2603 {
2604 /*
2605 Because file->print_error() use my_error() to generate the error message
2606 we use an internal error handler to intercept it and store the text
2607 in a temporary buffer. Later the message will be presented to user
2608 as a warning.
2609 */
2610 Ha_delete_table_error_handler ha_delete_table_error_handler;
2611
2612 /* Fill up strucutures that print_error may need */
2613 dummy_share.path.str= (char*) path;
2614 dummy_share.path.length= strlen(path);
2615 dummy_share.db.str= (char*) db;
2616 dummy_share.db.length= strlen(db);
2617 dummy_share.table_name.str= (char*) alias;
2618 dummy_share.table_name.length= strlen(alias);
2619 dummy_table.alias= alias;
2620
2621 file->change_table_ptr(&dummy_table, &dummy_share);
2622
2623 /*
2624 XXX: should we convert *all* errors to warnings here?
2625 What if the error is fatal?
2626 */
2627 thd->push_internal_handler(&ha_delete_table_error_handler);
2628 file->print_error(error, 0);
2629
2630 thd->pop_internal_handler();
2631 }
2632 delete file;
2633
2634 #ifdef HAVE_PSI_TABLE_INTERFACE
2635 if (likely(error == 0))
2636 {
2637 /* Table share not available, so check path for temp_table prefix. */
2638 bool temp_table= (strstr(path, tmp_file_prefix) != NULL);
2639 PSI_TABLE_CALL(drop_table_share)
2640 (temp_table, db, strlen(db), alias, strlen(alias));
2641 }
2642 #endif
2643
2644 DBUG_RETURN(error);
2645 }
2646
2647 /****************************************************************************
2648 ** General handler functions
2649 ****************************************************************************/
clone(const char * name,MEM_ROOT * mem_root)2650 handler *handler::clone(const char *name, MEM_ROOT *mem_root)
2651 {
2652 DBUG_ENTER("handler::clone");
2653 handler *new_handler= get_new_handler(table->s, mem_root, ht);
2654
2655 if (!new_handler)
2656 DBUG_RETURN(NULL);
2657 if (new_handler->set_ha_share_ref(ha_share))
2658 goto err;
2659
2660 /*
2661 Allocate handler->ref here because otherwise ha_open will allocate it
2662 on this->table->mem_root and we will not be able to reclaim that memory
2663 when the clone handler object is destroyed.
2664 */
2665 if (!(new_handler->ref= (uchar*) alloc_root(mem_root,
2666 ALIGN_SIZE(ref_length)*2)))
2667 goto err;
2668 /*
2669 TODO: Implement a more efficient way to have more than one index open for
2670 the same table instance. The ha_open call is not cachable for clone.
2671 */
2672 if (new_handler->ha_open(table, name, table->db_stat,
2673 HA_OPEN_IGNORE_IF_LOCKED))
2674 goto err;
2675
2676 DBUG_RETURN(new_handler);
2677
2678 err:
2679 delete new_handler;
2680 DBUG_RETURN(NULL);
2681 }
2682
2683
ha_statistic_increment(ulonglong SSV::* offset) const2684 void handler::ha_statistic_increment(ulonglong SSV::*offset) const
2685 {
2686 if (table && table->in_use) (table->in_use->status_var.*offset)++;
2687 }
2688
2689
ha_thd(void) const2690 THD *handler::ha_thd(void) const
2691 {
2692 assert(!table || !table->in_use || table->in_use == current_thd);
2693 return (table && table->in_use) ? table->in_use : current_thd;
2694 }
2695
unbind_psi()2696 void handler::unbind_psi()
2697 {
2698 #ifdef HAVE_PSI_TABLE_INTERFACE
2699 assert(m_lock_type == F_UNLCK);
2700 assert(inited == NONE);
2701 /*
2702 Notify the instrumentation that this table is not owned
2703 by this thread any more.
2704 */
2705 PSI_TABLE_CALL(unbind_table)(m_psi);
2706 #endif
2707 }
2708
rebind_psi()2709 void handler::rebind_psi()
2710 {
2711 #ifdef HAVE_PSI_TABLE_INTERFACE
2712 assert(m_lock_type == F_UNLCK);
2713 assert(inited == NONE);
2714 /*
2715 Notify the instrumentation that this table is now owned
2716 by this thread.
2717 */
2718 PSI_table_share *share_psi= ha_table_share_psi(table_share);
2719 m_psi= PSI_TABLE_CALL(rebind_table)(share_psi, this, m_psi);
2720 #endif
2721 }
2722
start_psi_batch_mode()2723 void handler::start_psi_batch_mode()
2724 {
2725 #ifdef HAVE_PSI_TABLE_INTERFACE
2726 assert(m_psi_batch_mode == PSI_BATCH_MODE_NONE);
2727 assert(m_psi_locker == NULL);
2728 m_psi_batch_mode= PSI_BATCH_MODE_STARTING;
2729 m_psi_numrows= 0;
2730 #endif
2731 }
2732
end_psi_batch_mode()2733 void handler::end_psi_batch_mode()
2734 {
2735 #ifdef HAVE_PSI_TABLE_INTERFACE
2736 assert(m_psi_batch_mode != PSI_BATCH_MODE_NONE);
2737 if (m_psi_locker != NULL)
2738 {
2739 assert(m_psi_batch_mode == PSI_BATCH_MODE_STARTED);
2740 PSI_TABLE_CALL(end_table_io_wait)(m_psi_locker, m_psi_numrows);
2741 m_psi_locker= NULL;
2742 }
2743 m_psi_batch_mode= PSI_BATCH_MODE_NONE;
2744 #endif
2745 }
2746
ha_table_share_psi(const TABLE_SHARE * share) const2747 PSI_table_share *handler::ha_table_share_psi(const TABLE_SHARE *share) const
2748 {
2749 return share->m_psi;
2750 }
2751
/** @brief
  Open database-handler.

  Attaches this handler to 'table_arg' and asks the storage engine to
  open the underlying table file.

  IMPLEMENTATION
    Try O_RDONLY if cannot open as O_RDWR
    Don't wait for locks if not HA_OPEN_WAIT_IF_LOCKED is set

  @param table_arg       table this handler will operate on
  @param name            path name of the table to open
  @param mode            O_RDWR or O_RDONLY
  @param test_if_locked  HA_OPEN_* flags

  @return 0 on success, otherwise an engine error code (also stored in
          my_errno as a safeguard)
*/
int handler::ha_open(TABLE *table_arg, const char *name, int mode,
                     int test_if_locked)
{
  int error;
  DBUG_ENTER("handler::ha_open");
  DBUG_PRINT("enter",
             ("name: %s db_type: %d db_stat: %d mode: %d lock_test: %d",
              name, ht->db_type, table_arg->db_stat, mode,
              test_if_locked));

  table= table_arg;
  assert(table->s == table_share);
  /* A handler must not be opened while it still holds a lock. */
  assert(m_lock_type == F_UNLCK);
  DBUG_PRINT("info", ("old m_lock_type: %d F_UNLCK %d", m_lock_type, F_UNLCK));
  assert(alloc_root_inited(&table->mem_root));

  if ((error=open(name,mode,test_if_locked)))
  {
    /*
      Fall back to a read-only open when the read-write open failed due
      to permissions or a read-only medium, if the caller allows it.
    */
    if ((error == EACCES || error == EROFS) && mode == O_RDWR &&
        (table->db_stat & HA_TRY_READ_ONLY))
    {
      table->db_stat|=HA_READ_ONLY;
      error=open(name,O_RDONLY,test_if_locked);
    }
  }
  if (error)
  {
    set_my_errno(error); /* Safeguard */
    DBUG_PRINT("error",("error: %d errno: %d",error,errno));
  }
  else
  {
    assert(m_psi == NULL);
    assert(table_share != NULL);
#ifdef HAVE_PSI_TABLE_INTERFACE
    /*
      Do not call this for partitions handlers, since it may take too much
      resources.
      So only use the m_psi on table level, not for individual partitions.
    */
    if (!(test_if_locked & HA_OPEN_NO_PSI_CALL))
    {
      PSI_table_share *share_psi= ha_table_share_psi(table_share);
      m_psi= PSI_TABLE_CALL(open_table)(share_psi, this);
    }
#endif

    if (table->s->db_options_in_use & HA_OPTION_READ_ONLY_DATA)
      table->db_stat|=HA_READ_ONLY;
    (void) extra(HA_EXTRA_NO_READCHECK);	// Not needed in SQL

    /* ref is already allocated for us if we're called from handler::clone() */
    if (!ref && !(ref= (uchar*) alloc_root(&table->mem_root, 
                                          ALIGN_SIZE(ref_length)*2)))
    {
      /* Allocation failed: undo the successful open before returning. */
      ha_close();
      error=HA_ERR_OUT_OF_MEM;
    }
    else
      /* ref and dup_ref share one allocation, dup_ref in the second half. */
      dup_ref=ref+ALIGN_SIZE(ref_length);
    cached_table_flags= table_flags();
  }
  DBUG_RETURN(error);
}
2823
2824
/**
  Close handler.

  Releases the performance-schema instrumentation handle (if any) and
  delegates to the engine's close(). The handler must already be
  unlocked and have no active index/rnd scan.

  @return result of the engine close() call
*/

int handler::ha_close(void)
{
  DBUG_ENTER("handler::ha_close");
#ifdef HAVE_PSI_TABLE_INTERFACE
  PSI_TABLE_CALL(close_table)(table_share, m_psi);
  m_psi= NULL; /* instrumentation handle, invalid after close_table() */
  /* No batch mode or row locker may be left active at close time. */
  assert(m_psi_batch_mode == PSI_BATCH_MODE_NONE);
  assert(m_psi_locker == NULL);
#endif
  // TODO: set table= NULL to mark the handler as closed?
  assert(m_psi == NULL);
  assert(m_lock_type == F_UNLCK);
  assert(inited == NONE);
  DBUG_RETURN(close());
}
2844
2845
2846 /**
2847 Initialize use of index.
2848
2849 @param idx Index to use
2850 @param sorted Use sorted order
2851
2852 @return Operation status
2853 @retval 0 Success
2854 @retval != 0 Error (error code returned)
2855 */
2856
ha_index_init(uint idx,bool sorted)2857 int handler::ha_index_init(uint idx, bool sorted)
2858 {
2859 DBUG_EXECUTE_IF("ha_index_init_fail", return HA_ERR_TABLE_DEF_CHANGED;);
2860 int result;
2861 DBUG_ENTER("ha_index_init");
2862 assert(table_share->tmp_table != NO_TMP_TABLE ||
2863 m_lock_type != F_UNLCK);
2864 assert(inited == NONE);
2865 if (!(result= index_init(idx, sorted)))
2866 inited= INDEX;
2867 end_range= NULL;
2868 DBUG_RETURN(result);
2869 }
2870
2871
2872 /**
2873 End use of index.
2874
2875 @return Operation status
2876 @retval 0 Success
2877 @retval != 0 Error (error code returned)
2878 */
2879
ha_index_end()2880 int handler::ha_index_end()
2881 {
2882 DBUG_ENTER("ha_index_end");
2883 /* SQL HANDLER function can call this without having it locked. */
2884 assert(table->open_by_handler ||
2885 table_share->tmp_table != NO_TMP_TABLE ||
2886 m_lock_type != F_UNLCK);
2887 assert(inited == INDEX);
2888 inited= NONE;
2889 end_range= NULL;
2890 DBUG_RETURN(index_end());
2891 }
2892
2893
2894 /**
2895 Initialize table for random read or scan.
2896
2897 @param scan if true: Initialize for random scans through rnd_next()
2898 if false: Initialize for random reads through rnd_pos()
2899
2900 @return Operation status
2901 @retval 0 Success
2902 @retval != 0 Error (error code returned)
2903 */
2904
ha_rnd_init(bool scan)2905 int handler::ha_rnd_init(bool scan)
2906 {
2907 DBUG_EXECUTE_IF("ha_rnd_init_fail", return HA_ERR_TABLE_DEF_CHANGED;);
2908 int result;
2909 DBUG_ENTER("ha_rnd_init");
2910 assert(table_share->tmp_table != NO_TMP_TABLE ||
2911 m_lock_type != F_UNLCK);
2912 assert(inited == NONE || (inited == RND && scan));
2913 inited= (result= rnd_init(scan)) ? NONE : RND;
2914 end_range= NULL;
2915 DBUG_RETURN(result);
2916 }
2917
2918
2919 /**
2920 End use of random access.
2921
2922 @return Operation status
2923 @retval 0 Success
2924 @retval != 0 Error (error code returned)
2925 */
2926
ha_rnd_end()2927 int handler::ha_rnd_end()
2928 {
2929 DBUG_ENTER("ha_rnd_end");
2930 /* SQL HANDLER function can call this without having it locked. */
2931 assert(table->open_by_handler ||
2932 table_share->tmp_table != NO_TMP_TABLE ||
2933 m_lock_type != F_UNLCK);
2934 assert(inited == RND);
2935 inited= NONE;
2936 end_range= NULL;
2937 DBUG_RETURN(rnd_end());
2938 }
2939
2940
/**
  Read next row via random scan.

  Wrapper around rnd_next() that instruments the read for the
  performance schema and refreshes generated columns after the fetch.

  @param buf Buffer to read the row into

  @return Operation status
  @retval 0     Success
  @retval != 0  Error (error code returned)
*/

int handler::ha_rnd_next(uchar *buf)
{
  int result;
  DBUG_EXECUTE_IF("ha_rnd_next_deadlock", return HA_ERR_LOCK_DEADLOCK;);
  DBUG_ENTER("handler::ha_rnd_next");
  assert(table_share->tmp_table != NO_TMP_TABLE ||
         m_lock_type != F_UNLCK);
  assert(inited == RND);

  // Set status for the need to update generated fields
  m_update_generated_read_fields= table->has_gcol();

  /* MAX_KEY: instrumented as a table (not index) fetch. */
  MYSQL_TABLE_IO_WAIT(PSI_TABLE_FETCH_ROW, MAX_KEY, result,
    { result= rnd_next(buf); })
  if (!result && m_update_generated_read_fields)
  {
    /* Re-evaluate generated columns from the base columns just read. */
    result= update_generated_read_fields(buf, table);
    m_update_generated_read_fields= false;
  }
  DBUG_RETURN(result);
}
2972
2973
/**
  Read row via random scan from position.

  Wrapper around rnd_pos() that instruments the read for the
  performance schema and refreshes generated columns after the fetch.

  @param[out] buf  Buffer to read the row into
  @param      pos  Position from position() call

  @return Operation status
  @retval 0     Success
  @retval != 0  Error (error code returned)
*/

int handler::ha_rnd_pos(uchar *buf, uchar *pos)
{
  int result;
  DBUG_ENTER("handler::ha_rnd_pos");
  assert(table_share->tmp_table != NO_TMP_TABLE ||
         m_lock_type != F_UNLCK);
  /* TODO: Find out how to solve ha_rnd_pos when finding duplicate update. */
  /* assert(inited == RND); */

  // Set status for the need to update generated fields
  m_update_generated_read_fields= table->has_gcol();

  /* MAX_KEY: instrumented as a table (not index) fetch. */
  MYSQL_TABLE_IO_WAIT(PSI_TABLE_FETCH_ROW, MAX_KEY, result,
    { result= rnd_pos(buf, pos); })
  if (!result && m_update_generated_read_fields)
  {
    /* Re-evaluate generated columns from the base columns just read. */
    result= update_generated_read_fields(buf, table);
    m_update_generated_read_fields= false;
  }
  DBUG_RETURN(result);
}
3006
3007
/**
  Read [part of] row via [part of] index.
  @param[out] buf        buffer where store the data
  @param key             Key to search for
  @param keypart_map     Which part of key to use
  @param find_flag       Direction/condition on key usage

  @returns Operation status
    @retval  0                   Success (found a record, and function has
                                 set table->status to 0)
    @retval  HA_ERR_END_OF_FILE  Row not found (function has set table->status
                                 to STATUS_NOT_FOUND). End of index passed.
    @retval  HA_ERR_KEY_NOT_FOUND Row not found (function has set table->status
                                 to STATUS_NOT_FOUND). Index cursor positioned.
    @retval  != 0                Error

  @note Positions an index cursor to the index specified in the handle.
  Fetches the row if available. If the key value is null,
  begin at the first key of the index.
  ha_index_read_map can be restarted without calling index_end on the previous
  index scan and without calling ha_index_init. In this case the
  ha_index_read_map is on the same index as the previous ha_index_scan.
  This is particularly used in conjunction with multi read ranges.
*/

int handler::ha_index_read_map(uchar *buf, const uchar *key,
                               key_part_map keypart_map,
                               enum ha_rkey_function find_flag)
{
  int result;
  DBUG_ENTER("handler::ha_index_read_map");
  assert(table_share->tmp_table != NO_TMP_TABLE ||
         m_lock_type != F_UNLCK);
  assert(inited == INDEX);
  /* A pushed index condition evaluates against table->record[0] only. */
  assert(!pushed_idx_cond || buf == table->record[0]);

  // Set status for the need to update generated fields
  m_update_generated_read_fields= table->has_gcol();

  MYSQL_TABLE_IO_WAIT(PSI_TABLE_FETCH_ROW, active_index, result,
    { result= index_read_map(buf, key, keypart_map, find_flag); })
  if (!result && m_update_generated_read_fields)
  {
    /* Re-evaluate generated columns from the base columns just read. */
    result= update_generated_read_fields(buf, table, active_index);
    m_update_generated_read_fields= false;
  }
  DBUG_RETURN(result);
}
3056
/**
  Read the last row matching the key prefix via index.

  @param[out] buf          Buffer to read the row into
  @param      key          Key to search for
  @param      keypart_map  Which part of key to use

  @return Operation status
  @retval 0     Success
  @retval != 0  Error (error code returned)
*/
int handler::ha_index_read_last_map(uchar *buf, const uchar *key,
                                    key_part_map keypart_map)
{
  int result;
  DBUG_ENTER("handler::ha_index_read_last_map");
  assert(table_share->tmp_table != NO_TMP_TABLE ||
         m_lock_type != F_UNLCK);
  assert(inited == INDEX);
  /* A pushed index condition evaluates against table->record[0] only. */
  assert(!pushed_idx_cond || buf == table->record[0]);

  // Set status for the need to update generated fields
  m_update_generated_read_fields= table->has_gcol();

  MYSQL_TABLE_IO_WAIT(PSI_TABLE_FETCH_ROW, active_index, result,
    { result= index_read_last_map(buf, key, keypart_map); })
  if (!result && m_update_generated_read_fields)
  {
    /* Re-evaluate generated columns from the base columns just read. */
    result= update_generated_read_fields(buf, table, active_index);
    m_update_generated_read_fields= false;
  }
  DBUG_RETURN(result);
}
3079
/**
  Initializes an index and read it.

  Unlike ha_index_read_map() this does not require a prior
  ha_index_init(); the index to use is passed explicitly.

  @see handler::ha_index_read_map.
*/

int handler::ha_index_read_idx_map(uchar *buf, uint index, const uchar *key,
                                   key_part_map keypart_map,
                                   enum ha_rkey_function find_flag)
{
  int result;
  assert(table_share->tmp_table != NO_TMP_TABLE ||
         m_lock_type != F_UNLCK);
  assert(end_range == NULL);
  /* A pushed index condition evaluates against table->record[0] only. */
  assert(!pushed_idx_cond || buf == table->record[0]);

  // Set status for the need to update generated fields
  m_update_generated_read_fields= table->has_gcol();

  MYSQL_TABLE_IO_WAIT(PSI_TABLE_FETCH_ROW, index, result,
    { result= index_read_idx_map(buf, index, key, keypart_map, find_flag); })
  if (!result && m_update_generated_read_fields)
  {
    /* Re-evaluate generated columns from the base columns just read. */
    result= update_generated_read_fields(buf, table, index);
    m_update_generated_read_fields= false;
  }
  return result;
}
3108
3109
/**
  Reads the next row via index.

  @param[out] buf  Row data

  @return Operation status.
    @retval  0                   Success
    @retval  HA_ERR_END_OF_FILE  Row not found
    @retval  != 0                Error
*/

int handler::ha_index_next(uchar * buf)
{
  int result;
  DBUG_ENTER("handler::ha_index_next");
  assert(table_share->tmp_table != NO_TMP_TABLE ||
         m_lock_type != F_UNLCK);
  assert(inited == INDEX);
  /* A pushed index condition evaluates against table->record[0] only. */
  assert(!pushed_idx_cond || buf == table->record[0]);

  // Set status for the need to update generated fields
  m_update_generated_read_fields= table->has_gcol();

  MYSQL_TABLE_IO_WAIT(PSI_TABLE_FETCH_ROW, active_index, result,
    { result= index_next(buf); })
  if (!result && m_update_generated_read_fields)
  {
    /* Re-evaluate generated columns from the base columns just read. */
    result= update_generated_read_fields(buf, table, active_index);
    m_update_generated_read_fields= false;
  }
  DBUG_RETURN(result);
}
3142
3143
/**
  Reads the previous row via index.

  @param[out] buf  Row data

  @return Operation status.
    @retval  0                   Success
    @retval  HA_ERR_END_OF_FILE  Row not found
    @retval  != 0                Error
*/

int handler::ha_index_prev(uchar * buf)
{
  int result;
  DBUG_ENTER("handler::ha_index_prev");
  assert(table_share->tmp_table != NO_TMP_TABLE ||
         m_lock_type != F_UNLCK);
  assert(inited == INDEX);
  /* A pushed index condition evaluates against table->record[0] only. */
  assert(!pushed_idx_cond || buf == table->record[0]);

  // Set status for the need to update generated fields
  m_update_generated_read_fields= table->has_gcol();

  MYSQL_TABLE_IO_WAIT(PSI_TABLE_FETCH_ROW, active_index, result,
    { result= index_prev(buf); })
  if (!result && m_update_generated_read_fields)
  {
    /* Re-evaluate generated columns from the base columns just read. */
    result= update_generated_read_fields(buf, table, active_index);
    m_update_generated_read_fields= false;
  }
  DBUG_RETURN(result);
}
3176
3177
/**
  Reads the first row via index.

  @param[out] buf  Row data

  @return Operation status.
    @retval  0                   Success
    @retval  HA_ERR_END_OF_FILE  Row not found
    @retval  != 0                Error
*/

int handler::ha_index_first(uchar * buf)
{
  int result;
  DBUG_ENTER("handler::ha_index_first");
  assert(table_share->tmp_table != NO_TMP_TABLE ||
         m_lock_type != F_UNLCK);
  assert(inited == INDEX);
  /* A pushed index condition evaluates against table->record[0] only. */
  assert(!pushed_idx_cond || buf == table->record[0]);

  // Set status for the need to update generated fields
  m_update_generated_read_fields= table->has_gcol();

  MYSQL_TABLE_IO_WAIT(PSI_TABLE_FETCH_ROW, active_index, result,
    { result= index_first(buf); })
  if (!result && m_update_generated_read_fields)
  {
    /* Re-evaluate generated columns from the base columns just read. */
    result= update_generated_read_fields(buf, table, active_index);
    m_update_generated_read_fields= false;
  }
  DBUG_RETURN(result);
}
3210
3211
/**
  Reads the last row via index.

  @param[out] buf  Row data

  @return Operation status.
    @retval  0                   Success
    @retval  HA_ERR_END_OF_FILE  Row not found
    @retval  != 0                Error
*/

int handler::ha_index_last(uchar * buf)
{
  int result;
  DBUG_ENTER("handler::ha_index_last");
  assert(table_share->tmp_table != NO_TMP_TABLE ||
         m_lock_type != F_UNLCK);
  assert(inited == INDEX);
  /* A pushed index condition evaluates against table->record[0] only. */
  assert(!pushed_idx_cond || buf == table->record[0]);

  // Set status for the need to update generated fields
  m_update_generated_read_fields= table->has_gcol();

  MYSQL_TABLE_IO_WAIT(PSI_TABLE_FETCH_ROW, active_index, result,
    { result= index_last(buf); })
  if (!result && m_update_generated_read_fields)
  {
    /* Re-evaluate generated columns from the base columns just read. */
    result= update_generated_read_fields(buf, table, active_index);
    m_update_generated_read_fields= false;
  }
  DBUG_RETURN(result);
}
3244
3245
/**
  Reads the next same row via index.

  @param[out] buf     Row data
  @param      key     Key to search for
  @param      keylen  Length of key

  @return Operation status.
    @retval  0                   Success
    @retval  HA_ERR_END_OF_FILE  Row not found
    @retval  != 0                Error
*/

int handler::ha_index_next_same(uchar *buf, const uchar *key, uint keylen)
{
  int result;
  DBUG_ENTER("handler::ha_index_next_same");
  assert(table_share->tmp_table != NO_TMP_TABLE ||
         m_lock_type != F_UNLCK);
  assert(inited == INDEX);
  /* A pushed index condition evaluates against table->record[0] only. */
  assert(!pushed_idx_cond || buf == table->record[0]);

  // Set status for the need to update generated fields
  m_update_generated_read_fields= table->has_gcol();

  MYSQL_TABLE_IO_WAIT(PSI_TABLE_FETCH_ROW, active_index, result,
    { result= index_next_same(buf, key, keylen); })
  if (!result && m_update_generated_read_fields)
  {
    /* Re-evaluate generated columns from the base columns just read. */
    result= update_generated_read_fields(buf, table, active_index);
    m_update_generated_read_fields= false;
  }
  DBUG_RETURN(result);
}
3280
3281
3282 /**
3283 Read first row (only) from a table.
3284
3285 This is never called for InnoDB tables, as these table types
3286 has the HA_STATS_RECORDS_IS_EXACT set.
3287 */
read_first_row(uchar * buf,uint primary_key)3288 int handler::read_first_row(uchar * buf, uint primary_key)
3289 {
3290 int error;
3291 DBUG_ENTER("handler::read_first_row");
3292
3293 ha_statistic_increment(&SSV::ha_read_first_count);
3294
3295 /*
3296 If there is very few deleted rows in the table, find the first row by
3297 scanning the table.
3298 TODO remove the test for HA_READ_ORDER
3299 */
3300 if (stats.deleted < 10 || primary_key >= MAX_KEY ||
3301 !(index_flags(primary_key, 0, 0) & HA_READ_ORDER))
3302 {
3303 if (!(error= ha_rnd_init(1)))
3304 {
3305 while ((error= ha_rnd_next(buf)) == HA_ERR_RECORD_DELETED)
3306 /* skip deleted row */;
3307 const int end_error= ha_rnd_end();
3308 if (!error)
3309 error= end_error;
3310 }
3311 }
3312 else
3313 {
3314 /* Find the first row through the primary key */
3315 if (!(error= ha_index_init(primary_key, 0)))
3316 {
3317 error= ha_index_first(buf);
3318 const int end_error= ha_index_end();
3319 if (!error)
3320 error= end_error;
3321 }
3322 }
3323 DBUG_RETURN(error);
3324 }
3325
3326 /**
3327 Generate the next auto-increment number based on increment and offset.
3328 computes the lowest number
3329 - strictly greater than "nr"
3330 - of the form: auto_increment_offset + N * auto_increment_increment
3331 If overflow happened then return MAX_ULONGLONG value as an
3332 indication of overflow.
3333 In most cases increment= offset= 1, in which case we get:
3334 @verbatim 1,2,3,4,5,... @endverbatim
3335 If increment=10 and offset=5 and previous number is 1, we get:
3336 @verbatim 1,5,15,25,35,... @endverbatim
3337 */
3338 inline ulonglong
compute_next_insert_id(ulonglong nr,struct system_variables * variables)3339 compute_next_insert_id(ulonglong nr,struct system_variables *variables)
3340 {
3341 const ulonglong save_nr= nr;
3342
3343 if (variables->auto_increment_increment == 1)
3344 nr= nr + 1; // optimization of the formula below
3345 else
3346 {
3347 nr= (((nr+ variables->auto_increment_increment -
3348 variables->auto_increment_offset)) /
3349 (ulonglong) variables->auto_increment_increment);
3350 nr= (nr* (ulonglong) variables->auto_increment_increment +
3351 variables->auto_increment_offset);
3352 }
3353
3354 if (unlikely(nr <= save_nr))
3355 return ULLONG_MAX;
3356
3357 return nr;
3358 }
3359
3360
adjust_next_insert_id_after_explicit_value(ulonglong nr)3361 void handler::adjust_next_insert_id_after_explicit_value(ulonglong nr)
3362 {
3363 /*
3364 If we have set THD::next_insert_id previously and plan to insert an
3365 explicitely-specified value larger than this, we need to increase
3366 THD::next_insert_id to be greater than the explicit value.
3367 */
3368 if ((next_insert_id > 0) && (nr >= next_insert_id))
3369 set_next_insert_id(compute_next_insert_id(nr, &table->in_use->variables));
3370 }
3371
3372
3373 /** @brief
3374 Computes the largest number X:
3375 - smaller than or equal to "nr"
3376 - of the form: auto_increment_offset + N * auto_increment_increment
3377 where N>=0.
3378
3379 SYNOPSIS
3380 prev_insert_id
3381 nr Number to "round down"
3382 variables variables struct containing auto_increment_increment and
3383 auto_increment_offset
3384
3385 RETURN
3386 The number X if it exists, "nr" otherwise.
3387 */
3388 inline ulonglong
prev_insert_id(ulonglong nr,struct system_variables * variables)3389 prev_insert_id(ulonglong nr, struct system_variables *variables)
3390 {
3391 if (unlikely(nr < variables->auto_increment_offset))
3392 {
3393 /*
3394 There's nothing good we can do here. That is a pathological case, where
3395 the offset is larger than the column's max possible value, i.e. not even
3396 the first sequence value may be inserted. User will receive warning.
3397 */
3398 DBUG_PRINT("info",("auto_increment: nr: %lu cannot honour "
3399 "auto_increment_offset: %lu",
3400 (ulong) nr, variables->auto_increment_offset));
3401 return nr;
3402 }
3403 if (variables->auto_increment_increment == 1)
3404 return nr; // optimization of the formula below
3405 nr= (((nr - variables->auto_increment_offset)) /
3406 (ulonglong) variables->auto_increment_increment);
3407 return (nr * (ulonglong) variables->auto_increment_increment +
3408 variables->auto_increment_offset);
3409 }
3410
3411
3412 /**
3413 Update the auto_increment field if necessary.
3414
3415 Updates columns with type NEXT_NUMBER if:
3416
3417 - If column value is set to NULL (in which case
3418 auto_increment_field_not_null is 0)
3419 - If column is set to 0 and (sql_mode & MODE_NO_AUTO_VALUE_ON_ZERO) is not
3420 set. In the future we will only set NEXT_NUMBER fields if one sets them
3421 to NULL (or they are not included in the insert list).
3422
3423 In those cases, we check if the currently reserved interval still has
3424 values we have not used. If yes, we pick the smallest one and use it.
3425 Otherwise:
3426
3427 - If a list of intervals has been provided to the statement via SET
3428 INSERT_ID or via an Intvar_log_event (in a replication slave), we pick the
3429 first unused interval from this list, consider it as reserved.
3430
3431 - Otherwise we set the column for the first row to the value
3432 next_insert_id(get_auto_increment(column))) which is usually
3433 max-used-column-value+1.
3434 We call get_auto_increment() for the first row in a multi-row
3435 statement. get_auto_increment() will tell us the interval of values it
3436 reserved for us.
3437
3438 - In both cases, for the following rows we use those reserved values without
3439 calling the handler again (we just progress in the interval, computing
3440 each new value from the previous one). Until we have exhausted them, then
3441 we either take the next provided interval or call get_auto_increment()
3442 again to reserve a new interval.
3443
3444 - In both cases, the reserved intervals are remembered in
3445 thd->auto_inc_intervals_in_cur_stmt_for_binlog if statement-based
3446 binlogging; the last reserved interval is remembered in
3447 auto_inc_interval_for_cur_row. The number of reserved intervals is
3448 remembered in auto_inc_intervals_count. It differs from the number of
3449 elements in thd->auto_inc_intervals_in_cur_stmt_for_binlog() because the
3450 latter list is cumulative over all statements forming one binlog event
3451 (when stored functions and triggers are used), and collapses two
3452 contiguous intervals in one (see its append() method).
3453
3454 The idea is that generated auto_increment values are predictable and
3455 independent of the column values in the table. This is needed to be
3456 able to replicate into a table that already has rows with a higher
3457 auto-increment value than the one that is inserted.
3458
3459 After we have already generated an auto-increment number and the user
3460 inserts a column with a higher value than the last used one, we will
3461 start counting from the inserted value.
3462
3463 This function's "outputs" are: the table's auto_increment field is filled
3464 with a value, thd->next_insert_id is filled with the value to use for the
3465 next row, if a value was autogenerated for the current row it is stored in
3466 thd->insert_id_for_cur_row, if get_auto_increment() was called
3467 thd->auto_inc_interval_for_cur_row is modified, if that interval is not
3468 present in thd->auto_inc_intervals_in_cur_stmt_for_binlog it is added to
3469 this list.
3470
3471 @todo
3472 Replace all references to "next number" or NEXT_NUMBER to
3473 "auto_increment", everywhere (see below: there is
3474 table->auto_increment_field_not_null, and there also exists
3475 table->next_number_field, it's not consistent).
3476
3477 @retval
3478 0 ok
3479 @retval
3480 HA_ERR_AUTOINC_READ_FAILED get_auto_increment() was called and
3481 returned ~(ulonglong) 0
3482 @retval
3483 HA_ERR_AUTOINC_ERANGE storing value in field caused strict mode
3484 failure.
3485 */
3486
#define AUTO_INC_DEFAULT_NB_ROWS 1 // Some prefer 1024 here
#define AUTO_INC_DEFAULT_NB_MAX_BITS 16
#define AUTO_INC_DEFAULT_NB_MAX ((1 << AUTO_INC_DEFAULT_NB_MAX_BITS) - 1)

int handler::update_auto_increment()
{
  ulonglong nr, nb_reserved_values;
  bool append= FALSE;
  THD *thd= table->in_use;
  struct system_variables *variables= &thd->variables;
  assert(table_share->tmp_table != NO_TMP_TABLE ||
         m_lock_type != F_UNLCK);
  DBUG_ENTER("handler::update_auto_increment");

  /*
    next_insert_id is a "cursor" into the reserved interval, it may go greater
    than the interval, but not smaller.
  */
  assert(next_insert_id >= auto_inc_interval_for_cur_row.minimum());

  /*
    An explicit non-NULL, non-zero value (or an explicit 0 under
    NO_AUTO_VALUE_ON_ZERO) is used as-is: no value is generated.
  */
  if ((nr= table->next_number_field->val_int()) != 0 ||
      (table->auto_increment_field_not_null &&
       thd->variables.sql_mode & MODE_NO_AUTO_VALUE_ON_ZERO))
  {
    /*
      Update next_insert_id if we had already generated a value in this
      statement (case of INSERT VALUES(null),(3763),(null):
      the last NULL needs to insert 3764, not the value of the first NULL plus
      1).
      Also we should take into account the sign of the value.
      Since auto_increment value can't have negative value we should update
      next_insert_id only in case when we are inserting an explicit positive
      value.
      It means that for a table that has SIGNED INTEGER column when we execute
      the following statement
      INSERT INTO t1 VALUES( NULL), (-1), (NULL)
      we shouldn't call adjust_next_insert_id_after_explicit_value()
      and the result row will be (1, -1, 2) (for new opened connection
      to the server). On the other hand, for the statement
      INSERT INTO t1 VALUES( NULL), (333), (NULL)
      we should call adjust_next_insert_id_after_explicit_value()
      and result row will be (1, 333, 334).
    */
    if (((Field_num*)table->next_number_field)->unsigned_flag ||
        ((longlong)nr) > 0)
      adjust_next_insert_id_after_explicit_value(nr);

    insert_id_for_cur_row= 0; // didn't generate anything
    DBUG_RETURN(0);
  }

  /* The next value to hand out must still fit in the column's type. */
  if (next_insert_id > table->next_number_field->get_max_int_value())
    DBUG_RETURN(HA_ERR_AUTOINC_READ_FAILED);

  if ((nr= next_insert_id) >= auto_inc_interval_for_cur_row.maximum())
  {
    /* next_insert_id is beyond what is reserved, so we reserve more. */
    const Discrete_interval *forced=
      thd->auto_inc_intervals_forced.get_next();
    if (forced != NULL)
    {
      /* Interval forced via SET INSERT_ID or a replicated Intvar event. */
      nr= forced->minimum();
      /*
        In a multi insert statement when the number of affected rows is known
        then reserve those many number of auto increment values. So that
        interval will be starting value to starting value + number of affected
        rows * increment of auto increment.
      */
      nb_reserved_values= (estimation_rows_to_insert > 0) ?
        estimation_rows_to_insert : forced->values();
    }
    else
    {
      /*
        handler::estimation_rows_to_insert was set by
        handler::ha_start_bulk_insert(); if 0 it means "unknown".
      */
      ulonglong nb_desired_values;
      /*
        If an estimation was given to the engine:
        - use it.
        - if we already reserved numbers, it means the estimation was
        not accurate, then we'll reserve 2*AUTO_INC_DEFAULT_NB_ROWS the 2nd
        time, twice that the 3rd time etc.
        If no estimation was given, use those increasing defaults from the
        start, starting from AUTO_INC_DEFAULT_NB_ROWS.
        Don't go beyond a max to not reserve "way too much" (because
        reservation means potentially losing unused values).
        Note that in prelocked mode no estimation is given.
      */

      if ((auto_inc_intervals_count == 0) && (estimation_rows_to_insert > 0))
        nb_desired_values= estimation_rows_to_insert;
      else if ((auto_inc_intervals_count == 0) &&
               (thd->lex->bulk_insert_row_cnt > 0))
      {
        /*
          For multi-row inserts, if the bulk inserts cannot be started, the
          handler::estimation_rows_to_insert will not be set. But we still
          want to reserve the autoinc values.
        */
        nb_desired_values= thd->lex->bulk_insert_row_cnt;
      }
      else /* go with the increasing defaults */
      {
        /* avoid overflow in formula, with this if() */
        if (auto_inc_intervals_count <= AUTO_INC_DEFAULT_NB_MAX_BITS)
        {
          nb_desired_values= AUTO_INC_DEFAULT_NB_ROWS *
            (1 << auto_inc_intervals_count);
          set_if_smaller(nb_desired_values, AUTO_INC_DEFAULT_NB_MAX);
        }
        else
          nb_desired_values= AUTO_INC_DEFAULT_NB_MAX;
      }
      /* This call ignores all its parameters but nr, currently */
      get_auto_increment(variables->auto_increment_offset,
                         variables->auto_increment_increment,
                         nb_desired_values, &nr,
                         &nb_reserved_values);
      if (nr == ULLONG_MAX)
        DBUG_RETURN(HA_ERR_AUTOINC_READ_FAILED);  // Mark failure

      /*
        That rounding below should not be needed when all engines actually
        respect offset and increment in get_auto_increment(). But they don't
        so we still do it. Wonder if for the not-first-in-index we should do
        it. Hope that this rounding didn't push us out of the interval; even
        if it did we cannot do anything about it (calling the engine again
        will not help as we inserted no row).
      */
      nr= compute_next_insert_id(nr-1, variables);
    }

    if (table->s->next_number_keypart == 0)
    {
      /* We must defer the appending until "nr" has been possibly truncated */
      append= TRUE;
    }
    else
    {
      /*
        For such auto_increment there is no notion of interval, just a
        singleton. The interval is not even stored in
        thd->auto_inc_interval_for_cur_row, so we are sure to call the engine
        for next row.
      */
      DBUG_PRINT("info",("auto_increment: special not-first-in-index"));
    }
  }

  if (unlikely(nr == ULLONG_MAX))
      DBUG_RETURN(HA_ERR_AUTOINC_ERANGE);

  DBUG_PRINT("info",("auto_increment: %lu", (ulong) nr));

  if (unlikely(table->next_number_field->store((longlong) nr, TRUE)))
  {
    /*
      first test if the query was aborted due to strict mode constraints
    */
    if (thd->killed == THD::KILL_BAD_DATA)
      DBUG_RETURN(HA_ERR_AUTOINC_ERANGE);

    /*
      field refused this value (overflow) and truncated it, use the result of
      the truncation (which is going to be inserted); however we try to
      decrease it to honour auto_increment_* variables.
      That will shift the left bound of the reserved interval, we don't
      bother shifting the right bound (anyway any other value from this
      interval will cause a duplicate key).
    */
    nr= prev_insert_id(table->next_number_field->val_int(), variables);
    if (unlikely(table->next_number_field->store((longlong) nr, TRUE)))
      nr= table->next_number_field->val_int();
  }
  if (append)
  {
    auto_inc_interval_for_cur_row.replace(nr, nb_reserved_values,
                                          variables->auto_increment_increment);
    auto_inc_intervals_count++;
    /* Row-based replication does not need to store intervals in binlog */
    if (mysql_bin_log.is_open() && !thd->is_current_stmt_binlog_format_row())
      thd->auto_inc_intervals_in_cur_stmt_for_binlog.append(auto_inc_interval_for_cur_row.minimum(),
                                                            auto_inc_interval_for_cur_row.values(),
                                                            variables->auto_increment_increment);
  }

  /*
    Record this autogenerated value. If the caller then
    succeeds to insert this value, it will call
    record_first_successful_insert_id_in_cur_stmt()
    which will set first_successful_insert_id_in_cur_stmt if it's not
    already set.
  */
  insert_id_for_cur_row= nr;
  /*
    Set next insert id to point to next auto-increment value to be able to
    handle multi-row statements.
  */
  set_next_insert_id(compute_next_insert_id(nr, variables));

  DBUG_RETURN(0);
}
3690
3691
/** @brief
  MySQL signal that it changed the column bitmap

  USAGE
    This is for handlers that need to setup their own column bitmaps.
    Normally the handler should set up their own column bitmaps in
    index_init() or rnd_init() and in any column_bitmaps_signal() call after
    this.

    The handler is allowed to do changes to the bitmap after a index_init or
    rnd_init() call is made as after this, MySQL will not use the bitmap
    for any program logic checking.
*/
void handler::column_bitmaps_signal()
{
  /* Default implementation: only trace the current bitmap pointers. */
  DBUG_ENTER("column_bitmaps_signal");
  DBUG_PRINT("info", ("read_set: 0x%lx write_set: 0x%lx", (long) table->read_set,
                      (long)table->write_set));
  DBUG_VOID_RETURN;
}
3712
3713
3714 /**
3715 Reserves an interval of auto_increment values from the handler.
3716
3717 @param offset offset (modulus increment)
3718 @param increment increment between calls
3719 @param nb_desired_values how many values we want
3720 @param[out] first_value the first value reserved by the handler
3721 @param[out] nb_reserved_values how many values the handler reserved
3722
3723 offset and increment means that we want values to be of the form
3724 offset + N * increment, where N>=0 is integer.
3725 If the function sets *first_value to ULLONG_MAX it means an error.
3726 If the function sets *nb_reserved_values to ULLONG_MAX it means it has
3727 reserved to "positive infinite".
3728 */
3729
void handler::get_auto_increment(ulonglong offset, ulonglong increment,
                                 ulonglong nb_desired_values,
                                 ulonglong *first_value,
                                 ulonglong *nb_reserved_values)
{
  ulonglong nr;
  int error;
  DBUG_ENTER("handler::get_auto_increment");

  /* Only index columns are needed to find the current maximum value. */
  (void) extra(HA_EXTRA_KEYREAD);
  table->mark_columns_used_by_index_no_reset(table->s->next_number_index,
                                             table->read_set);
  column_bitmaps_signal();

  if (ha_index_init(table->s->next_number_index, 1))
  {
    /* This should never happen, assert in debug, and fail in release build */
    assert(0);
    *first_value= ULLONG_MAX;   /* ULLONG_MAX signals an error to the caller */
    DBUG_VOID_RETURN;
  }

  if (table->s->next_number_keypart == 0)
  {						// Autoincrement at key-start
    /* Last row of the index holds the current maximum auto_increment. */
    error= ha_index_last(table->record[1]);
    /*
      MySQL implicitely assumes such method does locking (as MySQL decides to
      use nr+increment without checking again with the handler, in
      handler::update_auto_increment()), so reserves to infinite.
    */
    *nb_reserved_values= ULLONG_MAX;
  }
  else
  {
    /*
      The auto_increment column is a later keypart: search for the last row
      sharing the prefix key values currently in table->record[0].
    */
    uchar key[MAX_KEY_LENGTH];
    key_copy(key, table->record[0],
             table->key_info + table->s->next_number_index,
             table->s->next_number_key_offset);
    error= ha_index_read_map(table->record[1], key,
                             make_prev_keypart_map(table->s->next_number_keypart),
                             HA_READ_PREFIX_LAST);
    /*
      MySQL needs to call us for next row: assume we are inserting ("a",null)
      here, we return 3, and next this statement will want to insert
      ("b",null): there is no reason why ("b",3+1) would be the good row to
      insert: maybe it already exists, maybe 3+1 is too large...
    */
    *nb_reserved_values= 1;
  }

  if (error)
  {
    if (error == HA_ERR_END_OF_FILE || error == HA_ERR_KEY_NOT_FOUND)
    {
      /* No entry found, start with 1. */
      nr= 1;
    }
    else
    {
      /* Unexpected read error; report failure via the sentinel value. */
      assert(0);
      nr= ULLONG_MAX;
    }
  }
  else
    /* Row was read into record[1]: fetch the column value from there, +1. */
    nr= ((ulonglong) table->next_number_field->
         val_int_offset(table->s->rec_buff_length)+1);
  ha_index_end();
  (void) extra(HA_EXTRA_NO_KEYREAD);
  *first_value= nr;
  DBUG_VOID_RETURN;
}
3801
3802
void handler::ha_release_auto_increment()
{
  /*
    Releasing is only valid for temporary tables, while the table is still
    locked, or when this handler generated no auto_increment values at all.
  */
  assert(table_share->tmp_table != NO_TMP_TABLE ||
         m_lock_type != F_UNLCK ||
         (!next_insert_id && !insert_id_for_cur_row));
  DEBUG_SYNC(ha_thd(), "release_auto_increment");
  release_auto_increment();                     /* engine-specific cleanup */
  /* Reset per-statement auto_increment bookkeeping. */
  insert_id_for_cur_row= 0;
  auto_inc_interval_for_cur_row.replace(0, 0, 0);
  auto_inc_intervals_count= 0;
  if (next_insert_id > 0)
  {
    next_insert_id= 0;
    /*
      this statement used forced auto_increment values if there were some,
      wipe them away for other statements.
    */
    table->in_use->auto_inc_intervals_forced.empty();
  }
}
3823
3824
3825 /**
3826 Construct and emit duplicate key error message using information
3827 from table's record buffer.
3828
3829 @param table TABLE object which record buffer should be used as
3830 source for column values.
3831 @param key Key description.
3832 @param msg Error message template to which key value should be
3833 added.
3834 @param errflag Flags for my_error() call.
3835 */
3836
print_keydup_error(TABLE * table,KEY * key,const char * msg,myf errflag)3837 void print_keydup_error(TABLE *table, KEY *key, const char *msg, myf errflag)
3838 {
3839 /* Write the duplicated key in the error message */
3840 char key_buff[MAX_KEY_LENGTH];
3841 String str(key_buff,sizeof(key_buff),system_charset_info);
3842
3843 if (key == NULL)
3844 {
3845 /* Key is unknown */
3846 str.copy("", 0, system_charset_info);
3847 my_printf_error(ER_DUP_ENTRY, msg, errflag, str.c_ptr(), "*UNKNOWN*");
3848 }
3849 else
3850 {
3851 /* Table is opened and defined at this point */
3852 key_unpack(&str,table, key);
3853 size_t max_length= MYSQL_ERRMSG_SIZE - strlen(msg);
3854 if (str.length() >= max_length)
3855 {
3856 str.length(max_length-4);
3857 str.append(STRING_WITH_LEN("..."));
3858 }
3859 my_printf_error(ER_DUP_ENTRY, msg, errflag, str.c_ptr_safe(), key->name);
3860 }
3861 }
3862
3863
3864 /**
3865 Construct and emit duplicate key error message using information
3866 from table's record buffer.
3867
3868 @sa print_keydup_error(table, key, msg, errflag).
3869 */
3870
void print_keydup_error(TABLE *table, KEY *key, myf errflag)
{
  /* Convenience overload using the standard duplicate-key message text. */
  print_keydup_error(table, key, ER(ER_DUP_ENTRY_WITH_KEY_NAME), errflag);
}
3875
3876
3877 /**
3878 This method is used to analyse the error to see whether the error
3879 is ignorable or not. Further comments in header file.
3880 */
3881
is_ignorable_error(int error)3882 bool handler::is_ignorable_error(int error)
3883 {
3884 DBUG_ENTER("is_ignorable_error");
3885
3886 // Catch errors that are ignorable
3887 switch (error)
3888 {
3889 // Error code 0 is not an error.
3890 case 0:
3891 // Dup key errors may be explicitly ignored.
3892 case HA_ERR_FOUND_DUPP_KEY:
3893 case HA_ERR_FOUND_DUPP_UNIQUE:
3894 // Foreign key constraint violations are ignorable.
3895 case HA_ERR_ROW_IS_REFERENCED:
3896 case HA_ERR_NO_REFERENCED_ROW:
3897 DBUG_RETURN(true);
3898 }
3899
3900 // Default is that an error is not ignorable.
3901 DBUG_RETURN(false);
3902 }
3903
3904
3905 /**
3906 This method is used to analyse the error to see whether the error
3907 is fatal or not. Further comments in header file.
3908 */
3909
is_fatal_error(int error)3910 bool handler::is_fatal_error(int error)
3911 {
3912 DBUG_ENTER("is_fatal_error");
3913
3914 // No ignorable errors are fatal
3915 if (is_ignorable_error(error))
3916 DBUG_RETURN(false);
3917
3918 // Catch errors that are not fatal
3919 switch (error)
3920 {
3921 /*
3922 Deadlock and lock timeout cause transaction/statement rollback so that
3923 THD::is_fatal_sub_stmt_error will be set. This means that they will not
3924 be possible to handle by stored program handlers inside stored functions
3925 and triggers even if non-fatal.
3926 */
3927 case HA_ERR_LOCK_WAIT_TIMEOUT:
3928 case HA_ERR_LOCK_DEADLOCK:
3929 DBUG_RETURN(false);
3930
3931 case HA_ERR_NULL_IN_SPATIAL:
3932 DBUG_RETURN(false);
3933 }
3934
3935 // Default is that an error is fatal
3936 DBUG_RETURN(true);
3937 }
3938
3939
3940 /**
3941 Print error that we got from handler function.
3942
3943 @note
3944 In case of delete table it's only safe to use the following parts of
3945 the 'table' structure:
3946 - table->s->path
3947 - table->alias
3948 */
void handler::print_error(int error, myf errflag)
{
  DBUG_ENTER("handler::print_error");
  DBUG_PRINT("enter",("error: %d",error));

  /*
    Map the handler/OS error code to a server message number; cases that
    need extra message arguments emit the error themselves and return.
  */
  int textno=ER_GET_ERRNO;
  switch (error) {
  case EACCES:
    textno=ER_OPEN_AS_READONLY;
    break;
  case EAGAIN:
    textno=ER_FILE_USED;
    break;
  case ENOENT:
    {
      char errbuf[MYSYS_STRERROR_SIZE];
      textno=ER_FILE_NOT_FOUND;
      /*
        Emitted here because the message needs the OS error string; the
        generic my_error() at the end is skipped via the
        textno != ER_FILE_NOT_FOUND check below.
      */
      my_error(textno, errflag, table_share->table_name.str,
               error, my_strerror(errbuf, sizeof(errbuf), error));
    }
    break;
  case HA_ERR_KEY_NOT_FOUND:
  case HA_ERR_NO_ACTIVE_RECORD:
  case HA_ERR_RECORD_DELETED:
  case HA_ERR_END_OF_FILE:
    textno=ER_KEY_NOT_FOUND;
    break;
  case HA_ERR_WRONG_MRG_TABLE_DEF:
    textno=ER_WRONG_MRG_TABLE;
    break;
  case HA_ERR_FOUND_DUPP_KEY:
  {
    /* key_nr is (uint) -1 when no table; the signed cast below detects it. */
    uint key_nr= table ? get_dup_key(error) : -1;
    if ((int) key_nr >= 0)
    {
      print_keydup_error(table,
                         key_nr == MAX_KEY ? NULL : &table->key_info[key_nr],
                         errflag);
      DBUG_VOID_RETURN;
    }
    textno=ER_DUP_KEY;
    break;
  }
  case HA_ERR_FOREIGN_DUPLICATE_KEY:
  {
    assert(table_share->tmp_table != NO_TMP_TABLE ||
           m_lock_type != F_UNLCK);

    char rec_buf[MAX_KEY_LENGTH];
    String rec(rec_buf, sizeof(rec_buf), system_charset_info);
    /* Table is opened and defined at this point */

    /*
      Just print the subset of fields that are part of the first index,
      printing the whole row from there is not easy.
    */
    key_unpack(&rec, table, &table->key_info[0]);

    char child_table_name[NAME_LEN + 1];
    char child_key_name[NAME_LEN + 1];
    /* Use the richer message when the engine can name the child FK. */
    if (get_foreign_dup_key(child_table_name, sizeof(child_table_name),
                            child_key_name, sizeof(child_key_name)))
    {
      my_error(ER_FOREIGN_DUPLICATE_KEY_WITH_CHILD_INFO, errflag,
               table_share->table_name.str, rec.c_ptr_safe(),
               child_table_name, child_key_name);
    }
    else
    {
      my_error(ER_FOREIGN_DUPLICATE_KEY_WITHOUT_CHILD_INFO, errflag,
               table_share->table_name.str, rec.c_ptr_safe());
    }
    DBUG_VOID_RETURN;
  }
  case HA_ERR_NULL_IN_SPATIAL:
    my_error(ER_CANT_CREATE_GEOMETRY_OBJECT, errflag);
    DBUG_VOID_RETURN;
  case HA_ERR_FOUND_DUPP_UNIQUE:
    textno=ER_DUP_UNIQUE;
    break;
  case HA_ERR_RECORD_CHANGED:
    textno=ER_CHECKREAD;
    break;
  case HA_ERR_CRASHED:
    textno=ER_NOT_KEYFILE;
    break;
  case HA_ERR_WRONG_IN_RECORD:
    textno= ER_CRASHED_ON_USAGE;
    break;
  case HA_ERR_CRASHED_ON_USAGE:
    textno=ER_CRASHED_ON_USAGE;
    break;
  case HA_ERR_NOT_A_TABLE:
    /* Error number maps 1:1 to a server message here. */
    textno= error;
    break;
  case HA_ERR_CRASHED_ON_REPAIR:
    textno=ER_CRASHED_ON_REPAIR;
    break;
  case HA_ERR_OUT_OF_MEM:
    textno=ER_OUT_OF_RESOURCES;
    break;
  case HA_ERR_SE_OUT_OF_MEMORY:
    my_error(ER_ENGINE_OUT_OF_MEMORY, errflag,
             table->file->table_type());
    DBUG_VOID_RETURN;
  case HA_ERR_WRONG_COMMAND:
    textno=ER_ILLEGAL_HA;
    break;
  case HA_ERR_OLD_FILE:
    textno=ER_OLD_KEYFILE;
    break;
  case HA_ERR_UNSUPPORTED:
    textno=ER_UNSUPPORTED_EXTENSION;
    break;
  case HA_ERR_RECORD_FILE_FULL:
  case HA_ERR_INDEX_FILE_FULL:
  {
    textno=ER_RECORD_FILE_FULL;
    /* Write the error message to error log */
    errflag|= ME_ERRORLOG;
    break;
  }
  case HA_ERR_LOCK_WAIT_TIMEOUT:
    textno=ER_LOCK_WAIT_TIMEOUT;
    break;
  case HA_ERR_LOCK_TABLE_FULL:
    textno=ER_LOCK_TABLE_FULL;
    break;
  case HA_ERR_LOCK_DEADLOCK:
    textno=ER_LOCK_DEADLOCK;
    break;
  case HA_ERR_READ_ONLY_TRANSACTION:
    textno=ER_READ_ONLY_TRANSACTION;
    break;
  case HA_ERR_CANNOT_ADD_FOREIGN:
    textno=ER_CANNOT_ADD_FOREIGN;
    break;
  case HA_ERR_ROW_IS_REFERENCED:
  {
    String str;
    /* Ask the engine to describe the violated constraint, if it can. */
    get_error_message(error, &str);
    my_error(ER_ROW_IS_REFERENCED_2, errflag, str.c_ptr_safe());
    DBUG_VOID_RETURN;
  }
  case HA_ERR_NO_REFERENCED_ROW:
  {
    String str;
    get_error_message(error, &str);
    my_error(ER_NO_REFERENCED_ROW_2, errflag, str.c_ptr_safe());
    DBUG_VOID_RETURN;
  }
  case HA_ERR_TABLE_DEF_CHANGED:
    textno=ER_TABLE_DEF_CHANGED;
    break;
  case HA_ERR_NO_SUCH_TABLE:
    my_error(ER_NO_SUCH_TABLE, errflag, table_share->db.str,
             table_share->table_name.str);
    DBUG_VOID_RETURN;
  case HA_ERR_RBR_LOGGING_FAILED:
    textno= ER_BINLOG_ROW_LOGGING_FAILED;
    break;
  case HA_ERR_DROP_INDEX_FK:
  {
    const char *ptr= "???";
    uint key_nr= table ? get_dup_key(error) : -1;
    if ((int) key_nr >= 0 && key_nr != MAX_KEY)
      ptr= table->key_info[key_nr].name;
    my_error(ER_DROP_INDEX_FK, errflag, ptr);
    DBUG_VOID_RETURN;
  }
  case HA_ERR_TABLE_NEEDS_UPGRADE:
    textno=ER_TABLE_NEEDS_UPGRADE;
    break;
  case HA_ERR_NO_PARTITION_FOUND:
    textno=ER_WRONG_PARTITION_NAME;
    break;
  case HA_ERR_TABLE_READONLY:
    textno= ER_OPEN_AS_READONLY;
    break;
  case HA_ERR_AUTOINC_READ_FAILED:
    textno= ER_AUTOINC_READ_FAILED;
    break;
  case HA_ERR_AUTOINC_ERANGE:
    textno= ER_WARN_DATA_OUT_OF_RANGE;
    break;
  case HA_ERR_TOO_MANY_CONCURRENT_TRXS:
    textno= ER_TOO_MANY_CONCURRENT_TRXS;
    break;
  case HA_ERR_INDEX_COL_TOO_LONG:
    textno= ER_INDEX_COLUMN_TOO_LONG;
    break;
  case HA_ERR_NOT_IN_LOCK_PARTITIONS:
    textno=ER_ROW_DOES_NOT_MATCH_GIVEN_PARTITION_SET;
    break;
  case HA_ERR_INDEX_CORRUPT:
    textno= ER_INDEX_CORRUPT;
    break;
  case HA_ERR_UNDO_REC_TOO_BIG:
    textno= ER_UNDO_RECORD_TOO_BIG;
    break;
  case HA_ERR_TABLE_IN_FK_CHECK:
    textno= ER_TABLE_IN_FK_CHECK;
    break;
  case HA_WRONG_CREATE_OPTION:
    textno= ER_ILLEGAL_HA;
    break;
  case HA_MISSING_CREATE_OPTION:
  {
    const char* engine= table_type();
    my_error(ER_MISSING_HA_CREATE_OPTION, errflag, engine);
    DBUG_VOID_RETURN;
  }
  case HA_ERR_TOO_MANY_FIELDS:
    textno= ER_TOO_MANY_FIELDS;
    break;
  case HA_ERR_INNODB_READ_ONLY:
    textno= ER_INNODB_READ_ONLY;
    break;
  case HA_ERR_TEMP_FILE_WRITE_FAILURE:
    textno= ER_TEMP_FILE_WRITE_FAILURE;
    break;
  case HA_ERR_INNODB_FORCED_RECOVERY:
    textno= ER_INNODB_FORCED_RECOVERY;
    break;
  case HA_ERR_TABLE_CORRUPT:
    my_error(ER_TABLE_CORRUPT, errflag, table_share->db.str,
             table_share->table_name.str);
    DBUG_VOID_RETURN;
  case HA_ERR_QUERY_INTERRUPTED:
    textno= ER_QUERY_INTERRUPTED;
    break;
  case HA_ERR_TABLESPACE_MISSING:
  {
    char errbuf[MYSYS_STRERROR_SIZE];
    my_snprintf(errbuf, MYSYS_STRERROR_SIZE, "`%s`.`%s`", table_share->db.str,
                table_share->table_name.str);
    my_error(ER_TABLESPACE_MISSING, errflag, errbuf, error);
    DBUG_VOID_RETURN;
  }
  case HA_ERR_TABLESPACE_IS_NOT_EMPTY:
    my_error(ER_TABLESPACE_IS_NOT_EMPTY, errflag, table_share->db.str,
             table_share->table_name.str);
    DBUG_VOID_RETURN;
  case HA_ERR_WRONG_FILE_NAME:
    my_error(ER_WRONG_FILE_NAME, errflag, table_share->table_name.str);
    DBUG_VOID_RETURN;
  case HA_ERR_NOT_ALLOWED_COMMAND:
    textno=ER_NOT_ALLOWED_COMMAND;
    break;
  default:
    {
      /* The error was "unknown" to this function.
         Ask handler if it has got a message for this error */
      String str;
      bool temporary= get_error_message(error, &str);
      if (!str.is_empty())
      {
        const char* engine= table_type();
        if (temporary)
          my_error(ER_GET_TEMPORARY_ERRMSG, errflag, error, str.ptr(), engine);
        else
          my_error(ER_GET_ERRMSG, errflag, error, str.ptr(), engine);
      }
      else
        my_error(ER_GET_ERRNO,errflag,error);
      DBUG_VOID_RETURN;
    }
  }
  /* ENOENT already emitted its message (with the OS error text) above. */
  if (textno != ER_FILE_NOT_FOUND)
    my_error(textno, errflag, table_share->table_name.str, error);
  DBUG_VOID_RETURN;
}
4221
4222
4223 /**
4224 Return an error message specific to this handler.
4225
4226 @param error error code previously returned by handler
4227 @param buf pointer to String where to add error message
4228
4229 @return
4230 Returns true if this is a temporary error
4231 */
bool handler::get_error_message(int error, String* buf)
{
  /* Default: the engine has no extra message text; not a temporary error. */
  return FALSE;
}
4236
4237
4238 /**
4239 Check for incompatible collation changes.
4240
4241 @retval
4242 HA_ADMIN_NEEDS_UPGRADE Table may have data requiring upgrade.
4243 @retval
4244 0 No upgrade required.
4245 */
4246
check_collation_compatibility()4247 int handler::check_collation_compatibility()
4248 {
4249 ulong mysql_version= table->s->mysql_version;
4250
4251 if (mysql_version < 50124)
4252 {
4253 KEY *key= table->key_info;
4254 KEY *key_end= key + table->s->keys;
4255 for (; key < key_end; key++)
4256 {
4257 KEY_PART_INFO *key_part= key->key_part;
4258 KEY_PART_INFO *key_part_end= key_part + key->user_defined_key_parts;
4259 for (; key_part < key_part_end; key_part++)
4260 {
4261 if (!key_part->fieldnr)
4262 continue;
4263 Field *field= table->field[key_part->fieldnr - 1];
4264 uint cs_number= field->charset()->number;
4265 if ((mysql_version < 50048 &&
4266 (cs_number == 11 || /* ascii_general_ci - bug #29499, bug #27562 */
4267 cs_number == 41 || /* latin7_general_ci - bug #29461 */
4268 cs_number == 42 || /* latin7_general_cs - bug #29461 */
4269 cs_number == 20 || /* latin7_estonian_cs - bug #29461 */
4270 cs_number == 21 || /* latin2_hungarian_ci - bug #29461 */
4271 cs_number == 22 || /* koi8u_general_ci - bug #29461 */
4272 cs_number == 23 || /* cp1251_ukrainian_ci - bug #29461 */
4273 cs_number == 26)) || /* cp1250_general_ci - bug #29461 */
4274 (mysql_version < 50124 &&
4275 (cs_number == 33 || /* utf8_general_ci - bug #27877 */
4276 cs_number == 35))) /* ucs2_general_ci - bug #27877 */
4277 return HA_ADMIN_NEEDS_UPGRADE;
4278 }
4279 }
4280 }
4281 return 0;
4282 }
4283
4284
ha_check_for_upgrade(HA_CHECK_OPT * check_opt)4285 int handler::ha_check_for_upgrade(HA_CHECK_OPT *check_opt)
4286 {
4287 int error;
4288 KEY *keyinfo, *keyend;
4289 KEY_PART_INFO *keypart, *keypartend;
4290
4291 if (!table->s->mysql_version)
4292 {
4293 /* check for blob-in-key error */
4294 keyinfo= table->key_info;
4295 keyend= table->key_info + table->s->keys;
4296 for (; keyinfo < keyend; keyinfo++)
4297 {
4298 keypart= keyinfo->key_part;
4299 keypartend= keypart + keyinfo->user_defined_key_parts;
4300 for (; keypart < keypartend; keypart++)
4301 {
4302 if (!keypart->fieldnr)
4303 continue;
4304 Field *field= table->field[keypart->fieldnr-1];
4305 if (field->type() == MYSQL_TYPE_BLOB)
4306 {
4307 if (check_opt->sql_flags & TT_FOR_UPGRADE)
4308 check_opt->flags= T_MEDIUM;
4309 return HA_ADMIN_NEEDS_CHECK;
4310 }
4311 }
4312 }
4313 }
4314 if (table->s->frm_version != FRM_VER_TRUE_VARCHAR)
4315 return HA_ADMIN_NEEDS_ALTER;
4316
4317 if ((error= check_collation_compatibility()))
4318 return error;
4319
4320 return check_for_upgrade(check_opt);
4321 }
4322
4323
check_old_types()4324 int handler::check_old_types()
4325 {
4326 Field** field;
4327
4328 for (field= table->field; (*field); field++)
4329 {
4330 if (table->s->mysql_version == 0) // prior to MySQL 5.0
4331 {
4332 /* check for bad DECIMAL field */
4333 if ((*field)->type() == MYSQL_TYPE_NEWDECIMAL)
4334 {
4335 return HA_ADMIN_NEEDS_ALTER;
4336 }
4337 if ((*field)->type() == MYSQL_TYPE_VAR_STRING)
4338 {
4339 return HA_ADMIN_NEEDS_ALTER;
4340 }
4341 }
4342
4343 /*
4344 Check for old DECIMAL field.
4345
4346 Above check does not take into account for pre 5.0 decimal types which can
4347 be present in the data directory if user did in-place upgrade from
4348 mysql-4.1 to mysql-5.0.
4349 */
4350 if ((*field)->type() == MYSQL_TYPE_DECIMAL)
4351 {
4352 return HA_ADMIN_NEEDS_DUMP_UPGRADE;
4353 }
4354
4355 if ((*field)->type() == MYSQL_TYPE_YEAR && (*field)->field_length == 2)
4356 return HA_ADMIN_NEEDS_ALTER; // obsolete YEAR(2) type
4357
4358 //Check for old temporal format if avoid_temporal_upgrade is disabled.
4359 mysql_mutex_lock(&LOCK_global_system_variables);
4360 bool check_temporal_upgrade= !avoid_temporal_upgrade;
4361 mysql_mutex_unlock(&LOCK_global_system_variables);
4362
4363 if (check_temporal_upgrade)
4364 {
4365 if (((*field)->real_type() == MYSQL_TYPE_TIME) ||
4366 ((*field)->real_type() == MYSQL_TYPE_DATETIME) ||
4367 ((*field)->real_type() == MYSQL_TYPE_TIMESTAMP))
4368 return HA_ADMIN_NEEDS_ALTER;
4369 }
4370 }
4371 return 0;
4372 }
4373
4374
static bool update_frm_version(TABLE *table)
{
  char path[FN_REFLEN];
  File file;
  int result= 1;                /* pessimistic default: report failure */
  DBUG_ENTER("update_frm_version");

  /*
    No need to update frm version in case table was created or checked
    by server with the same version. This also ensures that we do not
    update frm version for temporary tables as this code doesn't support
    temporary tables.
  */
  if (table->s->mysql_version == MYSQL_VERSION_ID)
    DBUG_RETURN(0);

  /* Build "<normalized table path>.frm". */
  strxmov(path, table->s->normalized_path.str, reg_ext, NullS);

  if ((file= mysql_file_open(key_file_frm,
                             path, O_RDWR|O_BINARY, MYF(MY_WME))) >= 0)
  {
    uchar version[4];

    int4store(version, MYSQL_VERSION_ID);

    /* Overwrite the version field at offset 51 of the .frm header. */
    if ((result= mysql_file_pwrite(file, (uchar*) version, 4, 51L, MYF_RW)))
      goto err;

    /* Keep the in-memory share consistent with what was just written. */
    table->s->mysql_version= MYSQL_VERSION_ID;
  }
err:
  /* If the open failed, file is negative and there is nothing to close. */
  if (file >= 0)
    (void) mysql_file_close(file, MYF(MY_WME));
  DBUG_RETURN(result);
}
4410
4411
4412
4413 /**
4414 @return
4415 key if error because of duplicated keys
4416 */
get_dup_key(int error)4417 uint handler::get_dup_key(int error)
4418 {
4419 assert(table_share->tmp_table != NO_TMP_TABLE ||
4420 m_lock_type != F_UNLCK);
4421 DBUG_ENTER("handler::get_dup_key");
4422 table->file->errkey = (uint) -1;
4423 if (error == HA_ERR_FOUND_DUPP_KEY ||
4424 error == HA_ERR_FOUND_DUPP_UNIQUE || error == HA_ERR_NULL_IN_SPATIAL ||
4425 error == HA_ERR_DROP_INDEX_FK)
4426 table->file->info(HA_STATUS_ERRKEY | HA_STATUS_NO_LOCK);
4427 DBUG_RETURN(table->file->errkey);
4428 }
4429
4430
4431 /**
4432 Delete all files with extension from bas_ext().
4433
4434 @param name Base name of table
4435
4436 @note
4437 We assume that the handler may return more extensions than
4438 was actually used for the file.
4439
4440 @retval
4441 0 If we successfully deleted at least one file from base_ext and
4442 didn't get any other errors than ENOENT
4443 @retval
4444 !0 Error
4445 */
int handler::delete_table(const char *name)
{
  int saved_error= 0;
  int error= 0;
  int enoent_or_zero= ENOENT;                   // Error if no file was deleted
  char buff[FN_REFLEN];
  /* The handler must be closed/unlocked before its files are removed. */
  assert(m_lock_type == F_UNLCK);

  for (const char **ext=bas_ext(); *ext ; ext++)
  {
    fn_format(buff, name, "", *ext, MY_UNPACK_FILENAME|MY_APPEND_EXT);
    if (mysql_file_delete_with_symlink(key_file_misc, buff, MYF(0)))
    {
      if (my_errno() != ENOENT)
      {
        /*
          If error on the first existing file, return the error.
          Otherwise delete as much as possible.
        */
        if (enoent_or_zero)
          return my_errno();
        saved_error= my_errno();
      }
    }
    else
      enoent_or_zero= 0;                        // No error for ENOENT
    /* error stays ENOENT until at least one file has been deleted. */
    error= enoent_or_zero;
  }
  return saved_error ? saved_error : error;
}
4476
4477
rename_table(const char * from,const char * to)4478 int handler::rename_table(const char * from, const char * to)
4479 {
4480 int error= 0;
4481 const char **ext, **start_ext;
4482 start_ext= bas_ext();
4483 for (ext= start_ext; *ext ; ext++)
4484 {
4485 if (rename_file_ext(from, to, *ext))
4486 {
4487 error= my_errno();
4488 if (error != ENOENT)
4489 break;
4490 error= 0;
4491 }
4492 }
4493 if (error)
4494 {
4495 /* Try to revert the rename. Ignore errors. */
4496 for (; ext >= start_ext; ext--)
4497 rename_file_ext(to, from, *ext);
4498 }
4499 return error;
4500 }
4501
4502
void handler::drop_table(const char *name)
{
  /* Close the handler before removing its files; errors are ignored. */
  close();
  delete_table(name);
}
4508
4509
4510 /**
4511 Performs checks upon the table.
4512
4513 @param thd thread doing CHECK TABLE operation
4514 @param check_opt options from the parser
4515
4516 @retval
4517 HA_ADMIN_OK Successful upgrade
4518 @retval
4519 HA_ADMIN_NEEDS_UPGRADE Table has structures requiring upgrade
4520 @retval
4521 HA_ADMIN_NEEDS_ALTER Table has structures requiring ALTER TABLE
4522 @retval
4523 HA_ADMIN_NOT_IMPLEMENTED
4524 */
int handler::ha_check(THD *thd, HA_CHECK_OPT *check_opt)
{
  int error;
  bool skip_version_update = false;
  bool is_upgrade = check_opt->sql_flags & TT_FOR_UPGRADE;

  assert(table_share->tmp_table != NO_TMP_TABLE ||
         m_lock_type != F_UNLCK);

  /* CHECK ... FOR UPGRADE is a no-op on tables from this server version. */
  if ((table->s->mysql_version >= MYSQL_VERSION_ID) &&
      (check_opt->sql_flags & TT_FOR_UPGRADE))
    return 0;

  if (table->s->mysql_version < MYSQL_VERSION_ID)
  {
    if ((error= check_old_types()))
      return error;

    error= ha_check_for_upgrade(check_opt);
    /* Note the deliberate fall-throughs in this switch. */
    switch (error)
    {
    case HA_ADMIN_NEEDS_UPG_PART:
      /* Skip version update as the table needs upgrade. */
      skip_version_update= true;
      /* Fall through */
    case HA_ADMIN_OK:
      if (is_upgrade)
        return error;
      /* Fall through */
    case HA_ADMIN_NEEDS_CHECK:
      /* Proceed to the engine's check() below. */
      break;
    default:
      return error;
    }
  }

  if ((error= check(thd, check_opt)))
    return error;
  /* Skip updating frm version if not main handler. */
  if (table->file != this || skip_version_update)
    return error;
  return update_frm_version(table);
}
4568
/**
  Mark the transaction registered by this engine as a no-op DML,
  if such a transaction has been started.
*/
void
handler::mark_trx_noop_dml()
{
  Ha_trx_info *ha_info= &ha_thd()->ha_data[ht->slot].ha_info[0];
  /*
    When a storage engine method is called, the transaction must
    have been started, unless it's a DDL call, for which the
    storage engine starts the transaction internally, and commits
    it internally, without registering in the ha_list.
    Unfortunately here we can't know for sure if the engine
    has registered the transaction or not, so we must check.
  */
  if (ha_info->is_started())
  {
    assert(has_transactions());
    /*
      table_share can be NULL in ha_delete_table(). See implementation
      of standalone function ha_delete_table() in sql_base.cc.
    */
    if (table_share == NULL || table_share->tmp_table == NO_TMP_TABLE)
      ha_info->set_trx_noop_read_write();
  }
}
4592
4593 /**
4594 A helper function to mark a transaction read-write,
4595 if it is started.
4596 */
4597
void
handler::mark_trx_read_write()
{
  Ha_trx_info *ha_info= &ha_thd()->ha_data[ht->slot].ha_info[0];
  /*
    When a storage engine method is called, the transaction must
    have been started, unless it's a DDL call, for which the
    storage engine starts the transaction internally, and commits
    it internally, without registering in the ha_list.
    Unfortunately here we can't know for sure if the engine
    has registered the transaction or not, so we must check.
  */
  if (ha_info->is_started())
  {
    assert(has_transactions());
    /*
      table_share can be NULL in ha_delete_table(). See implementation
      of standalone function ha_delete_table() in sql_base.cc.
    */
    if (table_share == NULL || table_share->tmp_table == NO_TMP_TABLE)
      ha_info->set_trx_read_write();
  }
}
4621
4622
4623 /**
4624 Repair table: public interface.
4625
4626 @sa handler::repair()
4627 */
4628
int handler::ha_repair(THD* thd, HA_CHECK_OPT* check_opt)
{
  int result;
  /* REPAIR modifies the table, so the transaction becomes read-write. */
  mark_trx_read_write();

  result= repair(thd, check_opt);
  /* Engines may only implement repair when they announce HA_CAN_REPAIR. */
  assert(result == HA_ADMIN_NOT_IMPLEMENTED ||
         ha_table_flags() & HA_CAN_REPAIR);

  int old_types_error= check_old_types();

  /*
    Stamp the current server version into the .frm after a successful
    repair, unless pre-5.0 field types still require a dump/reload upgrade.
  */
  if (old_types_error != HA_ADMIN_NEEDS_DUMP_UPGRADE && result == HA_ADMIN_OK)
    result= update_frm_version(table);
  return result;
}
4644
4645
4646 /**
4647 Start bulk insert.
4648
4649 Allow the handler to optimize for multiple row insert.
4650
4651 @param rows Estimated rows to insert
4652 */
4653
void handler::ha_start_bulk_insert(ha_rows rows)
{
  DBUG_ENTER("handler::ha_start_bulk_insert");
  /* Requires a write lock, except on temporary tables. */
  assert(table_share->tmp_table != NO_TMP_TABLE ||
         m_lock_type == F_WRLCK);
  /* Remember the estimate; it is cleared again in ha_end_bulk_insert(). */
  estimation_rows_to_insert= rows;
  start_bulk_insert(rows);
  DBUG_VOID_RETURN;
}
4663
4664
4665 /**
4666 End bulk insert.
4667
4668 @return Operation status
4669 @retval 0 Success
4670 @retval != 0 Failure (error code returned)
4671 */
4672
int handler::ha_end_bulk_insert()
{
  DBUG_ENTER("handler::ha_end_bulk_insert");
  /* Requires a write lock, except on temporary tables. */
  assert(table_share->tmp_table != NO_TMP_TABLE ||
         m_lock_type == F_WRLCK);
  /* Clear the estimate set in ha_start_bulk_insert(). */
  estimation_rows_to_insert= 0;
  DBUG_RETURN(end_bulk_insert());
}
4681
4682
4683 /**
4684 Bulk update row: public interface.
4685
4686 @sa handler::bulk_update_row()
4687 */
4688
int
handler::ha_bulk_update_row(const uchar *old_data, uchar *new_data,
                            uint *dup_key_found)
{
  /* Requires a write lock, except on temporary tables. */
  assert(table_share->tmp_table != NO_TMP_TABLE ||
         m_lock_type == F_WRLCK);
  /* This call modifies data, so the transaction becomes read-write. */
  mark_trx_read_write();

  return bulk_update_row(old_data, new_data, dup_key_found);
}
4699
4700
4701 /**
4702 Delete all rows: public interface.
4703
4704 @sa handler::delete_all_rows()
4705 */
4706
int
handler::ha_delete_all_rows()
{
  /* Requires a write lock, except on temporary tables. */
  assert(table_share->tmp_table != NO_TMP_TABLE ||
         m_lock_type == F_WRLCK);
  /* This call modifies data, so the transaction becomes read-write. */
  mark_trx_read_write();

  return delete_all_rows();
}
4716
4717
4718 /**
4719 Truncate table: public interface.
4720
4721 @sa handler::truncate()
4722 */
4723
int
handler::ha_truncate()
{
  /* Requires a write lock, except on temporary tables. */
  assert(table_share->tmp_table != NO_TMP_TABLE ||
         m_lock_type == F_WRLCK);
  /* This call modifies data, so the transaction becomes read-write. */
  mark_trx_read_write();

  return truncate();
}
4733
4734
4735 /**
4736 Optimize table: public interface.
4737
4738 @sa handler::optimize()
4739 */
4740
int
handler::ha_optimize(THD* thd, HA_CHECK_OPT* check_opt)
{
  /* Requires a write lock, except on temporary tables. */
  assert(table_share->tmp_table != NO_TMP_TABLE ||
         m_lock_type == F_WRLCK);
  /* OPTIMIZE may rewrite the table, so mark the transaction read-write. */
  mark_trx_read_write();

  return optimize(thd, check_opt);
}
4750
4751
4752 /**
4753 Analyze table: public interface.
4754
4755 @sa handler::analyze()
4756 */
4757
int
handler::ha_analyze(THD* thd, HA_CHECK_OPT* check_opt)
{
  /* Any lock is sufficient here (note: != F_UNLCK, not == F_WRLCK). */
  assert(table_share->tmp_table != NO_TMP_TABLE ||
         m_lock_type != F_UNLCK);
  mark_trx_read_write();

  return analyze(thd, check_opt);
}
4767
4768
4769 /**
4770 Check and repair table: public interface.
4771
4772 @sa handler::check_and_repair()
4773 */
4774
bool
handler::ha_check_and_repair(THD *thd)
{
  /* Called while the table is still unlocked (unlike most admin calls). */
  assert(table_share->tmp_table != NO_TMP_TABLE ||
         m_lock_type == F_UNLCK);
  mark_trx_read_write();

  return check_and_repair(thd);
}
4784
4785
4786 /**
4787 Disable indexes: public interface.
4788
4789 @sa handler::disable_indexes()
4790 */
4791
int
handler::ha_disable_indexes(uint mode)
{
  /* Any lock is sufficient, except on temporary tables. */
  assert(table_share->tmp_table != NO_TMP_TABLE ||
         m_lock_type != F_UNLCK);
  mark_trx_read_write();

  return disable_indexes(mode);
}
4801
4802
4803 /**
4804 Enable indexes: public interface.
4805
4806 @sa handler::enable_indexes()
4807 */
4808
int
handler::ha_enable_indexes(uint mode)
{
  /* Any lock is sufficient, except on temporary tables. */
  assert(table_share->tmp_table != NO_TMP_TABLE ||
         m_lock_type != F_UNLCK);
  mark_trx_read_write();

  return enable_indexes(mode);
}
4818
4819
4820 /**
4821 Discard or import tablespace: public interface.
4822
4823 @sa handler::discard_or_import_tablespace()
4824 */
4825
int
handler::ha_discard_or_import_tablespace(my_bool discard)
{
  /* Requires a write lock, except on temporary tables. */
  assert(table_share->tmp_table != NO_TMP_TABLE ||
         m_lock_type == F_WRLCK);
  /* Discarding/importing changes table data, hence read-write. */
  mark_trx_read_write();

  return discard_or_import_tablespace(discard);
}
4835
4836
bool handler::ha_prepare_inplace_alter_table(TABLE *altered_table,
                                             Alter_inplace_info *ha_alter_info)
{
  /* Any lock is sufficient, except on temporary tables. */
  assert(table_share->tmp_table != NO_TMP_TABLE ||
         m_lock_type != F_UNLCK);
  mark_trx_read_write();

  return prepare_inplace_alter_table(altered_table, ha_alter_info);
}
4846
4847
bool handler::ha_commit_inplace_alter_table(TABLE *altered_table,
                                            Alter_inplace_info *ha_alter_info,
                                            bool commit)
{
  /*
    At this point we should have an exclusive metadata lock on the table.
    The exception is if we're about to roll back changes (commit= false).
    In this case, we might be rolling back after a failed lock upgrade,
    so we could be holding the same lock level as for inplace_alter_table().
  */
  assert(ha_thd()->mdl_context.owns_equal_or_stronger_lock(MDL_key::TABLE,
                                                           table->s->db.str,
                                                           table->s->table_name.str,
                                                           MDL_EXCLUSIVE) ||
         !commit);

  return commit_inplace_alter_table(altered_table, ha_alter_info, commit);
}
4866
4867
/*
  Default implementation to support in-place alter table
  and old online add/drop index API
*/

enum_alter_inplace_result
handler::check_if_supported_inplace_alter(TABLE *altered_table,
                                          Alter_inplace_info *ha_alter_info)
{
  DBUG_ENTER("check_if_supported_alter");

  HA_CREATE_INFO *create_info= ha_alter_info->create_info;

  /* The set of operations the old API could perform without copying data. */
  Alter_inplace_info::HA_ALTER_FLAGS inplace_offline_operations=
    Alter_inplace_info::ALTER_COLUMN_EQUAL_PACK_LENGTH |
    Alter_inplace_info::ALTER_COLUMN_NAME |
    Alter_inplace_info::ALTER_COLUMN_DEFAULT |
    Alter_inplace_info::CHANGE_CREATE_OPTION |
    Alter_inplace_info::ALTER_RENAME |
    Alter_inplace_info::RENAME_INDEX |
    Alter_inplace_info::ALTER_INDEX_COMMENT |
    Alter_inplace_info::ALTER_COLUMN_INDEX_LENGTH;

  /* Is there at least one operation that requires copy algorithm? */
  if (ha_alter_info->handler_flags & ~inplace_offline_operations)
    DBUG_RETURN(HA_ALTER_INPLACE_NOT_SUPPORTED);

  /*
    ALTER TABLE tbl_name CONVERT TO CHARACTER SET .. and
    ALTER TABLE table_name DEFAULT CHARSET = .. most likely
    change column charsets and so not supported in-place through
    old API.

    Changing of PACK_KEYS, MAX_ROWS and ROW_FORMAT options were
    not supported as in-place operations in old API either.
  */
  if (create_info->used_fields & (HA_CREATE_USED_CHARSET |
                                  HA_CREATE_USED_DEFAULT_CHARSET |
                                  HA_CREATE_USED_PACK_KEYS |
                                  HA_CREATE_USED_MAX_ROWS) ||
      (table->s->row_type != create_info->row_type))
    DBUG_RETURN(HA_ALTER_INPLACE_NOT_SUPPORTED);

  /*
    Ask the engine whether the data is compatible; a pure equal-pack-length
    column change only needs IS_EQUAL_PACK_LENGTH compatibility.
  */
  uint table_changes= (ha_alter_info->handler_flags &
                       Alter_inplace_info::ALTER_COLUMN_EQUAL_PACK_LENGTH) ?
    IS_EQUAL_PACK_LENGTH : IS_EQUAL_YES;
  if (table->file->check_if_incompatible_data(create_info, table_changes)
      == COMPATIBLE_DATA_YES)
    DBUG_RETURN(HA_ALTER_INPLACE_EXCLUSIVE_LOCK);

  DBUG_RETURN(HA_ALTER_INPLACE_NOT_SUPPORTED);
}
4920
4921
4922 /*
4923 Default implementation to support in-place alter table
4924 and old online add/drop index API
4925 */
4926
notify_table_changed()4927 void handler::notify_table_changed()
4928 {
4929 ha_create_handler_files(table->s->path.str, NULL, CHF_INDEX_FLAG, NULL);
4930 }
4931
4932
report_unsupported_error(const char * not_supported,const char * try_instead)4933 void Alter_inplace_info::report_unsupported_error(const char *not_supported,
4934 const char *try_instead)
4935 {
4936 if (unsupported_reason == NULL)
4937 my_error(ER_ALTER_OPERATION_NOT_SUPPORTED, MYF(0),
4938 not_supported, try_instead);
4939 else
4940 my_error(ER_ALTER_OPERATION_NOT_SUPPORTED_REASON, MYF(0),
4941 not_supported, unsupported_reason, try_instead);
4942 }
4943
4944
4945 /**
4946 Rename table: public interface.
4947
4948 @sa handler::rename_table()
4949 */
4950
4951 int
ha_rename_table(const char * from,const char * to)4952 handler::ha_rename_table(const char *from, const char *to)
4953 {
4954 assert(m_lock_type == F_UNLCK);
4955 mark_trx_read_write();
4956
4957 return rename_table(from, to);
4958 }
4959
4960
4961 /**
4962 Delete table: public interface.
4963
4964 @sa handler::delete_table()
4965 */
4966
4967 int
ha_delete_table(const char * name)4968 handler::ha_delete_table(const char *name)
4969 {
4970 assert(m_lock_type == F_UNLCK);
4971 mark_trx_read_write();
4972
4973 return delete_table(name);
4974 }
4975
4976
4977 /**
4978 Drop table in the engine: public interface.
4979
4980 @sa handler::drop_table()
4981 */
4982
4983 void
ha_drop_table(const char * name)4984 handler::ha_drop_table(const char *name)
4985 {
4986 assert(m_lock_type == F_UNLCK);
4987 mark_trx_read_write();
4988
4989 return drop_table(name);
4990 }
4991
4992
4993 /**
4994 Create a table in the engine: public interface.
4995
4996 @sa handler::create()
4997 */
4998
4999 int
ha_create(const char * name,TABLE * form,HA_CREATE_INFO * info)5000 handler::ha_create(const char *name, TABLE *form, HA_CREATE_INFO *info)
5001 {
5002 assert(m_lock_type == F_UNLCK);
5003 mark_trx_read_write();
5004
5005 return create(name, form, info);
5006 }
5007
5008
5009 /**
5010 Create handler files for CREATE TABLE: public interface.
5011
5012 @sa handler::create_handler_files()
5013 */
5014
5015 int
ha_create_handler_files(const char * name,const char * old_name,int action_flag,HA_CREATE_INFO * info)5016 handler::ha_create_handler_files(const char *name, const char *old_name,
5017 int action_flag, HA_CREATE_INFO *info)
5018 {
5019 /*
5020 Normally this is done when unlocked, but in fast_alter_partition_table,
5021 it is done on an already locked handler when preparing to alter/rename
5022 partitions.
5023 */
5024 assert(m_lock_type == F_UNLCK ||
5025 (!old_name && strcmp(name, table_share->path.str)));
5026 mark_trx_read_write();
5027
5028 return create_handler_files(name, old_name, action_flag, info);
5029 }
5030
5031
5032 /**
5033 Tell the storage engine that it is allowed to "disable transaction" in the
5034 handler. It is a hint that ACID is not required - it is used in NDB for
5035 ALTER TABLE, for example, when data are copied to temporary table.
5036 A storage engine may treat this hint any way it likes. NDB for example
5037 starts to commit every now and then automatically.
5038 This hint can be safely ignored.
5039 */
ha_enable_transaction(THD * thd,bool on)5040 int ha_enable_transaction(THD *thd, bool on)
5041 {
5042 int error=0;
5043 DBUG_ENTER("ha_enable_transaction");
5044 DBUG_PRINT("enter", ("on: %d", (int) on));
5045
5046 if ((thd->get_transaction()->m_flags.enabled= on))
5047 {
5048 /*
5049 Now all storage engines should have transaction handling enabled.
5050 But some may have it enabled all the time - "disabling" transactions
5051 is an optimization hint that storage engine is free to ignore.
5052 So, let's commit an open transaction (if any) now.
5053 */
5054 if (!(error= ha_commit_trans(thd, 0)))
5055 error= trans_commit_implicit(thd);
5056 }
5057 DBUG_RETURN(error);
5058 }
5059
/**
  Read the next row through the active index and check that it still
  matches the given key prefix; report end-of-file otherwise.

  @param buf     Buffer to store the row in (may differ from record[0]).
  @param key     Key value the fetched row must continue to match.
  @param keylen  Length of the key prefix to compare.

  @return 0 on a matching row, HA_ERR_END_OF_FILE when the next row no
          longer matches, or the error from index_next().
*/
int handler::index_next_same(uchar *buf, const uchar *key, uint keylen)
{
  int error;
  DBUG_ENTER("index_next_same");
  if (!(error=index_next(buf)))
  {
    /* Offset between the caller's buffer and table->record[0]. */
    my_ptrdiff_t ptrdiff= buf - table->record[0];
    uchar *save_record_0= NULL;
    KEY *key_info= NULL;
    KEY_PART_INFO *key_part= NULL;
    KEY_PART_INFO *key_part_end= NULL;

    /*
      key_cmp_if_same() compares table->record[0] against 'key'.
      In parts it uses table->record[0] directly, in parts it uses
      field objects with their local pointers into table->record[0].
      If 'buf' is distinct from table->record[0], we need to move
      all record references. This is table->record[0] itself and
      the field pointers of the fields used in this key.
    */
    if (ptrdiff)
    {
      save_record_0= table->record[0];
      table->record[0]= buf;
      key_info= table->key_info + active_index;
      key_part= key_info->key_part;
      key_part_end= key_part + key_info->user_defined_key_parts;
      /* Shift every key field's pointer so it points into 'buf'. */
      for (; key_part < key_part_end; key_part++)
      {
        assert(key_part->field);
        key_part->field->move_field_offset(ptrdiff);
      }
    }

    /* Non-zero means the fetched row no longer matches the key prefix. */
    if (key_cmp_if_same(table, key, active_index, keylen))
    {
      table->status=STATUS_NOT_FOUND;
      error=HA_ERR_END_OF_FILE;
    }

    /* Move back if necessary. */
    if (ptrdiff)
    {
      table->record[0]= save_record_0;
      for (key_part= key_info->key_part; key_part < key_part_end; key_part++)
        key_part->field->move_field_offset(-ptrdiff);
    }
  }
  DBUG_RETURN(error);
}
5110
5111 /****************************************************************************
5112 ** Some general functions that isn't in the handler class
5113 ****************************************************************************/
5114
/**
  Initiates table-file and calls appropriate database-creator.

  Opens a temporary TABLE_SHARE/TABLE from the frm file at 'path' and asks
  the engine to create the table through handler::ha_create().

  @param thd                 Thread context.
  @param path                Path to the frm file (without extension).
  @param db                  Database name.
  @param table_name          Table name.
  @param create_info         Create options for the engine.
  @param update_create_info  If true, refresh create_info from the opened
                             table before creating.
  @param is_temp_table       True when creating a temporary table.

  @retval
   0  ok
  @retval
   1  error
*/
int ha_create_table(THD *thd, const char *path,
                    const char *db, const char *table_name,
                    HA_CREATE_INFO *create_info,
                    bool update_create_info,
                    bool is_temp_table)
{
  int error= 1;
  TABLE table;
  char name_buff[FN_REFLEN];
  const char *name;
  TABLE_SHARE share;
#ifdef HAVE_PSI_TABLE_INTERFACE
  /* Instrument as temporary if any of the hints says the table is. */
  bool temp_table = is_temp_table ||
                    (create_info->options & HA_LEX_CREATE_TMP_TABLE) ||
                    (strstr(path, tmp_file_prefix) != NULL);
#endif
  DBUG_ENTER("ha_create_table");

  init_tmp_table_share(thd, &share, db, 0, table_name, path);
  if (open_table_def(thd, &share, 0))
    goto err;

#ifdef HAVE_PSI_TABLE_INTERFACE
  share.m_psi= PSI_TABLE_CALL(get_table_share)(temp_table, &share);
#endif

  if (open_table_from_share(thd, &share, "", 0, (uint) READ_ALL, 0, &table,
                            TRUE))
  {
#ifdef HAVE_PSI_TABLE_INTERFACE
    /* Open failed: release the instrumentation share taken above. */
    PSI_TABLE_CALL(drop_table_share)
      (temp_table, db, strlen(db), table_name, strlen(table_name));
#endif
    goto err;
  }

  if (update_create_info)
    update_create_info_from_table(create_info, &table);

  /* Let the engine map the path to its own canonical file name. */
  name= get_canonical_filename(table.file, share.path.str, name_buff);

  error= table.file->ha_create(name, &table, create_info);
  if (error)
  {
    table.file->print_error(error, MYF(0));
#ifdef HAVE_PSI_TABLE_INTERFACE
    PSI_TABLE_CALL(drop_table_share)
      (temp_table, db, strlen(db), table_name, strlen(table_name));
#endif
  }
  (void) closefrm(&table, 0);
err:
  free_table_share(&share);
  DBUG_RETURN(error != 0);
}
5178
/**
  Try to discover table from engine.

  @note
    If found, write the frm file to disk.

  @retval
  -1    Table did not exists
  @retval
   0    Table created ok
  @retval
   > 0  Error, table existed but could not be created
*/
int ha_create_table_from_engine(THD* thd, const char *db, const char *name)
{
  int error;
  uchar *frmblob;
  size_t frmlen;
  char path[FN_REFLEN + 1];
  HA_CREATE_INFO create_info;
  TABLE table;
  TABLE_SHARE share;
  DBUG_ENTER("ha_create_table_from_engine");
  DBUG_PRINT("enter", ("name '%s'.'%s'", db, name));

  if ((error= ha_discover(thd, db, name, &frmblob, &frmlen)))
  {
    /* Table could not be discovered and thus not created */
    DBUG_RETURN(error);
  }

  /*
    Table exists in handler and could be discovered
    frmblob and frmlen are set, write the frm to disk
  */

  build_table_filename(path, sizeof(path) - 1, db, name, "", 0);
  // Save the frm file
  error= writefrm(path, frmblob, frmlen);
  my_free(frmblob);  // frm image was copied to disk; release the blob.
  if (error)
    DBUG_RETURN(2);

  init_tmp_table_share(thd, &share, db, 0, name, path);
  if (open_table_def(thd, &share, 0))
  {
    DBUG_RETURN(3);
  }

#ifdef HAVE_PSI_TABLE_INTERFACE
  /*
    Table discovery is not instrumented.
    Once discovered, the table will be opened normally,
    and instrumented normally.
  */
#endif

  if (open_table_from_share(thd, &share, "" ,0, 0, 0, &table, FALSE))
  {
    free_table_share(&share);
    DBUG_RETURN(3);
  }

  update_create_info_from_table(&create_info, &table);
  create_info.table_options|= HA_OPTION_CREATE_FROM_ENGINE;

  /* Re-create the table in the engine from the discovered definition. */
  get_canonical_filename(table.file, path, path);
  error=table.file->ha_create(path, &table, &create_info);
  (void) closefrm(&table, 1);

  DBUG_RETURN(error != 0);
}
5251
5252
/**
  Try to find a table in a storage engine.

  @param db   Normalized table schema name
  @param name Normalized table name.
  @param[out] exists Only valid if the function succeeded.

  @retval TRUE An error is found
  @retval FALSE Success, check *exists

  @note The current implementation never fails and always returns FALSE;
        a discovery failure simply means the table does not exist.
*/

bool
ha_check_if_table_exists(THD* thd, const char *db, const char *name,
                         bool *exists)
{
  uchar *frmblob= NULL;
  size_t frmlen;
  DBUG_ENTER("ha_check_if_table_exists");

  /* The table exists iff some engine can discover its frm. */
  *exists= ! ha_discover(thd, db, name, &frmblob, &frmlen);
  if (*exists)
    my_free(frmblob);  // Only existence matters; discard the frm image.

  DBUG_RETURN(FALSE);
}
5278
5279
/**
  @brief Check if a given table is a user table or a valid system table or
         a valid system table that a SE supports.

  @param hton       Handlerton of new engine.
  @param db         Database name.
  @param table_name Table name to be checked.

  @retval st_sys_tbl_chk_params::enum_status
*/
static st_sys_tbl_chk_params::enum_status
ha_get_system_table_check_status(handlerton *hton, const char *db,
                                 const char *table_name)
{
  DBUG_ENTER("ha_get_system_table_check_status");
  st_sys_tbl_chk_params check_params;
  check_params.status= st_sys_tbl_chk_params::USER_TABLE;
  bool is_system_database= false;
  const char **names;
  st_handler_tablename *systab;

  // Check if we have a system database name in the command.
  assert(known_system_databases != NULL);
  names= known_system_databases;
  while (names && *names)
  {
    if (strcmp(*names, db) == 0)
    {
      /* Used to compare later, will be faster */
      check_params.db= *names;
      is_system_database= true;
      break;
    }
    names++;
  }
  /* A table outside every system database is always a user table. */
  if (!is_system_database)
    DBUG_RETURN(st_sys_tbl_chk_params::USER_TABLE);

  // Check if this is SQL layer system tables.
  systab= mysqld_system_tables;
  check_params.is_sql_layer_system_table= false;
  while (systab && systab->db)
  {
    /* Pointer comparison on db works because check_params.db was interned
       from known_system_databases above. */
    if (systab->db == check_params.db &&
        strcmp(systab->tablename, table_name) == 0)
    {
      check_params.is_sql_layer_system_table= true;
      break;
    }
    systab++;
  }

  // Check if this is a system table and if some engine supports it.
  check_params.status= check_params.is_sql_layer_system_table ?
    st_sys_tbl_chk_params::SYSTEM_TABLE :
    st_sys_tbl_chk_params::USER_TABLE;
  check_params.db_type= hton->db_type;
  check_params.table_name= table_name;
  plugin_foreach(NULL, check_engine_system_table_handlerton,
                 MYSQL_STORAGE_ENGINE_PLUGIN, &check_params);

  DBUG_RETURN(check_params.status);
}
5343
5344
5345 /**
5346 @brief Check if a given table is a system table supported by a SE.
5347
5348 @todo There is another function called is_system_table_name() used by
5349 get_table_category(), which is used to set TABLE_SHARE table_category.
5350 It checks only a subset of table name like proc, event and time*.
5351 We cannot use below function in get_table_category(),
5352 as that affects locking mechanism. If we need to
5353 unify these functions, we need to fix locking issues generated.
5354
5355 @param hton Handlerton of new engine.
5356 @param db Database name.
5357 @param table_name Table name to be checked.
5358
5359 @return Operation status
5360 @retval true If the table name is a valid system table
5361 that is supported by a SE.
5362
5363 @retval false Not a system table.
5364 */
ha_is_supported_system_table(handlerton * hton,const char * db,const char * table_name)5365 bool ha_is_supported_system_table(handlerton *hton, const char *db,
5366 const char *table_name)
5367 {
5368 DBUG_ENTER("ha_is_supported_system_table");
5369 st_sys_tbl_chk_params::enum_status status=
5370 ha_get_system_table_check_status(hton, db, table_name);
5371
5372 // It's a valid SE supported system table.
5373 DBUG_RETURN(status == st_sys_tbl_chk_params::SE_SUPPORTED_SYSTEM_TABLE);
5374 }
5375
5376
5377 /**
5378 @brief Check if a given table is a system table that belongs
5379 to some SE or a user table.
5380
5381 @details The primary purpose of introducing this function is to stop system
5382 tables to be created or being moved to undesired storage engines.
5383
5384 @todo There is another function called is_system_table_name() used by
5385 get_table_category(), which is used to set TABLE_SHARE table_category.
5386 It checks only a subset of table name like proc, event and time*.
5387 We cannot use below function in get_table_category(),
5388 as that affects locking mechanism. If we need to
5389 unify these functions, we need to fix locking issues generated.
5390
5391 @param hton Handlerton of new engine.
5392 @param db Database name.
5393 @param table_name Table name to be checked.
5394
5395 @return Operation status
5396 @retval true If the table name is a valid system table
5397 or if its a valid user table.
5398
5399 @retval false If the table name is a system table name
5400 and does not belong to engine specified
5401 in the command.
5402 */
ha_is_valid_system_or_user_table(handlerton * hton,const char * db,const char * table_name)5403 bool ha_is_valid_system_or_user_table(handlerton *hton, const char *db,
5404 const char *table_name)
5405 {
5406 DBUG_ENTER("ha_is_valid_system_or_user_table");
5407
5408 st_sys_tbl_chk_params::enum_status status=
5409 ha_get_system_table_check_status(hton, db, table_name);
5410
5411 // It's a user table or a valid SE supported system table.
5412 DBUG_RETURN(status == st_sys_tbl_chk_params::USER_TABLE ||
5413 status == st_sys_tbl_chk_params::SE_SUPPORTED_SYSTEM_TABLE);
5414 }
5415
5416
/**
  @brief Called for each SE to check if given db, tablename is a system table.

  @details The primary purpose of introducing this function is to stop system
           tables to be created or being moved to undesired storage engines.

  @param unused  unused THD*
  @param plugin  Points to specific SE.
  @param arg     Is of type struct st_sys_tbl_chk_params.

  @note
    args->status    Indicates OUT param,
                    see struct st_sys_tbl_chk_params definition for more info.

  @return Operation status
  @retval true  There was a match found.
                This will stop doing checks with other SE's.

  @retval false There was no match found.
                Other SE's will be checked to find a match.
*/
static my_bool check_engine_system_table_handlerton(THD *unused,
                                                    plugin_ref plugin,
                                                    void *arg)
{
  st_sys_tbl_chk_params *check_params= (st_sys_tbl_chk_params*) arg;
  handlerton *hton= plugin_data<handlerton*>(plugin);

  // Do we already know that the table is a system table?
  if (check_params->status == st_sys_tbl_chk_params::SYSTEM_TABLE)
  {
    /*
      If this is the same SE specified in the command, we can
      simply ask the SE if it supports it stop the search regardless.
    */
    if (hton->db_type == check_params->db_type)
    {
      if (hton->is_supported_system_table &&
          hton->is_supported_system_table(check_params->db,
                                          check_params->table_name,
                                          check_params->is_sql_layer_system_table))
        check_params->status=
          st_sys_tbl_chk_params::SE_SUPPORTED_SYSTEM_TABLE;
      return TRUE;  // Target SE reached: answer is final either way.
    }
    /*
      If this is a different SE, there is no point in asking the SE
      since we already know it's a system table and we don't care
      if it is supported or not.
    */
    return FALSE;
  }

  /*
    We don't yet know if the table is a system table or not.
    We therefore must always ask the SE.
  */
  if (hton->is_supported_system_table &&
      hton->is_supported_system_table(check_params->db,
                                      check_params->table_name,
                                      check_params->is_sql_layer_system_table))
  {
    /*
      If this is the same SE specified in the command, we know it's a
      supported system table and can stop the search.
    */
    if (hton->db_type == check_params->db_type)
    {
      check_params->status= st_sys_tbl_chk_params::SE_SUPPORTED_SYSTEM_TABLE;
      return TRUE;
    }
    else
      check_params->status= st_sys_tbl_chk_params::SYSTEM_TABLE;
  }

  return FALSE;
}
5494
5495 /*
5496 Prepare list of all known system database names
5497 current we just have 'mysql' as system database name.
5498
5499 Later ndbcluster, innodb SE's can define some new database
5500 name which can store system tables specific to SE.
5501 */
ha_known_system_databases(void)5502 const char** ha_known_system_databases(void)
5503 {
5504 list<const char*> found_databases;
5505 const char **databases, **database;
5506
5507 // Get mysqld system database name.
5508 found_databases.push_back((char*) mysqld_system_database);
5509
5510 // Get system database names from every specific storage engine.
5511 plugin_foreach(NULL, system_databases_handlerton,
5512 MYSQL_STORAGE_ENGINE_PLUGIN, &found_databases);
5513
5514 databases= (const char **) my_once_alloc(sizeof(char *)*
5515 (found_databases.size()+1),
5516 MYF(MY_WME | MY_FAE));
5517 assert(databases != NULL);
5518
5519 list<const char*>::iterator it;
5520 database= databases;
5521 for (it= found_databases.begin(); it != found_databases.end(); it++)
5522 *database++= *it;
5523 *database= 0; // Last element.
5524
5525 return databases;
5526 }
5527
5528 /**
5529 @brief Fetch system database name specific to SE.
5530
5531 @details This function is invoked by plugin_foreach() from
5532 ha_known_system_databases(), for each storage engine.
5533 */
system_databases_handlerton(THD * unused,plugin_ref plugin,void * arg)5534 static my_bool system_databases_handlerton(THD *unused, plugin_ref plugin,
5535 void *arg)
5536 {
5537 list<const char*> *found_databases= (list<const char*> *) arg;
5538 const char *db;
5539
5540 handlerton *hton= plugin_data<handlerton*>(plugin);
5541 if (hton->system_database)
5542 {
5543 db= hton->system_database();
5544 if (db)
5545 found_databases->push_back(db);
5546 }
5547
5548 return FALSE;
5549 }
5550
init()5551 void st_ha_check_opt::init()
5552 {
5553 flags= sql_flags= 0;
5554 }
5555
5556
5557 /*****************************************************************************
5558 Key cache handling.
5559
5560 This code is only relevant for ISAM/MyISAM tables
5561
5562 key_cache->cache may be 0 only in the case where a key cache is not
5563 initialized or when we where not able to init the key cache in a previous
5564 call to ha_init_key_cache() (probably out of memory)
5565 *****************************************************************************/
5566
5567 /**
5568 Init a key cache if it has not been initied before.
5569 */
ha_init_key_cache(const char * name,KEY_CACHE * key_cache)5570 int ha_init_key_cache(const char *name, KEY_CACHE *key_cache)
5571 {
5572 DBUG_ENTER("ha_init_key_cache");
5573
5574 if (!key_cache->key_cache_inited)
5575 {
5576 mysql_mutex_lock(&LOCK_global_system_variables);
5577 size_t tmp_buff_size= (size_t) key_cache->param_buff_size;
5578 ulonglong tmp_block_size= key_cache->param_block_size;
5579 ulonglong division_limit= key_cache->param_division_limit;
5580 ulonglong age_threshold= key_cache->param_age_threshold;
5581 mysql_mutex_unlock(&LOCK_global_system_variables);
5582 DBUG_RETURN(!init_key_cache(key_cache,
5583 tmp_block_size,
5584 tmp_buff_size,
5585 division_limit, age_threshold));
5586 }
5587 DBUG_RETURN(0);
5588 }
5589
5590
5591 /**
5592 Resize key cache.
5593 */
ha_resize_key_cache(KEY_CACHE * key_cache)5594 int ha_resize_key_cache(KEY_CACHE *key_cache)
5595 {
5596 DBUG_ENTER("ha_resize_key_cache");
5597
5598 if (key_cache->key_cache_inited)
5599 {
5600 mysql_mutex_lock(&LOCK_global_system_variables);
5601 size_t tmp_buff_size= (size_t) key_cache->param_buff_size;
5602 ulonglong tmp_block_size= key_cache->param_block_size;
5603 ulonglong division_limit= key_cache->param_division_limit;
5604 ulonglong age_threshold= key_cache->param_age_threshold;
5605 mysql_mutex_unlock(&LOCK_global_system_variables);
5606 const int retval= resize_key_cache(key_cache,
5607 keycache_thread_var(),
5608 tmp_block_size,
5609 tmp_buff_size,
5610 division_limit, age_threshold);
5611 DBUG_RETURN(!retval);
5612 }
5613 DBUG_RETURN(0);
5614 }
5615
5616
5617 /**
5618 Move all tables from one key cache to another one.
5619 */
ha_change_key_cache(KEY_CACHE * old_key_cache,KEY_CACHE * new_key_cache)5620 int ha_change_key_cache(KEY_CACHE *old_key_cache,
5621 KEY_CACHE *new_key_cache)
5622 {
5623 mi_change_key_cache(old_key_cache, new_key_cache);
5624 return 0;
5625 }
5626
5627
/**
  Try to discover one table from handler(s).

  @retval
    -1   Table did not exists
  @retval
    0   OK. In this case *frmblob and *frmlen are set
  @retval
    >0   error.  frmblob and frmlen may not be set
*/
/* Argument bundle passed through plugin_foreach() to discover_handlerton(). */
struct st_discover_args
{
  const char *db;       /* Schema name of the table to discover. */
  const char *name;     /* Table name to discover. */
  uchar **frmblob;      /* [out] Set to the discovered frm image on success. */
  size_t *frmlen;       /* [out] Set to the length of *frmblob on success. */
};
5645
discover_handlerton(THD * thd,plugin_ref plugin,void * arg)5646 static my_bool discover_handlerton(THD *thd, plugin_ref plugin,
5647 void *arg)
5648 {
5649 st_discover_args *vargs= (st_discover_args *)arg;
5650 handlerton *hton= plugin_data<handlerton*>(plugin);
5651 if (hton->state == SHOW_OPTION_YES && hton->discover &&
5652 (!(hton->discover(hton, thd, vargs->db, vargs->name,
5653 vargs->frmblob,
5654 vargs->frmlen))))
5655 return TRUE;
5656
5657 return FALSE;
5658 }
5659
ha_discover(THD * thd,const char * db,const char * name,uchar ** frmblob,size_t * frmlen)5660 int ha_discover(THD *thd, const char *db, const char *name,
5661 uchar **frmblob, size_t *frmlen)
5662 {
5663 int error= -1; // Table does not exist in any handler
5664 DBUG_ENTER("ha_discover");
5665 DBUG_PRINT("enter", ("db: %s, name: %s", db, name));
5666 st_discover_args args= {db, name, frmblob, frmlen};
5667
5668 if (is_prefix(name,tmp_file_prefix)) /* skip temporary tables */
5669 DBUG_RETURN(error);
5670
5671 if (plugin_foreach(thd, discover_handlerton,
5672 MYSQL_STORAGE_ENGINE_PLUGIN, &args))
5673 error= 0;
5674
5675 if (!error)
5676 {
5677 assert(!thd->status_var_aggregated);
5678 thd->status_var.ha_discover_count++;
5679 }
5680 DBUG_RETURN(error);
5681 }
5682
5683
/**
  Call this function in order to give the handler the possiblity
  to ask engine if there are any new tables that should be written to disk
  or any dropped tables that need to be removed from disk
*/
/* Argument bundle passed through plugin_foreach() to find_files_handlerton(). */
struct st_find_files_args
{
  const char *db;            /* Schema name, forwarded to hton->find_files(). */
  const char *path;          /* Filesystem path, forwarded to hton->find_files(). */
  const char *wild;          /* Wildcard pattern; may be NULL. */
  bool dir;                  /* Flag forwarded to hton->find_files(). */
  List<LEX_STRING> *files;   /* [in,out] File name list the engine may adjust. */
};
5697
find_files_handlerton(THD * thd,plugin_ref plugin,void * arg)5698 static my_bool find_files_handlerton(THD *thd, plugin_ref plugin,
5699 void *arg)
5700 {
5701 st_find_files_args *vargs= (st_find_files_args *)arg;
5702 handlerton *hton= plugin_data<handlerton*>(plugin);
5703
5704
5705 if (hton->state == SHOW_OPTION_YES && hton->find_files)
5706 if (hton->find_files(hton, thd, vargs->db, vargs->path, vargs->wild,
5707 vargs->dir, vargs->files))
5708 return TRUE;
5709
5710 return FALSE;
5711 }
5712
5713 int
ha_find_files(THD * thd,const char * db,const char * path,const char * wild,bool dir,List<LEX_STRING> * files)5714 ha_find_files(THD *thd,const char *db,const char *path,
5715 const char *wild, bool dir, List<LEX_STRING> *files)
5716 {
5717 int error= 0;
5718 DBUG_ENTER("ha_find_files");
5719 DBUG_PRINT("enter", ("db: '%s' path: '%s' wild: '%s' dir: %d",
5720 db, path, wild ? wild : "NULL", dir));
5721 st_find_files_args args= {db, path, wild, dir, files};
5722
5723 plugin_foreach(thd, find_files_handlerton,
5724 MYSQL_STORAGE_ENGINE_PLUGIN, &args);
5725 /* The return value is not currently used */
5726 DBUG_RETURN(error);
5727 }
5728
/**
  Ask handler if the table exists in engine.
  @retval
    HA_ERR_NO_SUCH_TABLE     Table does not exist
  @retval
    HA_ERR_TABLE_EXIST       Table exists
  @retval
    \#                  Error code
*/
/* Argument bundle for table_exists_in_engine_handlerton(). */
struct st_table_exists_in_engine_args
{
  const char *db;    /* Schema name to look up. */
  const char *name;  /* Table name to look up. */
  int err;           /* [out] Result code from the last engine queried. */
};
5744
table_exists_in_engine_handlerton(THD * thd,plugin_ref plugin,void * arg)5745 static my_bool table_exists_in_engine_handlerton(THD *thd, plugin_ref plugin,
5746 void *arg)
5747 {
5748 st_table_exists_in_engine_args *vargs= (st_table_exists_in_engine_args *)arg;
5749 handlerton *hton= plugin_data<handlerton*>(plugin);
5750
5751 int err= HA_ERR_NO_SUCH_TABLE;
5752
5753 if (hton->state == SHOW_OPTION_YES && hton->table_exists_in_engine)
5754 err = hton->table_exists_in_engine(hton, thd, vargs->db, vargs->name);
5755
5756 vargs->err = err;
5757 if (vargs->err == HA_ERR_TABLE_EXIST)
5758 return TRUE;
5759
5760 return FALSE;
5761 }
5762
ha_table_exists_in_engine(THD * thd,const char * db,const char * name)5763 int ha_table_exists_in_engine(THD* thd, const char* db, const char* name)
5764 {
5765 DBUG_ENTER("ha_table_exists_in_engine");
5766 DBUG_PRINT("enter", ("db: %s, name: %s", db, name));
5767 st_table_exists_in_engine_args args= {db, name, HA_ERR_NO_SUCH_TABLE};
5768 plugin_foreach(thd, table_exists_in_engine_handlerton,
5769 MYSQL_STORAGE_ENGINE_PLUGIN, &args);
5770 DBUG_PRINT("exit", ("error: %d", args.err));
5771 DBUG_RETURN(args.err);
5772 }
5773
/**
  Prepare (sub-) sequences of joins in this statement
  which may be pushed to each storage engine for execution.
*/
/* Argument bundle for make_pushed_join_handlerton(). */
struct st_make_pushed_join_args
{
  const AQP::Join_plan* plan; // Query plan provided by optimizer
  int err;                    // Error code to return.
};
5783
make_pushed_join_handlerton(THD * thd,plugin_ref plugin,void * arg)5784 static my_bool make_pushed_join_handlerton(THD *thd, plugin_ref plugin,
5785 void *arg)
5786 {
5787 st_make_pushed_join_args *vargs= (st_make_pushed_join_args *)arg;
5788 handlerton *hton= plugin_data<handlerton*>(plugin);
5789
5790 if (hton && hton->make_pushed_join)
5791 {
5792 const int error= hton->make_pushed_join(hton, thd, vargs->plan);
5793 if (unlikely(error))
5794 {
5795 vargs->err = error;
5796 return TRUE;
5797 }
5798 }
5799 return FALSE;
5800 }
5801
ha_make_pushed_joins(THD * thd,const AQP::Join_plan * plan)5802 int ha_make_pushed_joins(THD *thd, const AQP::Join_plan* plan)
5803 {
5804 DBUG_ENTER("ha_make_pushed_joins");
5805 st_make_pushed_join_args args= {plan, 0};
5806 plugin_foreach(thd, make_pushed_join_handlerton,
5807 MYSQL_STORAGE_ENGINE_PLUGIN, &args);
5808 DBUG_PRINT("exit", ("error: %d", args.err));
5809 DBUG_RETURN(args.err);
5810 }
5811
/*
  TODO: change this into a dynamic struct
  List<handlerton> does not work as
  1. binlog_end is called when MEM_ROOT is gone
  2. cannot work with thd MEM_ROOT as memory should be freed
*/
#define MAX_HTON_LIST_ST 63
/* Fixed-capacity list of handlertons collected by binlog_func_list(). */
struct hton_list_st
{
  handlerton *hton[MAX_HTON_LIST_ST];  /* Collected handlertons. */
  uint sz;                             /* Number of valid entries in hton[]. */
};
5824
/* A binlog callback selector plus its argument, dispatched per handlerton. */
struct binlog_func_st
{
  enum_binlog_func fn;  /* Which binlog callback to invoke. */
  void *arg;            /* Opaque argument forwarded to the callback. */
};
5830
5831 /** @brief
5832 Listing handlertons first to avoid recursive calls and deadlock
5833 */
binlog_func_list(THD * thd,plugin_ref plugin,void * arg)5834 static my_bool binlog_func_list(THD *thd, plugin_ref plugin, void *arg)
5835 {
5836 hton_list_st *hton_list= (hton_list_st *)arg;
5837 handlerton *hton= plugin_data<handlerton*>(plugin);
5838 if (hton->state == SHOW_OPTION_YES && hton->binlog_func)
5839 {
5840 uint sz= hton_list->sz;
5841 if (sz == MAX_HTON_LIST_ST-1)
5842 {
5843 /* list full */
5844 return FALSE;
5845 }
5846 hton_list->hton[sz]= hton;
5847 hton_list->sz= sz+1;
5848 }
5849 return FALSE;
5850 }
5851
binlog_func_foreach(THD * thd,binlog_func_st * bfn)5852 static my_bool binlog_func_foreach(THD *thd, binlog_func_st *bfn)
5853 {
5854 hton_list_st hton_list;
5855 uint i, sz;
5856
5857 hton_list.sz= 0;
5858 plugin_foreach(thd, binlog_func_list,
5859 MYSQL_STORAGE_ENGINE_PLUGIN, &hton_list);
5860
5861 for (i= 0, sz= hton_list.sz; i < sz ; i++)
5862 hton_list.hton[i]->binlog_func(hton_list.hton[i], thd, bfn->fn, bfn->arg);
5863 return FALSE;
5864 }
5865
5866
ha_reset_logs(THD * thd)5867 int ha_reset_logs(THD *thd)
5868 {
5869 binlog_func_st bfn= {BFN_RESET_LOGS, 0};
5870 binlog_func_foreach(thd, &bfn);
5871 return 0;
5872 }
5873
ha_reset_slave(THD * thd)5874 void ha_reset_slave(THD* thd)
5875 {
5876 binlog_func_st bfn= {BFN_RESET_SLAVE, 0};
5877 binlog_func_foreach(thd, &bfn);
5878 }
5879
ha_binlog_wait(THD * thd)5880 void ha_binlog_wait(THD* thd)
5881 {
5882 binlog_func_st bfn= {BFN_BINLOG_WAIT, 0};
5883 binlog_func_foreach(thd, &bfn);
5884 }
5885
ha_binlog_index_purge_file(THD * thd,const char * file)5886 int ha_binlog_index_purge_file(THD *thd, const char *file)
5887 {
5888 binlog_func_st bfn= {BFN_BINLOG_PURGE_FILE, (void *)file};
5889 binlog_func_foreach(thd, &bfn);
5890 return 0;
5891 }
5892
/** Argument capsule for binlog_log_query_handlerton[2](). */
struct binlog_log_query_st
{
  enum_binlog_command binlog_command; // Kind of statement being logged
  const char *query;                  // Statement text
  size_t query_length;                // Length of query in bytes
  const char *db;                     // Database name
  const char *table_name;             // Table name
};
5901
binlog_log_query_handlerton2(THD * thd,handlerton * hton,void * args)5902 static my_bool binlog_log_query_handlerton2(THD *thd,
5903 handlerton *hton,
5904 void *args)
5905 {
5906 struct binlog_log_query_st *b= (struct binlog_log_query_st*)args;
5907 if (hton->state == SHOW_OPTION_YES && hton->binlog_log_query)
5908 hton->binlog_log_query(hton, thd,
5909 b->binlog_command,
5910 b->query,
5911 b->query_length,
5912 b->db,
5913 b->table_name);
5914 return FALSE;
5915 }
5916
binlog_log_query_handlerton(THD * thd,plugin_ref plugin,void * args)5917 static my_bool binlog_log_query_handlerton(THD *thd,
5918 plugin_ref plugin,
5919 void *args)
5920 {
5921 return binlog_log_query_handlerton2(thd,
5922 plugin_data<handlerton*>(plugin), args);
5923 }
5924
ha_binlog_log_query(THD * thd,handlerton * hton,enum_binlog_command binlog_command,const char * query,size_t query_length,const char * db,const char * table_name)5925 void ha_binlog_log_query(THD *thd, handlerton *hton,
5926 enum_binlog_command binlog_command,
5927 const char *query, size_t query_length,
5928 const char *db, const char *table_name)
5929 {
5930 struct binlog_log_query_st b;
5931 b.binlog_command= binlog_command;
5932 b.query= query;
5933 b.query_length= query_length;
5934 b.db= db;
5935 b.table_name= table_name;
5936 if (hton == 0)
5937 plugin_foreach(thd, binlog_log_query_handlerton,
5938 MYSQL_STORAGE_ENGINE_PLUGIN, &b);
5939 else
5940 binlog_log_query_handlerton2(thd, hton, &b);
5941 }
5942
ha_binlog_end(THD * thd)5943 int ha_binlog_end(THD* thd)
5944 {
5945 binlog_func_st bfn= {BFN_BINLOG_END, 0};
5946 binlog_func_foreach(thd, &bfn);
5947 return 0;
5948 }
5949
5950 /**
5951 Calculate cost of 'index only' scan for given index and number of records
5952
5953 @param keynr Index number
5954 @param records Estimated number of records to be retrieved
5955
5956 @note
5957 It is assumed that we will read through the whole key range and that all
5958 key blocks are half full (normally things are much better). It is also
5959 assumed that each time we read the next key from the index, the handler
5960 performs a random seek, thus the cost is proportional to the number of
5961 blocks read.
5962
5963 @return
5964 Estimated cost of 'index only' scan
5965 */
5966
index_only_read_time(uint keynr,double records)5967 double handler::index_only_read_time(uint keynr, double records)
5968 {
5969 double read_time;
5970 uint keys_per_block= (stats.block_size/2/
5971 (table_share->key_info[keynr].key_length + ref_length) +
5972 1);
5973 read_time=((double) (records + keys_per_block-1) /
5974 (double) keys_per_block);
5975 return read_time;
5976 }
5977
5978
table_in_memory_estimate() const5979 double handler::table_in_memory_estimate() const
5980 {
5981 assert(stats.table_in_mem_estimate == IN_MEMORY_ESTIMATE_UNKNOWN ||
5982 (stats.table_in_mem_estimate >= 0.0 &&
5983 stats.table_in_mem_estimate <= 1.0));
5984
5985 /*
5986 If the storage engine has supplied information about how much of the
5987 table that is currently in a memory buffer, then use this estimate.
5988 */
5989 if (stats.table_in_mem_estimate != IN_MEMORY_ESTIMATE_UNKNOWN)
5990 return stats.table_in_mem_estimate;
5991
5992 /*
5993 The storage engine has not provided any information about how much of
5994 this index is in memory, use an heuristic to produce an estimate.
5995 */
5996 return estimate_in_memory_buffer(stats.data_file_length);
5997 }
5998
5999
index_in_memory_estimate(uint keyno) const6000 double handler::index_in_memory_estimate(uint keyno) const
6001 {
6002 const KEY *key= &table->key_info[keyno];
6003
6004 /*
6005 If the storage engine has supplied information about how much of the
6006 index that is currently in a memory buffer, then use this estimate.
6007 */
6008 const double est= key->in_memory_estimate();
6009 if (est != IN_MEMORY_ESTIMATE_UNKNOWN)
6010 return est;
6011
6012 /*
6013 The storage engine has not provided any information about how much of
6014 this index is in memory, use an heuristic to produce an estimate.
6015 */
6016 ulonglong file_length;
6017
6018 /*
6019 If the index is a clustered primary index, then use the data file
6020 size as estimate for how large the index is.
6021 */
6022 if (keyno == table->s->primary_key && primary_key_is_clustered())
6023 file_length= stats.data_file_length;
6024 else
6025 file_length= stats.index_file_length;
6026
6027 return estimate_in_memory_buffer(file_length);
6028 }
6029
6030
estimate_in_memory_buffer(ulonglong table_index_size) const6031 double handler::estimate_in_memory_buffer(ulonglong table_index_size) const
6032 {
6033 /*
6034 The storage engine has not provided any information about how much of
6035 the table/index is in memory. In this case we use a heuristic:
6036
6037 - if the size of the table/index is less than 20 percent (pick any
6038 number) of the memory buffer, then the entire table/index is likely in
6039 memory.
6040 - if the size of the table/index is larger than the memory buffer, then
6041 assume nothing of the table/index is in memory.
6042 - if the size of the table/index is larger than 20 percent but less than
6043 the memory buffer size, then use a linear function of the table/index
6044 size that goes from 1.0 to 0.0.
6045 */
6046
6047 /*
6048 If the storage engine has information about the size of its
6049 memory buffer, then use this. Otherwise, assume that at least 100 MB
6050 of data can be chached in memory.
6051 */
6052 longlong memory_buf_size= get_memory_buffer_size();
6053 if (memory_buf_size <= 0)
6054 memory_buf_size= 100 * 1024 * 1024; // 100 MB
6055
6056 /*
6057 Upper limit for the relative size of a table to be considered
6058 entirely available in a memory buffer. If the actual table size is
6059 less than this we assume it is complete cached in a memory buffer.
6060 */
6061 const double table_index_in_memory_limit= 0.2;
6062
6063 /*
6064 Estimate for how much of the total memory buffer this table/index
6065 can occupy.
6066 */
6067 const double percent_of_mem= static_cast<double>(table_index_size) /
6068 memory_buf_size;
6069
6070 double in_mem_est;
6071
6072 if (percent_of_mem < table_index_in_memory_limit) // Less than 20 percent
6073 in_mem_est= 1.0;
6074 else if (percent_of_mem > 1.0) // Larger than buffer
6075 in_mem_est= 0.0;
6076 else
6077 {
6078 /*
6079 The size of the table/index is larger than
6080 "table_index_in_memory_limit" * "memory_buf_size" but less than
6081 the total size of the memory buffer.
6082 */
6083 in_mem_est= 1.0 - (percent_of_mem - table_index_in_memory_limit) /
6084 (1.0 - table_index_in_memory_limit);
6085 }
6086 assert(in_mem_est >= 0.0 && in_mem_est <= 1.0);
6087
6088 return in_mem_est;
6089 }
6090
6091
table_scan_cost()6092 Cost_estimate handler::table_scan_cost()
6093 {
6094 /*
6095 This function returns a Cost_estimate object. The function should be
6096 implemented in a way that allows the compiler to use "return value
6097 optimization" to avoid creating the temporary object for the return value
6098 and use of the copy constructor.
6099 */
6100
6101 const double io_cost= scan_time() * table->cost_model()->page_read_cost(1.0);
6102 Cost_estimate cost;
6103 cost.add_io(io_cost);
6104 return cost;
6105 }
6106
6107
index_scan_cost(uint index,double ranges,double rows)6108 Cost_estimate handler::index_scan_cost(uint index, double ranges, double rows)
6109 {
6110 /*
6111 This function returns a Cost_estimate object. The function should be
6112 implemented in a way that allows the compiler to use "return value
6113 optimization" to avoid creating the temporary object for the return value
6114 and use of the copy constructor.
6115 */
6116
6117 assert(ranges >= 0.0);
6118 assert(rows >= 0.0);
6119
6120 const double io_cost= index_only_read_time(index, rows) *
6121 table->cost_model()->page_read_cost_index(index, 1.0);
6122 Cost_estimate cost;
6123 cost.add_io(io_cost);
6124 return cost;
6125 }
6126
6127
read_cost(uint index,double ranges,double rows)6128 Cost_estimate handler::read_cost(uint index, double ranges, double rows)
6129 {
6130 /*
6131 This function returns a Cost_estimate object. The function should be
6132 implemented in a way that allows the compiler to use "return value
6133 optimization" to avoid creating the temporary object for the return value
6134 and use of the copy constructor.
6135 */
6136
6137 assert(ranges >= 0.0);
6138 assert(rows >= 0.0);
6139
6140 const double io_cost= read_time(index, static_cast<uint>(ranges),
6141 static_cast<ha_rows>(rows)) *
6142 table->cost_model()->page_read_cost(1.0);
6143 Cost_estimate cost;
6144 cost.add_io(io_cost);
6145 return cost;
6146 }
6147
6148
6149 /**
6150 Check if key has partially-covered columns
6151
6152 We can't use DS-MRR to perform range scans when the ranges are over
6153 partially-covered keys, because we'll not have full key part values
6154 (we'll have their prefixes from the index) and will not be able to check
6155 if we've reached the end of the range.
6156
6157 @param keyno Key to check
6158
6159 @todo
6160 Allow use of DS-MRR in cases where the index has partially-covered
6161 components but they are not used for scanning.
6162
6163 @retval TRUE Yes
6164 @retval FALSE No
6165 */
6166
key_uses_partial_cols(TABLE * table,uint keyno)6167 bool key_uses_partial_cols(TABLE *table, uint keyno)
6168 {
6169 KEY_PART_INFO *kp= table->key_info[keyno].key_part;
6170 KEY_PART_INFO *kp_end= kp + table->key_info[keyno].user_defined_key_parts;
6171 for (; kp != kp_end; kp++)
6172 {
6173 if (!kp->field->part_of_key.is_set(keyno))
6174 return TRUE;
6175 }
6176 return FALSE;
6177 }
6178
6179 /****************************************************************************
6180 * Default MRR implementation (MRR to non-MRR converter)
6181 ***************************************************************************/
6182
6183 /**
6184 Get cost and other information about MRR scan over a known list of ranges
6185
6186 Calculate estimated cost and other information about an MRR scan for given
6187 sequence of ranges.
6188
6189 @param keyno Index number
6190 @param seq Range sequence to be traversed
6191 @param seq_init_param First parameter for seq->init()
6192 @param n_ranges_arg Number of ranges in the sequence, or 0 if the caller
6193 can't efficiently determine it
6194 @param bufsz[in,out] IN: Size of the buffer available for use
6195 OUT: Size of the buffer that is expected to be actually
6196 used, or 0 if buffer is not needed.
6197 @param flags[in,out] A combination of HA_MRR_* flags
6198 @param cost[out] Estimated cost of MRR access
6199
6200 @note
6201 This method (or an overriding one in a derived class) must check for
6202 thd->killed and return HA_POS_ERROR if it is not zero. This is required
6203 for a user to be able to interrupt the calculation by killing the
6204 connection/query.
6205
6206 @retval
6207 HA_POS_ERROR Error or the engine is unable to perform the requested
6208 scan. Values of OUT parameters are undefined.
6209 @retval
6210 other OK, *cost contains cost of the scan, *bufsz and *flags
6211 contain scan parameters.
6212 */
6213
ha_rows
handler::multi_range_read_info_const(uint keyno, RANGE_SEQ_IF *seq,
                                     void *seq_init_param, uint n_ranges_arg,
                                     uint *bufsz, uint *flags,
                                     Cost_estimate *cost)
{
  KEY_MULTI_RANGE range;
  range_seq_t seq_it;
  ha_rows rows, total_rows= 0;
  uint n_ranges=0;
  THD *thd= current_thd;

  /* Default MRR implementation doesn't need buffer */
  *bufsz= 0;

  DBUG_EXECUTE_IF("bug13822652_2", thd->killed= THD::KILL_QUERY;);

  /* Walk the range sequence, accumulating a row estimate per range. */
  seq_it= seq->init(seq_init_param, n_ranges, *flags);
  while (!seq->next(seq_it, &range))
  {
    /* Let the user interrupt this potentially long estimation loop. */
    if (unlikely(thd->killed != 0))
      return HA_POS_ERROR;

    n_ranges++;
    key_range *min_endp, *max_endp;
    if (range.range_flag & GEOM_FLAG)
    {
      /* A GIS range carries the whole predicate in the start key. */
      min_endp= &range.start_key;
      max_endp= NULL;
    }
    else
    {
      min_endp= range.start_key.length? &range.start_key : NULL;
      max_endp= range.end_key.length? &range.end_key : NULL;
    }
    /*
      Get the number of rows in the range. This is done by calling
      records_in_range() unless:

      1) The range is an equality range and the index is unique.
         There cannot be more than one matching row, so 1 is
         assumed. Note that it is possible that the correct number
         is actually 0, so the row estimate may be too high in this
         case. Also note: ranges of the form "x IS NULL" may have more
         than 1 matching row so records_in_range() is called for these.
      2) a) The range is an equality range but the index is either
            not unique or all of the keyparts are not used.
         b) The user has requested that index statistics should be used
            for equality ranges to avoid the incurred overhead of
            index dives in records_in_range().
         c) Index statistics is available.
         Ranges of the form "x IS NULL" will not use index statistics
         because the number of rows with this value are likely to be
         very different than the values in the index statistics.
    */
    int keyparts_used= 0;
    if ((range.range_flag & UNIQUE_RANGE) &&                        // 1)
        !(range.range_flag & NULL_RANGE))
      rows= 1; /* there can be at most one row */
    else if ((range.range_flag & EQ_RANGE) &&                       // 2a)
             (range.range_flag & USE_INDEX_STATISTICS) &&           // 2b)
             (keyparts_used= my_count_bits(range.start_key.keypart_map)) &&
             table->
               key_info[keyno].has_records_per_key(keyparts_used-1) && // 2c)
             !(range.range_flag & NULL_RANGE))
    {
      /* Use the index statistics instead of diving into the index. */
      rows= static_cast<ha_rows>(
        table->key_info[keyno].records_per_key(keyparts_used - 1));
    }
    else
    {
      DBUG_EXECUTE_IF("crash_records_in_range", DBUG_SUICIDE(););
      assert(min_endp || max_endp);
      if (HA_POS_ERROR == (rows= this->records_in_range(keyno, min_endp,
                                                        max_endp)))
      {
        /* Can't scan one range => can't do MRR scan at all */
        total_rows= HA_POS_ERROR;
        break;
      }
    }
    total_rows += rows;
  }

  if (total_rows != HA_POS_ERROR)
  {
    const Cost_model_table *const cost_model= table->cost_model();

    /* The following calculation is the same as in multi_range_read_info(): */
    *flags|= HA_MRR_USE_DEFAULT_IMPL;
    *flags|= HA_MRR_SUPPORT_SORTED;

    assert(cost->is_zero());
    if (*flags & HA_MRR_INDEX_ONLY)
      *cost= index_scan_cost(keyno, static_cast<double>(n_ranges),
                             static_cast<double>(total_rows));
    else
      *cost= read_cost(keyno, static_cast<double>(n_ranges),
                       static_cast<double>(total_rows));
    /* Add CPU cost of evaluating the rows (plus a small constant). */
    cost->add_cpu(cost_model->row_evaluate_cost(
      static_cast<double>(total_rows)) + 0.01);
  }
  return total_rows;
}
6318
6319
6320 /**
6321 Get cost and other information about MRR scan over some sequence of ranges
6322
6323 Calculate estimated cost and other information about an MRR scan for some
6324 sequence of ranges.
6325
6326 The ranges themselves will be known only at execution phase. When this
6327 function is called we only know number of ranges and a (rough) E(#records)
6328 within those ranges.
6329
6330 Currently this function is only called for "n-keypart singlepoint" ranges,
6331 i.e. each range is "keypart1=someconst1 AND ... AND keypartN=someconstN"
6332
6333 The flags parameter is a combination of those flags: HA_MRR_SORTED,
6334 HA_MRR_INDEX_ONLY, HA_MRR_NO_ASSOCIATION, HA_MRR_LIMITS.
6335
6336 @param keyno Index number
6337 @param n_ranges Estimated number of ranges (i.e. intervals) in the
6338 range sequence.
6339 @param n_rows Estimated total number of records contained within all
6340 of the ranges
6341 @param bufsz[in,out] IN: Size of the buffer available for use
6342 OUT: Size of the buffer that will be actually used, or
6343 0 if buffer is not needed.
6344 @param flags[in,out] A combination of HA_MRR_* flags
6345 @param cost[out] Estimated cost of MRR access
6346
6347 @retval
6348 0 OK, *cost contains cost of the scan, *bufsz and *flags contain scan
6349 parameters.
6350 @retval
6351 other Error or can't perform the requested scan
6352 */
6353
multi_range_read_info(uint keyno,uint n_ranges,uint n_rows,uint * bufsz,uint * flags,Cost_estimate * cost)6354 ha_rows handler::multi_range_read_info(uint keyno, uint n_ranges, uint n_rows,
6355 uint *bufsz, uint *flags,
6356 Cost_estimate *cost)
6357 {
6358 *bufsz= 0; /* Default implementation doesn't need a buffer */
6359
6360 *flags|= HA_MRR_USE_DEFAULT_IMPL;
6361 *flags|= HA_MRR_SUPPORT_SORTED;
6362
6363 assert(cost->is_zero());
6364
6365 /* Produce the same cost as non-MRR code does */
6366 if (*flags & HA_MRR_INDEX_ONLY)
6367 *cost= index_scan_cost(keyno, n_ranges, n_rows);
6368 else
6369 *cost= read_cost(keyno, n_ranges, n_rows);
6370 return 0;
6371 }
6372
6373
6374 /**
6375 Initialize the MRR scan
6376
6377 Initialize the MRR scan. This function may do heavyweight scan
6378 initialization like row prefetching/sorting/etc (NOTE: but better not do
6379 it here as we may not need it, e.g. if we never satisfy WHERE clause on
6380 previous tables. For many implementations it would be natural to do such
6381 initializations in the first multi_read_range_next() call)
6382
6383 mode is a combination of the following flags: HA_MRR_SORTED,
6384 HA_MRR_INDEX_ONLY, HA_MRR_NO_ASSOCIATION
6385
6386 @param seq Range sequence to be traversed
6387 @param seq_init_param First parameter for seq->init()
6388 @param n_ranges Number of ranges in the sequence
6389 @param mode Flags, see the description section for the details
6390 @param buf INOUT: memory buffer to be used
6391
6392 @note
6393 One must have called index_init() before calling this function. Several
6394 multi_range_read_init() calls may be made in course of one query.
6395
6396 Until WL#2623 is done (see its text, section 3.2), the following will
6397 also hold:
6398 The caller will guarantee that if "seq->init == mrr_ranges_array_init"
6399 then seq_init_param is an array of n_ranges KEY_MULTI_RANGE structures.
6400 This property will only be used by NDB handler until WL#2623 is done.
6401
6402 Buffer memory management is done according to the following scenario:
6403 The caller allocates the buffer and provides it to the callee by filling
6404 the members of HANDLER_BUFFER structure.
6405 The callee consumes all or some fraction of the provided buffer space, and
6406 sets the HANDLER_BUFFER members accordingly.
6407 The callee may use the buffer memory until the next multi_range_read_init()
6408 call is made, all records have been read, or until index_end() call is
6409 made, whichever comes first.
6410
6411 @retval 0 OK
6412 @retval 1 Error
6413 */
6414
6415 int
multi_range_read_init(RANGE_SEQ_IF * seq_funcs,void * seq_init_param,uint n_ranges,uint mode,HANDLER_BUFFER * buf)6416 handler::multi_range_read_init(RANGE_SEQ_IF *seq_funcs, void *seq_init_param,
6417 uint n_ranges, uint mode, HANDLER_BUFFER *buf)
6418 {
6419 DBUG_ENTER("handler::multi_range_read_init");
6420 mrr_iter= seq_funcs->init(seq_init_param, n_ranges, mode);
6421 mrr_funcs= *seq_funcs;
6422 mrr_is_output_sorted= MY_TEST(mode & HA_MRR_SORTED);
6423 mrr_have_range= FALSE;
6424 DBUG_RETURN(0);
6425 }
6426
6427
6428 /**
6429 Get next record in MRR scan
6430
6431 Default MRR implementation: read the next record
6432
6433 @param range_info OUT Undefined if HA_MRR_NO_ASSOCIATION flag is in effect
6434 Otherwise, the opaque value associated with the range
6435 that contains the returned record.
6436
6437 @retval 0 OK
6438 @retval other Error code
6439 */
6440
int handler::multi_range_read_next(char **range_info)
{
  int result= HA_ERR_END_OF_FILE;
  int range_res;
  DBUG_ENTER("handler::multi_range_read_next");

  // Set status for the need to update generated fields
  m_update_generated_read_fields= table->has_gcol();

  /* First call after init: no current range yet, fetch the first one. */
  if (!mrr_have_range)
  {
    mrr_have_range= TRUE;
    goto start;
  }

  do
  {
    /* Save a call if there can be only one row in range. */
    if (mrr_cur_range.range_flag != (UNIQUE_RANGE | EQ_RANGE))
    {
      result= read_range_next();
      /* On success or non-EOF errors jump to the end. */
      if (result != HA_ERR_END_OF_FILE)
        break;
    }
    else
    {
      /* Unique equality range already returned its single row; however a
         semi-consistent read may require re-scanning the same range. */
      if (was_semi_consistent_read())
        goto scan_it_again;
    }

start:
    /* Try the next range(s) until one matches a record. */
    while (!(range_res= mrr_funcs.next(mrr_iter, &mrr_cur_range)))
    {
scan_it_again:
      result= read_range_first(mrr_cur_range.start_key.keypart_map ?
                                 &mrr_cur_range.start_key : 0,
                               mrr_cur_range.end_key.keypart_map ?
                                 &mrr_cur_range.end_key : 0,
                               MY_TEST(mrr_cur_range.range_flag & EQ_RANGE),
                               mrr_is_output_sorted);
      if (result != HA_ERR_END_OF_FILE)
        break;
    }
  }
  while ((result == HA_ERR_END_OF_FILE) && !range_res);

  /* Opaque per-range value for the caller (undefined without association). */
  *range_info= mrr_cur_range.ptr;

  /* Update virtual generated fields */
  if (!result && m_update_generated_read_fields)
  {
    result= update_generated_read_fields(table->record[0], table, active_index);
    m_update_generated_read_fields= false;
  }

  DBUG_PRINT("exit",("handler::multi_range_read_next result %d", result));
  DBUG_RETURN(result);
}
6501
6502
6503 /****************************************************************************
6504 * DS-MRR implementation
6505 ***************************************************************************/
6506
6507 /**
6508 DS-MRR: Initialize and start MRR scan
6509
6510 Initialize and start the MRR scan. Depending on the mode parameter, this
6511 may use default or DS-MRR implementation.
6512
6513 The DS-MRR implementation will use a second handler object (h2) for
6514 doing scan on the index:
6515 - on the first call to this function the h2 handler will be created
6516 and h2 will be opened using the same index as the main handler
6517 is set to use. The index scan on the main index will be closed
6518 and it will be re-opened to read records from the table using either
6519 no key or the primary key. The h2 handler will be deleted when
6520 reset() is called (which should happen on the end of the statement).
6521 - when dsmrr_close() is called the index scan on h2 is closed.
6522 - on following calls to this function one of the following must be valid:
6523 a. if dsmrr_close has been called:
6524 the main handler (h) must be open on an index, h2 will be opened
6525 using this index, and the index on h will be closed and
6526 h will be re-opened to read reads from the table using either
6527 no key or the primary key.
6528 b. dsmrr_close has not been called:
6529 h2 will already be open, the main handler h must be set up
6530 to read records from the table (handler->inited is RND) either
6531 using the primary index or using no index at all.
6532
6533 @param h_arg Table handler to be used
6534 @param seq_funcs Interval sequence enumeration functions
6535 @param seq_init_param Interval sequence enumeration parameter
6536 @param n_ranges Number of ranges in the sequence.
6537 @param mode HA_MRR_* modes to use
6538 @param buf INOUT Buffer to use
6539
6540 @retval 0 Ok, Scan started.
6541 @retval other Error
6542 */
6543
int DsMrr_impl::dsmrr_init(handler *h_arg, RANGE_SEQ_IF *seq_funcs,
                           void *seq_init_param, uint n_ranges, uint mode,
                           HANDLER_BUFFER *buf)
{
  uint elem_size;
  int retval= 0;
  DBUG_ENTER("DsMrr_impl::dsmrr_init");
  THD *thd= h_arg->table->in_use; // current THD

  /*
    index_merge may invoke a scan on an object for which dsmrr_info[_const]
    has not been called, so set the owner handler here as well.
  */
  h= h_arg;

  /*
    Fall back to the default MRR implementation when MRR is disabled for
    this key (hint/optimizer switch), when the caller explicitly requested
    the default implementation, or when sorted output is required.
  */
  if (!hint_key_state(thd, h->table, h->active_index,
                      MRR_HINT_ENUM, OPTIMIZER_SWITCH_MRR) ||
      mode & (HA_MRR_USE_DEFAULT_IMPL | HA_MRR_SORTED)) // DS-MRR doesn't sort
  {
    use_default_impl= TRUE;
    retval= h->handler::multi_range_read_init(seq_funcs, seq_init_param,
                                              n_ranges, mode, buf);
    DBUG_RETURN(retval);
  }

  /*
    This assert will hit if we have pushed an index condition to the
    primary key index and then "change our mind" and use a different
    index for retrieving data with MRR. One of the following criteria
    must be true:
    1. We have not pushed an index condition on this handler.
    2. We have pushed an index condition and this is on the currently used
       index.
    3. We have pushed an index condition but this is not for the primary key.
    4. We have pushed an index condition and this has been transferred to
       the clone (h2) of the handler object.
  */
  assert(!h->pushed_idx_cond ||
         h->pushed_idx_cond_keyno == h->active_index ||
         h->pushed_idx_cond_keyno != table->s->primary_key ||
         (h2 && h->pushed_idx_cond_keyno == h2->active_index));

  rowids_buf= buf->buffer;

  is_mrr_assoc= !MY_TEST(mode & HA_MRR_NO_ASSOCIATION);

  if (is_mrr_assoc)
  {
    assert(!thd->status_var_aggregated);
    table->in_use->status_var.ha_multi_range_read_init_count++;
  }

  rowids_buf_end= buf->buffer_end;
  /* Each buffer element is a rowid, plus a range_id pointer if associated. */
  elem_size= h->ref_length + (int)is_mrr_assoc * sizeof(void*);
  /* Round the usable buffer down to a whole number of elements. */
  rowids_buf_last= rowids_buf +
                      ((rowids_buf_end - rowids_buf)/ elem_size)*
                      elem_size;
  rowids_buf_end= rowids_buf_last;

  /*
    The DS-MRR scan uses a second handler object (h2) for doing the
    index scan. Create this by cloning the primary handler
    object. The h2 handler object is deleted when DsMrr_impl::reset()
    is called.
  */
  if (!h2)
  {
    handler *new_h2;
    /*
      ::clone() takes up a lot of stack, especially on 64 bit platforms.
      The constant 5 is an empiric result.
      @todo Is this still the case? Leave it as it is for now but could
      likely be removed?
    */
    if (check_stack_overrun(thd, 5*STACK_MIN_SIZE, (uchar*) &new_h2))
      DBUG_RETURN(1);

    if (!(new_h2= h->clone(h->table->s->normalized_path.str, thd->mem_root)))
      DBUG_RETURN(1);
    h2= new_h2; /* Ok, now can put it into h2 */
    table->prepare_for_position();
  }

  /*
    Open the index scan on h2 using the key from the primary handler.
  */
  if (h2->active_index == MAX_KEY)
  {
    assert(h->active_index != MAX_KEY);
    const uint mrr_keyno= h->active_index;

    if ((retval= h2->ha_external_lock(thd, h->m_lock_type)))
      goto error;

    if ((retval= h2->extra(HA_EXTRA_KEYREAD)))
      goto error;

    if ((retval= h2->ha_index_init(mrr_keyno, false)))
      goto error;

    // Transfer ICP from h to h2
    if (mrr_keyno == h->pushed_idx_cond_keyno)
    {
      if (h2->idx_cond_push(mrr_keyno, h->pushed_idx_cond))
      {
        retval= 1;
        goto error;
      }
    }
    else
    {
      // Cancel any potentially previously pushed index conditions
      h2->cancel_pushed_idx_cond();
    }
  }
  else
  {
    /*
      h2 has already an open index. This happens when the DS-MRR scan
      is re-started without closing it first. In this case the primary
      handler must be used for reading records from the table, ie. it
      must not be opened for doing a new range scan. In this case
      the active_index must either not be set or be the primary key.
    */
    assert(h->inited == handler::RND);
    assert(h->active_index == MAX_KEY ||
           h->active_index == table->s->primary_key);
  }

  /*
    The index scan is now transferred to h2 and we can close the open
    index scan on the primary handler.
  */
  if (h->inited == handler::INDEX)
  {
    /*
      Calling h->ha_index_end() will invoke dsmrr_close() for this object,
      which will close the index scan on h2. We need to keep it open, so
      temporarily move h2 out of the DsMrr object.
    */
    handler *save_h2= h2;
    h2= NULL;
    retval= h->ha_index_end();
    h2= save_h2;
    if (retval)
      goto error;
  }

  /*
    Verify consistency between h and h2.
  */
  assert(h->inited != handler::INDEX);
  assert(h->active_index == MAX_KEY ||
         h->active_index == table->s->primary_key);
  assert(h2->inited == handler::INDEX);
  assert(h2->active_index != MAX_KEY);
  assert(h->m_lock_type == h2->m_lock_type);

  if ((retval= h2->handler::multi_range_read_init(seq_funcs, seq_init_param,
                                                  n_ranges, mode, buf)))
    goto error;

  /* Fill the buffer with the first batch of sorted rowids. */
  if ((retval= dsmrr_fill_buffer()))
    goto error;

  /*
    If the above call has scanned through all intervals in *seq, then
    adjust *buf to indicate that the remaining buffer space will not be used.
  */
  if (dsmrr_eof)
    buf->end_of_used_area= rowids_buf_last;

  /*
    h->inited == INDEX may occur when 'range checked for each record' is
    used.
  */
  if ((h->inited != handler::RND) &&
      ((h->inited==handler::INDEX? h->ha_index_end(): FALSE) ||
       (h->ha_rnd_init(FALSE))))
  {
    retval= 1;
    goto error;
  }

  use_default_impl= FALSE;
  h->mrr_funcs= *seq_funcs;

  DBUG_RETURN(0);
error:
  /* Tear down h2 completely; it will be re-created on the next init. */
  h2->ha_index_or_rnd_end();
  h2->ha_external_lock(thd, F_UNLCK);
  h2->ha_close();
  delete h2;
  h2= NULL;
  assert(retval != 0);
  DBUG_RETURN(retval);
}
6741
6742
dsmrr_close()6743 void DsMrr_impl::dsmrr_close()
6744 {
6745 DBUG_ENTER("DsMrr_impl::dsmrr_close");
6746
6747 // If there is an open index on h2, then close it
6748 if (h2 && h2->active_index != MAX_KEY)
6749 {
6750 h2->ha_index_or_rnd_end();
6751 h2->ha_external_lock(current_thd, F_UNLCK);
6752 }
6753 use_default_impl= true;
6754 DBUG_VOID_RETURN;
6755 }
6756
6757
reset()6758 void DsMrr_impl::reset()
6759 {
6760 DBUG_ENTER("DsMrr_impl::reset");
6761
6762 if (h2)
6763 {
6764 // Close any ongoing DS-MRR scan
6765 dsmrr_close();
6766
6767 // Close and delete the h2 handler
6768 h2->ha_close();
6769 delete h2;
6770 h2= NULL;
6771 }
6772 DBUG_VOID_RETURN;
6773 }
6774
6775
rowid_cmp(void * h,uchar * a,uchar * b)6776 static int rowid_cmp(void *h, uchar *a, uchar *b)
6777 {
6778 return ((handler*)h)->cmp_ref(a, b);
6779 }
6780
6781
6782 /**
6783 DS-MRR: Fill the buffer with rowids and sort it by rowid
6784
6785 {This is an internal function of DiskSweep MRR implementation}
6786 Scan the MRR ranges and collect ROWIDs (or {ROWID, range_id} pairs) into
6787 buffer. When the buffer is full or scan is completed, sort the buffer by
6788 rowid and return.
6789
6790 The function assumes that rowids buffer is empty when it is invoked.
6791
6792 @param h Table handler
6793
6794 @retval 0 OK, the next portion of rowids is in the buffer,
6795 properly ordered
6796 @retval other Error
6797 */
6798
dsmrr_fill_buffer()6799 int DsMrr_impl::dsmrr_fill_buffer()
6800 {
6801 char *range_info;
6802 int res= 0;
6803 DBUG_ENTER("DsMrr_impl::dsmrr_fill_buffer");
6804 assert(rowids_buf < rowids_buf_end);
6805
6806 /*
6807 Set key_read to TRUE since we only read fields from the index.
6808 This ensures that any virtual columns are read from index and are not
6809 attempted to be evaluated from base columns.
6810 (Do not use TABLE::set_keyread() since the MRR implementation operates
6811 with two handler objects, and set_keyread() would manipulate the keyread
6812 property of the wrong handler. MRR sets the handlers' keyread properties
6813 when initializing the MRR operation, independent of this call).
6814 */
6815 assert(table->key_read == FALSE);
6816 table->key_read= TRUE;
6817
6818 rowids_buf_cur= rowids_buf;
6819 while ((rowids_buf_cur < rowids_buf_end) &&
6820 !(res= h2->handler::multi_range_read_next(&range_info)))
6821 {
6822 KEY_MULTI_RANGE *curr_range= &h2->handler::mrr_cur_range;
6823 if (h2->mrr_funcs.skip_index_tuple &&
6824 h2->mrr_funcs.skip_index_tuple(h2->mrr_iter, curr_range->ptr))
6825 continue;
6826
6827 /* Put rowid, or {rowid, range_id} pair into the buffer */
6828 h2->position(table->record[0]);
6829 memcpy(rowids_buf_cur, h2->ref, h2->ref_length);
6830 rowids_buf_cur += h2->ref_length;
6831
6832 if (is_mrr_assoc)
6833 {
6834 memcpy(rowids_buf_cur, &range_info, sizeof(void*));
6835 rowids_buf_cur += sizeof(void*);
6836 }
6837 }
6838
6839 // Restore key_read since the next read operation will read complete rows
6840 table->key_read= FALSE;
6841
6842 if (res && res != HA_ERR_END_OF_FILE)
6843 DBUG_RETURN(res);
6844 dsmrr_eof= MY_TEST(res == HA_ERR_END_OF_FILE);
6845
6846 /* Sort the buffer contents by rowid */
6847 uint elem_size= h->ref_length + (int)is_mrr_assoc * sizeof(void*);
6848 size_t n_rowids= (rowids_buf_cur - rowids_buf) / elem_size;
6849
6850 my_qsort2(rowids_buf, n_rowids, elem_size, (qsort2_cmp)rowid_cmp,
6851 (void*)h);
6852 rowids_buf_last= rowids_buf_cur;
6853 rowids_buf_cur= rowids_buf;
6854 DBUG_RETURN(0);
6855 }
6856
6857
6858 /*
6859 DS-MRR implementation: multi_range_read_next() function
6860 */
6861
dsmrr_next(char ** range_info)6862 int DsMrr_impl::dsmrr_next(char **range_info)
6863 {
6864 int res;
6865 uchar *cur_range_info= 0;
6866 uchar *rowid;
6867
6868 if (use_default_impl)
6869 return h->handler::multi_range_read_next(range_info);
6870
6871 do
6872 {
6873 if (rowids_buf_cur == rowids_buf_last)
6874 {
6875 if (dsmrr_eof)
6876 {
6877 res= HA_ERR_END_OF_FILE;
6878 goto end;
6879 }
6880
6881 res= dsmrr_fill_buffer();
6882 if (res)
6883 goto end;
6884 }
6885
6886 /* return eof if there are no rowids in the buffer after re-fill attempt */
6887 if (rowids_buf_cur == rowids_buf_last)
6888 {
6889 res= HA_ERR_END_OF_FILE;
6890 goto end;
6891 }
6892 rowid= rowids_buf_cur;
6893
6894 if (is_mrr_assoc)
6895 memcpy(&cur_range_info, rowids_buf_cur + h->ref_length, sizeof(uchar*));
6896
6897 rowids_buf_cur += h->ref_length + sizeof(void*) * MY_TEST(is_mrr_assoc);
6898 if (h2->mrr_funcs.skip_record &&
6899 h2->mrr_funcs.skip_record(h2->mrr_iter, (char *) cur_range_info, rowid))
6900 continue;
6901 res= h->ha_rnd_pos(table->record[0], rowid);
6902 break;
6903 } while (true);
6904
6905 if (is_mrr_assoc)
6906 {
6907 memcpy(range_info, rowid + h->ref_length, sizeof(void*));
6908 }
6909 end:
6910 return res;
6911 }
6912
6913
6914 /*
6915 DS-MRR implementation: multi_range_read_info() function
6916 */
dsmrr_info(uint keyno,uint n_ranges,uint rows,uint * bufsz,uint * flags,Cost_estimate * cost)6917 ha_rows DsMrr_impl::dsmrr_info(uint keyno, uint n_ranges, uint rows,
6918 uint *bufsz, uint *flags, Cost_estimate *cost)
6919 {
6920 ha_rows res MY_ATTRIBUTE((unused));
6921 uint def_flags= *flags;
6922 uint def_bufsz= *bufsz;
6923
6924 /* Get cost/flags/mem_usage of default MRR implementation */
6925 res=
6926 h->handler::multi_range_read_info(keyno, n_ranges, rows, &def_bufsz,
6927 &def_flags, cost);
6928 assert(!res);
6929
6930 if ((*flags & HA_MRR_USE_DEFAULT_IMPL) ||
6931 choose_mrr_impl(keyno, rows, flags, bufsz, cost))
6932 {
6933 /* Default implementation is choosen */
6934 DBUG_PRINT("info", ("Default MRR implementation choosen"));
6935 *flags= def_flags;
6936 *bufsz= def_bufsz;
6937 assert(*flags & HA_MRR_USE_DEFAULT_IMPL);
6938 }
6939 else
6940 {
6941 /* *flags and *bufsz were set by choose_mrr_impl */
6942 DBUG_PRINT("info", ("DS-MRR implementation choosen"));
6943 }
6944 return 0;
6945 }
6946
6947
6948 /*
6949 DS-MRR Implementation: multi_range_read_info_const() function
6950 */
6951
dsmrr_info_const(uint keyno,RANGE_SEQ_IF * seq,void * seq_init_param,uint n_ranges,uint * bufsz,uint * flags,Cost_estimate * cost)6952 ha_rows DsMrr_impl::dsmrr_info_const(uint keyno, RANGE_SEQ_IF *seq,
6953 void *seq_init_param, uint n_ranges,
6954 uint *bufsz, uint *flags, Cost_estimate *cost)
6955 {
6956 ha_rows rows;
6957 uint def_flags= *flags;
6958 uint def_bufsz= *bufsz;
6959 /* Get cost/flags/mem_usage of default MRR implementation */
6960 rows= h->handler::multi_range_read_info_const(keyno, seq, seq_init_param,
6961 n_ranges, &def_bufsz,
6962 &def_flags, cost);
6963 if (rows == HA_POS_ERROR)
6964 {
6965 /* Default implementation can't perform MRR scan => we can't either */
6966 return rows;
6967 }
6968
6969 /*
6970 If HA_MRR_USE_DEFAULT_IMPL has been passed to us, that is an order to
6971 use the default MRR implementation (we need it for UPDATE/DELETE).
6972 Otherwise, make a choice based on cost and mrr* flags of
6973 @@optimizer_switch.
6974 */
6975 if ((*flags & HA_MRR_USE_DEFAULT_IMPL) ||
6976 choose_mrr_impl(keyno, rows, flags, bufsz, cost))
6977 {
6978 DBUG_PRINT("info", ("Default MRR implementation choosen"));
6979 *flags= def_flags;
6980 *bufsz= def_bufsz;
6981 assert(*flags & HA_MRR_USE_DEFAULT_IMPL);
6982 }
6983 else
6984 {
6985 /* *flags and *bufsz were set by choose_mrr_impl */
6986 DBUG_PRINT("info", ("DS-MRR implementation choosen"));
6987 }
6988 return rows;
6989 }
6990
6991
6992 /**
6993 DS-MRR Internals: Choose between Default MRR implementation and DS-MRR
6994
6995 Make the choice between using Default MRR implementation and DS-MRR.
6996 This function contains common functionality factored out of dsmrr_info()
6997 and dsmrr_info_const(). The function assumes that the default MRR
6998 implementation's applicability requirements are satisfied.
6999
7000 @param keyno Index number
7001 @param rows E(full rows to be retrieved)
7002 @param flags IN MRR flags provided by the MRR user
7003 OUT If DS-MRR is choosen, flags of DS-MRR implementation
7004 else the value is not modified
7005 @param bufsz IN If DS-MRR is choosen, buffer use of DS-MRR implementation
7006 else the value is not modified
7007 @param cost IN Cost of default MRR implementation
7008 OUT If DS-MRR is choosen, cost of DS-MRR scan
7009 else the value is not modified
7010
7011 @retval TRUE Default MRR implementation should be used
7012 @retval FALSE DS-MRR implementation should be used
7013 */
7014
choose_mrr_impl(uint keyno,ha_rows rows,uint * flags,uint * bufsz,Cost_estimate * cost)7015 bool DsMrr_impl::choose_mrr_impl(uint keyno, ha_rows rows, uint *flags,
7016 uint *bufsz, Cost_estimate *cost)
7017 {
7018 bool res;
7019 THD *thd= current_thd;
7020
7021 const bool mrr_on= hint_key_state(thd, table, keyno, MRR_HINT_ENUM,
7022 OPTIMIZER_SWITCH_MRR);
7023 const bool force_dsmrr_by_hints=
7024 hint_key_state(thd, table, keyno, MRR_HINT_ENUM, 0) ||
7025 hint_table_state(thd, table, BKA_HINT_ENUM, 0);
7026
7027 if (!(mrr_on || force_dsmrr_by_hints) ||
7028 *flags & (HA_MRR_INDEX_ONLY | HA_MRR_SORTED) || // Unsupported by DS-MRR
7029 (keyno == table->s->primary_key && h->primary_key_is_clustered()) ||
7030 key_uses_partial_cols(table, keyno) ||
7031 table->s->tmp_table != NO_TMP_TABLE)
7032 {
7033 /* Use the default implementation, don't modify args: See comments */
7034 return TRUE;
7035 }
7036
7037 /*
7038 If @@optimizer_switch has "mrr_cost_based" on, we should avoid
7039 using DS-MRR for queries where it is likely that the records are
7040 stored in memory. Since there is currently no way to determine
7041 this, we use a heuristic:
7042 a) if the storage engine has a memory buffer, DS-MRR is only
7043 considered if the table size is bigger than the buffer.
7044 b) if the storage engine does not have a memory buffer, DS-MRR is
7045 only considered if the table size is bigger than 100MB.
7046 c) Since there is an initial setup cost of DS-MRR, so it is only
7047 considered if at least 50 records will be read.
7048 */
7049 if (thd->optimizer_switch_flag(OPTIMIZER_SWITCH_MRR_COST_BASED) &&
7050 !force_dsmrr_by_hints)
7051 {
7052 /*
7053 If the storage engine has a database buffer we use this as the
7054 minimum size the table should have before considering DS-MRR.
7055 */
7056 longlong min_file_size= table->file->get_memory_buffer_size();
7057 if (min_file_size == -1)
7058 {
7059 // No estimate for database buffer
7060 min_file_size= 100 * 1024 * 1024; // 100 MB
7061 }
7062
7063 if (table->file->stats.data_file_length <
7064 static_cast<ulonglong>(min_file_size) ||
7065 rows <= 50)
7066 return true; // Use the default implementation
7067 }
7068
7069 Cost_estimate dsmrr_cost;
7070 if (get_disk_sweep_mrr_cost(keyno, rows, *flags, bufsz, &dsmrr_cost))
7071 return TRUE;
7072
7073 /*
7074 If @@optimizer_switch has "mrr" on and "mrr_cost_based" off, then set cost
7075 of DS-MRR to be minimum of DS-MRR and Default implementations cost. This
7076 allows one to force use of DS-MRR whenever it is applicable without
7077 affecting other cost-based choices. Note that if MRR or BKA hint is
7078 specified, DS-MRR will be used regardless of cost.
7079 */
7080 const bool force_dsmrr=
7081 (force_dsmrr_by_hints ||
7082 !thd->optimizer_switch_flag(OPTIMIZER_SWITCH_MRR_COST_BASED));
7083
7084 if (force_dsmrr && dsmrr_cost.total_cost() > cost->total_cost())
7085 dsmrr_cost= *cost;
7086
7087 if (force_dsmrr || (dsmrr_cost.total_cost() <= cost->total_cost()))
7088 {
7089 *flags &= ~HA_MRR_USE_DEFAULT_IMPL; /* Use the DS-MRR implementation */
7090 *flags &= ~HA_MRR_SUPPORT_SORTED; /* We can't provide ordered output */
7091 *cost= dsmrr_cost;
7092 res= FALSE;
7093 }
7094 else
7095 {
7096 /* Use the default MRR implementation */
7097 res= TRUE;
7098 }
7099 return res;
7100 }
7101
7102
7103 static void get_sort_and_sweep_cost(TABLE *table, ha_rows nrows,
7104 Cost_estimate *cost);
7105
7106
7107 /**
7108 Get cost of DS-MRR scan
7109
7110 @param keynr Index to be used
7111 @param rows E(Number of rows to be scanned)
7112 @param flags Scan parameters (HA_MRR_* flags)
7113 @param buffer_size INOUT Buffer size
7114 @param cost OUT The cost
7115
7116 @retval FALSE OK
7117 @retval TRUE Error, DS-MRR cannot be used (the buffer is too small
7118 for even 1 rowid)
7119 */
7120
get_disk_sweep_mrr_cost(uint keynr,ha_rows rows,uint flags,uint * buffer_size,Cost_estimate * cost)7121 bool DsMrr_impl::get_disk_sweep_mrr_cost(uint keynr, ha_rows rows, uint flags,
7122 uint *buffer_size,
7123 Cost_estimate *cost)
7124 {
7125 ha_rows rows_in_last_step;
7126 uint n_full_steps;
7127
7128 const uint elem_size= h->ref_length +
7129 sizeof(void*) * (!MY_TEST(flags & HA_MRR_NO_ASSOCIATION));
7130 const ha_rows max_buff_entries= *buffer_size / elem_size;
7131
7132 if (!max_buff_entries)
7133 return TRUE; /* Buffer has not enough space for even 1 rowid */
7134
7135 /* Number of iterations we'll make with full buffer */
7136 n_full_steps= (uint)floor(rows2double(rows) / max_buff_entries);
7137
7138 /*
7139 Get numbers of rows we'll be processing in last iteration, with
7140 non-full buffer
7141 */
7142 rows_in_last_step= rows % max_buff_entries;
7143
7144 assert(cost->is_zero());
7145
7146 if (n_full_steps)
7147 {
7148 get_sort_and_sweep_cost(table, max_buff_entries, cost);
7149 cost->multiply(n_full_steps);
7150 }
7151 else
7152 {
7153 /*
7154 Adjust buffer size since only parts of the buffer will be used:
7155 1. Adjust record estimate for the last scan to reduce likelyhood
7156 of needing more than one scan by adding 20 percent to the
7157 record estimate and by ensuring this is at least 100 records.
7158 2. If the estimated needed buffer size is lower than suggested by
7159 the caller then set it to the estimated buffer size.
7160 */
7161 const ha_rows keys_in_buffer=
7162 max<ha_rows>(static_cast<ha_rows>(1.2 * rows_in_last_step), 100);
7163 *buffer_size= min<ulong>(*buffer_size,
7164 static_cast<ulong>(keys_in_buffer) * elem_size);
7165 }
7166
7167 Cost_estimate last_step_cost;
7168 get_sort_and_sweep_cost(table, rows_in_last_step, &last_step_cost);
7169 (*cost)+= last_step_cost;
7170
7171 /*
7172 Cost of memory is not included in the total_cost() function and
7173 thus will not be considered when comparing costs. Still, we
7174 record it in the cost estimate object for future use.
7175 */
7176 cost->add_mem(*buffer_size);
7177
7178 /* Total cost of all index accesses */
7179 (*cost)+= h->index_scan_cost(keynr, 1, static_cast<double>(rows));
7180
7181 /*
7182 Add CPU cost for processing records (see
7183 @handler::multi_range_read_info_const()).
7184 */
7185 cost->add_cpu(table->cost_model()->row_evaluate_cost(
7186 static_cast<double>(rows)));
7187 return FALSE;
7188 }
7189
7190
7191 /*
7192 Get cost of one sort-and-sweep step
7193
7194 SYNOPSIS
7195 get_sort_and_sweep_cost()
7196 table Table being accessed
7197 nrows Number of rows to be sorted and retrieved
7198 cost OUT The cost
7199
7200 DESCRIPTION
7201 Get cost of these operations:
7202 - sort an array of #nrows ROWIDs using qsort
7203 - read #nrows records from table in a sweep.
7204 */
7205
7206 static
get_sort_and_sweep_cost(TABLE * table,ha_rows nrows,Cost_estimate * cost)7207 void get_sort_and_sweep_cost(TABLE *table, ha_rows nrows, Cost_estimate *cost)
7208 {
7209 assert(cost->is_zero());
7210 if (nrows)
7211 {
7212 get_sweep_read_cost(table, nrows, FALSE, cost);
7213
7214 /*
7215 @todo CostModel: For the old version of the cost model the
7216 following code should be used. For the new version of the cost
7217 model Cost_model::key_compare_cost() should be used. When
7218 removing support for the old cost model this code should be
7219 removed. The reason for this is that we should get rid of the
7220 ROWID_COMPARE_SORT_COST and use key_compare_cost() instead. For
7221 the current value returned by key_compare_cost() this would
7222 overestimate the cost for sorting.
7223 */
7224
7225 /*
7226 Constant for the cost of doing one key compare operation in the
7227 sort operation. We should have used the value returned by
7228 key_compare_cost() here but this would make the cost
7229 estimate of sorting very high for queries accessing many
7230 records. Until this constant is adjusted we introduce a constant
7231 that is more realistic. @todo: Replace this with
7232 key_compare_cost() when this has been given a realistic value.
7233 */
7234 const double ROWID_COMPARE_SORT_COST=
7235 table->cost_model()->key_compare_cost(1.0) / 10;
7236
7237 /* Add cost of qsort call: n * log2(n) * cost(rowid_comparison) */
7238
7239 // For the old version of the cost model this cost calculations should
7240 // be used....
7241 const double cpu_sort= nrows * log2(nrows) * ROWID_COMPARE_SORT_COST;
7242 // .... For the new cost model something like this should be used...
7243 // cpu_sort= nrows * log2(nrows) *
7244 // table->cost_model()->rowid_compare_cost();
7245 cost->add_cpu(cpu_sort);
7246 }
7247 }
7248
7249
7250 /**
7251 Get cost of reading nrows table records in a "disk sweep"
7252
7253 A disk sweep read is a sequence of handler->rnd_pos(rowid) calls that made
7254 for an ordered sequence of rowids.
7255
7256 We take into account that some of the records might be in a memory
7257 buffer while others need to be read from a secondary storage
7258 device. The model for this assumes hard disk IO. A disk read is
7259 performed as follows:
7260
7261 1. The disk head is moved to the needed cylinder
7262 2. The controller waits for the plate to rotate
7263 3. The data is transferred
7264
7265 Time to do #3 is insignificant compared to #2+#1.
7266
7267 Time to move the disk head is proportional to head travel distance.
7268
7269 Time to wait for the plate to rotate depends on whether the disk head
7270 was moved or not.
7271
7272 If disk head wasn't moved, the wait time is proportional to distance
7273 between the previous block and the block we're reading.
7274
7275 If the head was moved, we don't know how much we'll need to wait for the
7276 plate to rotate. We assume the wait time to be a variate with a mean of
7277 0.5 of full rotation time.
7278
7279 Our cost units are "random disk seeks". The cost of random disk seek is
7280 actually not a constant, it depends one range of cylinders we're going
7281 to access. We make it constant by introducing a fuzzy concept of "typical
7282 datafile length" (it's fuzzy as it's hard to tell whether it should
7283 include index file, temp.tables etc). Then random seek cost is:
7284
7285 1 = half_rotation_cost + move_cost * 1/3 * typical_data_file_length
7286
7287 We define half_rotation_cost as disk_seek_base_cost() (see
7288 Cost_model_server::disk_seek_base_cost()).
7289
7290 @param table Table to be accessed
7291 @param nrows Number of rows to retrieve
7292 @param interrupted true <=> Assume that the disk sweep will be
7293 interrupted by other disk IO. false - otherwise.
7294 @param[out] cost the cost
7295 */
7296
get_sweep_read_cost(TABLE * table,ha_rows nrows,bool interrupted,Cost_estimate * cost)7297 void get_sweep_read_cost(TABLE *table, ha_rows nrows, bool interrupted,
7298 Cost_estimate *cost)
7299 {
7300 DBUG_ENTER("get_sweep_read_cost");
7301
7302 assert(cost->is_zero());
7303 if(nrows > 0)
7304 {
7305 const Cost_model_table *const cost_model= table->cost_model();
7306
7307 // The total number of blocks used by this table
7308 double n_blocks=
7309 ceil(ulonglong2double(table->file->stats.data_file_length) / IO_SIZE);
7310 if (n_blocks < 1.0) // When data_file_length is 0
7311 n_blocks= 1.0;
7312
7313 /*
7314 The number of blocks that in average need to be read given that
7315 the records are uniformly distribution over the table.
7316 */
7317 double busy_blocks=
7318 n_blocks * (1.0 - pow(1.0 - 1.0/n_blocks, rows2double(nrows)));
7319 if (busy_blocks < 1.0)
7320 busy_blocks= 1.0;
7321
7322 DBUG_PRINT("info",("sweep: nblocks=%g, busy_blocks=%g", n_blocks,
7323 busy_blocks));
7324 /*
7325 The random access cost for reading the data pages will be the upper
7326 limit for the sweep_cost.
7327 */
7328 cost->add_io(cost_model->page_read_cost(busy_blocks));
7329 if (!interrupted)
7330 {
7331 Cost_estimate sweep_cost;
7332 /*
7333 Assume reading pages from disk is done in one 'sweep'.
7334
7335 The cost model and cost estimate for pages already in a memory
7336 buffer will be different from pages that needed to be read from
7337 disk. Calculate the number of blocks that likely already are
7338 in memory and the number of blocks that need to be read from
7339 disk.
7340 */
7341 const double busy_blocks_mem=
7342 busy_blocks * table->file->table_in_memory_estimate();
7343 const double busy_blocks_disk= busy_blocks - busy_blocks_mem;
7344 assert(busy_blocks_disk >= 0.0);
7345
7346 // Cost of accessing blocks in main memory buffer
7347 sweep_cost.add_io(cost_model->buffer_block_read_cost(busy_blocks_mem));
7348
7349 // Cost of reading blocks from disk in a 'sweep'
7350 const double seek_distance= (busy_blocks_disk > 1.0) ?
7351 n_blocks / busy_blocks_disk : n_blocks;
7352
7353 const double disk_cost=
7354 busy_blocks_disk * cost_model->disk_seek_cost(seek_distance);
7355 sweep_cost.add_io(disk_cost);
7356
7357 /*
7358 For some cases, ex: when only few blocks need to be read and the
7359 seek distance becomes very large, the sweep cost model can produce
7360 a cost estimate that is larger than the cost of random access.
7361 To handle this case, we use the sweep cost only when it is less
7362 than the random access cost.
7363 */
7364 if (sweep_cost < *cost)
7365 *cost= sweep_cost;
7366 }
7367 }
7368 DBUG_PRINT("info",("returning cost=%g", cost->total_cost()));
7369 DBUG_VOID_RETURN;
7370 }
7371
7372
7373 /****************************************************************************
7374 * DS-MRR implementation ends
7375 ***************************************************************************/
7376
7377 /** @brief
7378 Read first row between two ranges.
7379 Store ranges for future calls to read_range_next.
7380
7381 @param start_key Start key. Is 0 if no min range
7382 @param end_key End key. Is 0 if no max range
7383 @param eq_range_arg Set to 1 if start_key == end_key
7384 @param sorted Set to 1 if result should be sorted per key
7385
7386 @note
7387 Record is read into table->record[0]
7388
7389 @retval
7390 0 Found row
7391 @retval
7392 HA_ERR_END_OF_FILE No rows in range
7393 @retval
7394 \# Error code
7395 */
read_range_first(const key_range * start_key,const key_range * end_key,bool eq_range_arg,bool sorted)7396 int handler::read_range_first(const key_range *start_key,
7397 const key_range *end_key,
7398 bool eq_range_arg,
7399 bool sorted /* ignored */)
7400 {
7401 int result;
7402 DBUG_ENTER("handler::read_range_first");
7403
7404 eq_range= eq_range_arg;
7405 set_end_range(end_key, RANGE_SCAN_ASC);
7406
7407 range_key_part= table->key_info[active_index].key_part;
7408
7409 if (!start_key) // Read first record
7410 result= ha_index_first(table->record[0]);
7411 else
7412 result= ha_index_read_map(table->record[0],
7413 start_key->key,
7414 start_key->keypart_map,
7415 start_key->flag);
7416 if (result)
7417 DBUG_RETURN((result == HA_ERR_KEY_NOT_FOUND)
7418 ? HA_ERR_END_OF_FILE
7419 : result);
7420
7421 if (compare_key(end_range) <= 0)
7422 {
7423 DBUG_RETURN(0);
7424 }
7425 else
7426 {
7427 /*
7428 The last read row does not fall in the range. So request
7429 storage engine to release row lock if possible.
7430 */
7431 unlock_row();
7432 DBUG_RETURN(HA_ERR_END_OF_FILE);
7433 }
7434 }
7435
7436
7437 /** @brief
7438 Read next row between two endpoints.
7439
7440 @note
7441 Record is read into table->record[0]
7442
7443 @retval
7444 0 Found row
7445 @retval
7446 HA_ERR_END_OF_FILE No rows in range
7447 @retval
7448 \# Error code
7449 */
read_range_next()7450 int handler::read_range_next()
7451 {
7452 int result;
7453 DBUG_ENTER("handler::read_range_next");
7454
7455 if (eq_range)
7456 {
7457 /* We trust that index_next_same always gives a row in range */
7458 DBUG_RETURN(ha_index_next_same(table->record[0],
7459 end_range->key,
7460 end_range->length));
7461 }
7462 result= ha_index_next(table->record[0]);
7463 if (result)
7464 DBUG_RETURN(result);
7465
7466 if (compare_key(end_range) <= 0)
7467 {
7468 DBUG_RETURN(0);
7469 }
7470 else
7471 {
7472 /*
7473 The last read row does not fall in the range. So request
7474 storage engine to release row lock if possible.
7475 */
7476 unlock_row();
7477 DBUG_RETURN(HA_ERR_END_OF_FILE);
7478 }
7479 }
7480
7481 /**
7482 Check if one of the columns in a key is a virtual generated column.
7483 @param part the first part of the key to check
7484 @param length the length of the key
7485 @retval true if the key contains a virtual generated column
7486 @retval false if the key does not contain a virtual generated column
7487 */
key_has_vcol(const KEY_PART_INFO * part,uint length)7488 static bool key_has_vcol(const KEY_PART_INFO *part, uint length) {
7489 for (uint len = 0; len < length; len += part->store_length, ++part)
7490 if (part->field->is_virtual_gcol()) return true;
7491 return false;
7492 }
7493
set_end_range(const key_range * range,enum_range_scan_direction direction)7494 void handler::set_end_range(const key_range* range,
7495 enum_range_scan_direction direction)
7496 {
7497 if (range)
7498 {
7499 save_end_range= *range;
7500 end_range= &save_end_range;
7501 range_key_part= table->key_info[active_index].key_part;
7502 key_compare_result_on_equal= ((range->flag == HA_READ_BEFORE_KEY) ? 1 :
7503 (range->flag == HA_READ_AFTER_KEY) ? -1 : 0);
7504 m_virt_gcol_in_end_range = key_has_vcol(range_key_part, range->length);
7505 }
7506 else
7507 end_range= NULL;
7508
7509 range_scan_direction= direction;
7510 }
7511
7512
7513 /**
7514 Compare if found key (in row) is over max-value.
7515
7516 @param range range to compare to row. May be 0 for no range
7517
7518 @seealso
7519 key.cc::key_cmp()
7520
7521 @return
7522 The return value is SIGN(key_in_row - range_key):
7523
7524 - 0 : Key is equal to range or 'range' == 0 (no range)
7525 - -1 : Key is less than range
7526 - 1 : Key is larger than range
7527 */
compare_key(key_range * range)7528 int handler::compare_key(key_range *range)
7529 {
7530 int cmp;
7531 if (!range || in_range_check_pushed_down)
7532 return 0; // No max range
7533 cmp= key_cmp(range_key_part, range->key, range->length);
7534 if (!cmp)
7535 cmp= key_compare_result_on_equal;
7536 return cmp;
7537 }
7538
7539
7540 /*
7541 Compare if a found key (in row) is within the range.
7542
7543 This function is similar to compare_key() but checks the range scan
7544 direction to determine if this is a descending scan. This function
7545 is used by the index condition pushdown implementation to determine
7546 if the read record is within the range scan.
7547
7548 @param range Range to compare to row. May be NULL for no range.
7549
7550 @seealso
7551 handler::compare_key()
7552
7553 @return Returns whether the key is within the range
7554
7555 - 0 : Key is equal to range or 'range' == 0 (no range)
7556 - -1 : Key is within the current range
7557 - 1 : Key is outside the current range
7558 */
7559
compare_key_icp(const key_range * range) const7560 int handler::compare_key_icp(const key_range *range) const
7561 {
7562 int cmp;
7563 if (!range)
7564 return 0; // no max range
7565 cmp= key_cmp(range_key_part, range->key, range->length);
7566 if (!cmp)
7567 cmp= key_compare_result_on_equal;
7568 if (range_scan_direction == RANGE_SCAN_DESC)
7569 cmp= -cmp;
7570 return cmp;
7571 }
7572
7573 /**
7574 Change the offsets of all the fields in a key range.
7575
7576 @param range the key range
7577 @param key_part the first key part
7578 @param diff how much to change the offsets with
7579 */
7580 static inline void
move_key_field_offsets(const key_range * range,const KEY_PART_INFO * key_part,my_ptrdiff_t diff)7581 move_key_field_offsets(const key_range *range, const KEY_PART_INFO *key_part,
7582 my_ptrdiff_t diff)
7583 {
7584 for (size_t len= 0; len < range->length;
7585 len+= key_part->store_length, ++key_part)
7586 key_part->field->move_field_offset(diff);
7587 }
7588
7589 /**
7590 Check if the key in the given buffer (which is not necessarily
7591 TABLE::record[0]) is within range. Called by the storage engine to
7592 avoid reading too many rows.
7593
7594 @param buf the buffer that holds the key
7595 @retval -1 if the key is within the range
7596 @retval 0 if the key is equal to the end_range key, and
7597 key_compare_result_on_equal is 0
7598 @retval 1 if the key is outside the range
7599 */
compare_key_in_buffer(const uchar * buf) const7600 int handler::compare_key_in_buffer(const uchar *buf) const
7601 {
7602 assert(end_range != NULL);
7603
7604 /*
7605 End range on descending scans is only checked with ICP for now, and then we
7606 check it with compare_key_icp() instead of this function.
7607 */
7608 assert(range_scan_direction == RANGE_SCAN_ASC);
7609
7610 // Make the fields in the key point into the buffer instead of record[0].
7611 const my_ptrdiff_t diff= buf - table->record[0];
7612 if (diff != 0)
7613 move_key_field_offsets(end_range, range_key_part, diff);
7614
7615 // Compare the key in buf against end_range.
7616 int cmp= key_cmp(range_key_part, end_range->key, end_range->length);
7617 if (cmp == 0)
7618 cmp= key_compare_result_on_equal;
7619
7620 // Reset the field offsets.
7621 if (diff != 0)
7622 move_key_field_offsets(end_range, range_key_part, -diff);
7623
7624 return cmp;
7625 }
7626
index_read_idx_map(uchar * buf,uint index,const uchar * key,key_part_map keypart_map,enum ha_rkey_function find_flag)7627 int handler::index_read_idx_map(uchar * buf, uint index, const uchar * key,
7628 key_part_map keypart_map,
7629 enum ha_rkey_function find_flag)
7630 {
7631 int error, error1;
7632 error= index_init(index, 0);
7633 if (!error)
7634 {
7635 error= index_read_map(buf, key, keypart_map, find_flag);
7636 error1= index_end();
7637 }
7638 return error ? error : error1;
7639 }
7640
7641
calculate_key_len(TABLE * table,uint key,key_part_map keypart_map)7642 uint calculate_key_len(TABLE *table, uint key,
7643 key_part_map keypart_map)
7644 {
7645 /* works only with key prefixes */
7646 assert(((keypart_map + 1) & keypart_map) == 0);
7647
7648 KEY *key_info= table->key_info + key;
7649 KEY_PART_INFO *key_part= key_info->key_part;
7650 KEY_PART_INFO *end_key_part= key_part + actual_key_parts(key_info);
7651 uint length= 0;
7652
7653 while (key_part < end_key_part && keypart_map)
7654 {
7655 length+= key_part->store_length;
7656 keypart_map >>= 1;
7657 key_part++;
7658 }
7659 return length;
7660 }
7661
7662
7663 /**
7664 Returns a list of all known extensions.
7665
7666 No mutexes, worst case race is a minor surplus memory allocation
7667 We have to recreate the extension map if mysqld is restarted (for example
7668 within libmysqld)
7669
7670 @retval
7671 pointer pointer to TYPELIB structure
7672 */
exts_handlerton(THD * unused,plugin_ref plugin,void * arg)7673 static my_bool exts_handlerton(THD *unused, plugin_ref plugin,
7674 void *arg)
7675 {
7676 List<char> *found_exts= (List<char> *) arg;
7677 handlerton *hton= plugin_data<handlerton*>(plugin);
7678 handler *file;
7679 if (hton->state == SHOW_OPTION_YES && hton->create &&
7680 (file= hton->create(hton, (TABLE_SHARE*) 0, current_thd->mem_root)))
7681 {
7682 List_iterator_fast<char> it(*found_exts);
7683 const char **ext, *old_ext;
7684
7685 for (ext= file->bas_ext(); *ext; ext++)
7686 {
7687 while ((old_ext= it++))
7688 {
7689 if (!strcmp(old_ext, *ext))
7690 break;
7691 }
7692 if (!old_ext)
7693 found_exts->push_back((char *) *ext);
7694
7695 it.rewind();
7696 }
7697 delete file;
7698 }
7699 return FALSE;
7700 }
7701
/**
  Returns a list of all file extensions known to any storage engine,
  plus the trigger-related extensions (TRG_EXT, TRN_EXT).

  The TYPELIB and its name array are allocated with sql_alloc(), i.e. on
  the current memory root, so the result must not be kept beyond the
  lifetime of that root. The map is recreated on every call.

  @retval pointer  pointer to TYPELIB structure
*/
TYPELIB* ha_known_exts()
{
  TYPELIB *known_extensions = (TYPELIB*) sql_alloc(sizeof(TYPELIB));
  known_extensions->name= "known_exts";
  known_extensions->type_lengths= NULL;

  List<char> found_exts;
  const char **ext, *old_ext;

  // Trigger files are handled by the server, not by any engine
  found_exts.push_back((char*) TRG_EXT);
  found_exts.push_back((char*) TRN_EXT);

  // Ask every storage engine plugin for its extensions
  plugin_foreach(NULL, exts_handlerton,
                 MYSQL_STORAGE_ENGINE_PLUGIN, &found_exts);

  // +1 for the terminating NULL entry expected by TYPELIB consumers
  size_t arr_length= sizeof(char *)* (found_exts.elements+1);
  ext= (const char **) sql_alloc(arr_length);

  assert(NULL != ext);
  known_extensions->count= found_exts.elements;
  known_extensions->type_names= ext;

  List_iterator_fast<char> it(found_exts);
  while ((old_ext= it++))
    *ext++= old_ext;
  *ext= NULL;
  return known_extensions;
}
7730
7731
stat_print(THD * thd,const char * type,size_t type_len,const char * file,size_t file_len,const char * status,size_t status_len)7732 static bool stat_print(THD *thd, const char *type, size_t type_len,
7733 const char *file, size_t file_len,
7734 const char *status, size_t status_len)
7735 {
7736 Protocol *protocol= thd->get_protocol();
7737 protocol->start_row();
7738 protocol->store(type, type_len, system_charset_info);
7739 protocol->store(file, file_len, system_charset_info);
7740 protocol->store(status, status_len, system_charset_info);
7741 if (protocol->end_row())
7742 return TRUE;
7743 return FALSE;
7744 }
7745
7746
showstat_handlerton(THD * thd,plugin_ref plugin,void * arg)7747 static my_bool showstat_handlerton(THD *thd, plugin_ref plugin,
7748 void *arg)
7749 {
7750 enum ha_stat_type stat= *(enum ha_stat_type *) arg;
7751 handlerton *hton= plugin_data<handlerton*>(plugin);
7752 if (hton->state == SHOW_OPTION_YES && hton->show_status &&
7753 hton->show_status(hton, thd, stat_print, stat))
7754 return TRUE;
7755 return FALSE;
7756 }
7757
/**
  Send a SHOW ENGINE ... STATUS/MUTEX result set to the client.

  @param thd      connection descriptor
  @param db_type  storage engine to report on, or NULL for all engines
  @param stat     which status variant was requested

  @retval FALSE  success (result set and EOF sent)
  @retval TRUE   failure while producing the result set
*/
bool ha_show_status(THD *thd, handlerton *db_type, enum ha_stat_type stat)
{
  List<Item> field_list;
  bool result;

  /* Result set metadata: Type | Name | Status. */
  field_list.push_back(new Item_empty_string("Type",10));
  field_list.push_back(new Item_empty_string("Name",FN_REFLEN));
  field_list.push_back(new Item_empty_string("Status",10));

  if (thd->send_result_metadata(&field_list,
                                Protocol::SEND_NUM_ROWS | Protocol::SEND_EOF))
    return TRUE;

  if (db_type == NULL)
  {
    /* No specific engine requested: query all loaded storage engines. */
    result= plugin_foreach(thd, showstat_handlerton,
                           MYSQL_STORAGE_ENGINE_PLUGIN, &stat);
  }
  else
  {
    if (db_type->state != SHOW_OPTION_YES)
    {
      /* Engine is installed but disabled: report a single DISABLED row. */
      const LEX_STRING *name=&hton2plugin[db_type->slot]->name;
      result= stat_print(thd, name->str, name->length,
                         "", 0, "DISABLED", 8) ? 1 : 0;
    }
    else
    {
      DBUG_EXECUTE_IF("simulate_show_status_failure",
                      DBUG_SET("+d,simulate_net_write_failure"););
      /* An engine without a show_status hook yields an empty result set. */
      result= db_type->show_status &&
              db_type->show_status(db_type, thd, stat_print, stat) ? 1 : 0;
      DBUG_EXECUTE_IF("simulate_show_status_failure",
                      DBUG_SET("-d,simulate_net_write_failure"););
    }
  }

  /* Only terminate the result set with EOF if nothing failed above. */
  if (!result)
    my_eof(thd);
  return result;
}
7799
7800 /*
  Function to check if the conditions for row-based binlogging are
  correct for the table.
7803
7804 A row in the given table should be replicated if:
7805 - Row-based replication is enabled in the current thread
7806 - The binlog is enabled
7807 - It is not a temporary table
7808 - The binary log is open
7809 - The database the table resides in shall be binlogged (binlog_*_db rules)
7810 - table is not mysql.event
7811 */
7812
static bool check_table_binlog_row_based(THD *thd, TABLE *table)
{
  /*
    cached_row_logging_check is a tri-state: -1 means "not yet computed".
    The table-level part of the check (not temporary, replicable, database
    accepted by the binlog filter) is stable for the table share, so it
    is computed once and cached there.
  */
  if (table->s->cached_row_logging_check == -1)
  {
    int const check(table->s->tmp_table == NO_TMP_TABLE &&
                    ! table->no_replicate &&
                    binlog_filter->db_ok(table->s->db.str));
    table->s->cached_row_logging_check= check;
  }

  assert(table->s->cached_row_logging_check == 0 ||
         table->s->cached_row_logging_check == 1);

  /* Combine the cached table property with the per-statement conditions. */
  return (thd->is_current_stmt_binlog_format_row() &&
          table->s->cached_row_logging_check &&
          (thd->variables.option_bits & OPTION_BIN_LOG) &&
          mysql_bin_log.is_open());
}
7831
7832
7833 /** @brief
7834 Write table maps for all (manually or automatically) locked tables
7835 to the binary log.
7836
7837 SYNOPSIS
7838 write_locked_table_maps()
7839 thd Pointer to THD structure
7840
7841 DESCRIPTION
7842 This function will generate and write table maps for all tables
7843 that are locked by the thread 'thd'.
7844
7845 RETURN VALUE
7846 0 All OK
7847 1 Failed to write all table maps
7848
7849 SEE ALSO
7850 THD::lock
7851 */
7852
static int write_locked_table_maps(THD *thd)
{
  DBUG_ENTER("write_locked_table_maps");
  DBUG_PRINT("enter", ("thd: 0x%lx thd->lock: 0x%lx "
                       "thd->extra_lock: 0x%lx",
                       (long) thd, (long) thd->lock, (long) thd->extra_lock));

  DBUG_PRINT("debug", ("get_binlog_table_maps(): %d", thd->get_binlog_table_maps()));

  /* Only act if no table map has been written for this statement yet. */
  if (thd->get_binlog_table_maps() == 0)
  {
    /* Tables may be held by either the extra (pre-)lock or the main lock. */
    MYSQL_LOCK *locks[2];
    locks[0]= thd->extra_lock;
    locks[1]= thd->lock;
    for (uint i= 0 ; i < sizeof(locks)/sizeof(*locks) ; ++i )
    {
      MYSQL_LOCK const *const lock= locks[i];
      if (lock == NULL)
        continue;

      bool need_binlog_rows_query= thd->variables.binlog_rows_query_log_events;
      TABLE **const end_ptr= lock->table + lock->table_count;
      for (TABLE **table_ptr= lock->table ;
           table_ptr != end_ptr ;
           ++table_ptr)
      {
        TABLE *const table= *table_ptr;
        DBUG_PRINT("info", ("Checking table %s", table->s->table_name.str));
        /* Only write-locked tables that qualify for row logging get a map. */
        if (table->current_lock == F_WRLCK &&
            check_table_binlog_row_based(thd, table))
        {
          /*
            We need to have a transactional behavior for SQLCOM_CREATE_TABLE
            (e.g. CREATE TABLE... SELECT * FROM TABLE) in order to keep a
            compatible behavior with the STMT based replication even when
            the table is not transactional. In other words, if the operation
            fails while executing the insert phase nothing is written to the
            binlog.

            Note that at this point, we check the type of a set of tables to
            create the table map events. In the function binlog_log_row(),
            which calls the current function, we check the type of the table
            of the current row.
          */
          bool const has_trans= thd->lex->sql_command == SQLCOM_CREATE_TABLE ||
                                table->file->has_transactions();
          int const error= thd->binlog_write_table_map(table, has_trans,
                                                       need_binlog_rows_query);
          /* Binlog Rows_query log event once for one statement which updates
             two or more tables.*/
          if (need_binlog_rows_query)
            need_binlog_rows_query= FALSE;
          /*
            If an error occurs, it is the responsibility of the caller to
            roll back the transaction.
          */
          if (unlikely(error))
            DBUG_RETURN(1);
        }
      }
    }
  }
  DBUG_RETURN(0);
}
7917
7918 typedef bool Log_func(THD*, TABLE*, bool,
7919 const uchar*, const uchar*);
7920
7921 /**
7922
7923 The purpose of an instance of this class is to :
7924
7925 1) Given a TABLE instance, backup the given TABLE::read_set, TABLE::write_set
7926 and restore those members upon this instance disposal.
7927
7928 2) Store a reference to a dynamically allocated buffer and dispose of it upon
7929 this instance disposal.
7930 */
7931
7932 class Binlog_log_row_cleanup
7933 {
7934 public:
7935 /**
7936 This constructor aims to create temporary copies of readset and writeset.
7937 @param table A pointer to TABLE object
7938 @param temp_read_bitmap Temporary BITMAP to store read_set.
7939 @param temp_write_bitmap Temporary BITMAP to store write_set.
7940 */
Binlog_log_row_cleanup(TABLE & table,MY_BITMAP & temp_read_bitmap,MY_BITMAP & temp_write_bitmap)7941 Binlog_log_row_cleanup(TABLE &table, MY_BITMAP &temp_read_bitmap,
7942 MY_BITMAP &temp_write_bitmap)
7943 : m_cleanup_table(table),
7944 m_cleanup_read_bitmap(temp_read_bitmap),
7945 m_cleanup_write_bitmap(temp_write_bitmap)
7946 {
7947 bitmap_copy(&this->m_cleanup_read_bitmap, this->m_cleanup_table.read_set);
7948 bitmap_copy(&this->m_cleanup_write_bitmap, this->m_cleanup_table.write_set);
7949 }
7950
7951 /**
7952 This destructor aims to restore the original readset and writeset and
7953 delete the temporary copies.
7954 */
~Binlog_log_row_cleanup()7955 virtual ~Binlog_log_row_cleanup()
7956 {
7957 bitmap_copy(this->m_cleanup_table.read_set, &this->m_cleanup_read_bitmap);
7958 bitmap_copy(this->m_cleanup_table.write_set, &this->m_cleanup_write_bitmap);
7959 bitmap_free(&this->m_cleanup_read_bitmap);
7960 bitmap_free(&this->m_cleanup_write_bitmap);
7961 }
7962
7963 private:
7964 TABLE &m_cleanup_table; // Creating a TABLE to get access to its members.
7965 MY_BITMAP &m_cleanup_read_bitmap; // Temporary bitmap to store read_set.
7966 MY_BITMAP &m_cleanup_write_bitmap; // Temporary bitmap to store write_set.
7967 };
7968
/**
  Log one row change to the binary log in row format.

  Writes table map events first if this is the first row of the
  statement, and (when transaction_write_set_extraction is enabled)
  extracts write-set hashes for the row images via add_pke().

  @param table          table the row belongs to
  @param before_record  old row image, or NULL for an insert
  @param after_record   new row image, or NULL for a delete
  @param log_func       event-type-specific logging function
                        (write/update/delete rows)

  @retval 0                          success (or row logging not needed)
  @retval HA_ERR_RBR_LOGGING_FAILED  logging or write-set extraction failed
*/
int binlog_log_row(TABLE* table,
                   const uchar *before_record,
                   const uchar *after_record,
                   Log_func *log_func)
{
  bool error= 0;
  THD *const thd= table->in_use;

  if (check_table_binlog_row_based(thd, table))
  {
    /* Extract the row's write set for transaction dependency tracking. */
    if (thd->variables.transaction_write_set_extraction != HASH_ALGORITHM_OFF)
    {
      try
      {
        MY_BITMAP save_read_set;
        MY_BITMAP save_write_set;
        if (bitmap_init(&save_read_set, NULL, table->s->fields, false) ||
            bitmap_init(&save_write_set, NULL, table->s->fields, false))
        {
          my_error(ER_OUT_OF_RESOURCES, MYF(0));
          return HA_ERR_RBR_LOGGING_FAILED;
        }

        /* Restores read/write sets and frees the copies on scope exit. */
        Binlog_log_row_cleanup cleanup_sentry(*table, save_read_set,
                                              save_write_set);
        /*
          binlog_row_image == 0 (presumably MINIMAL — confirm against the
          enum definition): mark columns of all unique keys as used so the
          key values are part of the extracted write set.
        */
        if (thd->variables.binlog_row_image == 0)
        {
          for (uint key_number= 0; key_number < table->s->keys; ++key_number)
          {
            if (((table->key_info[key_number].flags & (HA_NOSAME)) ==
                 HA_NOSAME))
            {
              table->mark_columns_used_by_index_no_reset(key_number,
                                                         table->read_set);
              table->mark_columns_used_by_index_no_reset(key_number,
                                                         table->write_set);
            }
          }
        }
        const uchar *records[]= {after_record, before_record};

        /* Hash each present row image (insert/delete have only one). */
        for (int record= 0; record < 2; ++record)
        {
          if (records[record] != NULL)
          {
            assert(records[record] == table->record[0] ||
                   records[record] == table->record[1]);
            bool res= add_pke(table, thd, records[record]);
            if (res) return HA_ERR_RBR_LOGGING_FAILED;
          }
        }
      }
      catch (const std::bad_alloc &)
      {
        my_error(ER_OUT_OF_RESOURCES, MYF(0));
        return HA_ERR_RBR_LOGGING_FAILED;
      }
    }
    DBUG_DUMP("read_set 10", (uchar*) table->read_set->bitmap,
              (table->s->fields + 7) / 8);

    /*
      If there are no table maps written to the binary log, this is
      the first row handled in this statement. In that case, we need
      to write table maps for all locked tables to the binary log.
    */
    if (likely(!(error= write_locked_table_maps(thd))))
    {
      /*
        We need to have a transactional behavior for SQLCOM_CREATE_TABLE
        (i.e. CREATE TABLE... SELECT * FROM TABLE) in order to keep a
        compatible behavior with the STMT based replication even when
        the table is not transactional. In other words, if the operation
        fails while executing the insert phase nothing is written to the
        binlog.
      */
      bool const has_trans= thd->lex->sql_command == SQLCOM_CREATE_TABLE ||
                            table->file->has_transactions();
      error=
        (*log_func)(thd, table, has_trans, before_record, after_record);
    }
  }
  return error ? HA_ERR_RBR_LOGGING_FAILED : 0;
}
8053
/**
  Lock or unlock the table in the storage engine, with DTrace and
  performance-schema instrumentation around the engine call.

  @param thd        thread performing the (un)lock
  @param lock_type  F_RDLCK, F_WRLCK or F_UNLCK

  @return 0 on success, otherwise the error code from external_lock()
*/
int handler::ha_external_lock(THD *thd, int lock_type)
{
  int error;
  DBUG_ENTER("handler::ha_external_lock");
  /*
    Whether this is lock or unlock, this should be true, and is to verify that
    if get_auto_increment() was called (thus may have reserved intervals or
    taken a table lock), ha_release_auto_increment() was too.
  */
  assert(next_insert_id == 0);
  /* Consecutive calls for lock without unlocking in between is not allowed */
  assert(table_share->tmp_table != NO_TMP_TABLE ||
         ((lock_type != F_UNLCK && m_lock_type == F_UNLCK) ||
          lock_type == F_UNLCK));
  /* SQL HANDLER call locks/unlock while scanning (RND/INDEX). */
  assert(inited == NONE || table->open_by_handler);

  /* Fire the matching DTrace "start" probe for the lock type requested. */
  if (MYSQL_HANDLER_RDLOCK_START_ENABLED() && lock_type == F_RDLCK)
  {
    MYSQL_HANDLER_RDLOCK_START(table_share->db.str,
                               table_share->table_name.str);
  }
  else if (MYSQL_HANDLER_WRLOCK_START_ENABLED() && lock_type == F_WRLCK)
  {
    MYSQL_HANDLER_WRLOCK_START(table_share->db.str,
                               table_share->table_name.str);
  }
  else if (MYSQL_HANDLER_UNLOCK_START_ENABLED() && lock_type == F_UNLCK)
  {
    MYSQL_HANDLER_UNLOCK_START(table_share->db.str,
                               table_share->table_name.str);
  }

  ha_statistic_increment(&SSV::ha_external_lock_count);

  /* The engine call itself, timed for the performance schema. */
  MYSQL_TABLE_LOCK_WAIT(PSI_TABLE_EXTERNAL_LOCK, lock_type,
    { error= external_lock(thd, lock_type); })

  /*
    We cache the table flags if the locking succeeded. Otherwise, we
    keep them as they were when they were fetched in ha_open().
  */

  if (error == 0)
  {
    /*
      The lock type is needed by MRR when creating a clone of this handler
      object.
    */
    m_lock_type= lock_type;
    cached_table_flags= table_flags();
  }

  /* Fire the matching DTrace "done" probe. */
  if (MYSQL_HANDLER_RDLOCK_DONE_ENABLED() && lock_type == F_RDLCK)
  {
    MYSQL_HANDLER_RDLOCK_DONE(error);
  }
  else if (MYSQL_HANDLER_WRLOCK_DONE_ENABLED() && lock_type == F_WRLCK)
  {
    MYSQL_HANDLER_WRLOCK_DONE(error);
  }
  else if (MYSQL_HANDLER_UNLOCK_DONE_ENABLED() && lock_type == F_UNLCK)
  {
    MYSQL_HANDLER_UNLOCK_DONE(error);
  }
  DBUG_RETURN(error);
}
8121
8122
8123 /** @brief
8124 Check handler usage and reset state of file to after 'open'
8125
8126 @note can be called regardless of it is locked or not.
8127 */
int handler::ha_reset()
{
  DBUG_ENTER("handler::ha_reset");
  /* Check that we have called all proper deallocation functions */
  assert((uchar*) table->def_read_set.bitmap +
         table->s->column_bitmap_size ==
         (uchar*) table->def_write_set.bitmap);
  assert(bitmap_is_set_all(&table->s->all_set));
  assert(table->key_read == 0);
  /* ensure that ha_index_end / ha_rnd_end has been called */
  assert(inited == NONE);
  /* Free cache used by filesort */
  free_io_cache(table);
  /* reset the bitmaps to point to defaults */
  table->default_column_bitmaps();
  /* Reset information about pushed engine conditions */
  pushed_cond= NULL;
  /* Reset information about pushed index conditions */
  cancel_pushed_idx_cond();

  /* Finally let the storage engine reset its own per-handler state. */
  const int retval= reset();
  DBUG_RETURN(retval);
}
8151
8152
/**
  Write a row to the table: calls the engine's write_row() and then
  logs the insert to the binary log in row format when applicable.

  @param buf  row image in table->record[0] format

  @return 0 on success, otherwise a handler error code
*/
int handler::ha_write_row(uchar *buf)
{
  int error;
  Log_func *log_func= Write_rows_log_event::binlog_row_logging_function;
  assert(table_share->tmp_table != NO_TMP_TABLE ||
         m_lock_type == F_WRLCK);

  DBUG_ENTER("handler::ha_write_row");
  DBUG_EXECUTE_IF("inject_error_ha_write_row",
                  DBUG_RETURN(HA_ERR_INTERNAL_ERROR); );
  DBUG_EXECUTE_IF("simulate_storage_engine_out_of_memory",
                  DBUG_RETURN(HA_ERR_SE_OUT_OF_MEMORY); );
  MYSQL_INSERT_ROW_START(table_share->db.str, table_share->table_name.str);
  /* An insert makes the transaction read-write. */
  mark_trx_read_write();

  DBUG_EXECUTE_IF("handler_crashed_table_on_usage",
                  my_error(HA_ERR_CRASHED, MYF(ME_ERRORLOG), table_share->table_name.str);
                  set_my_errno(HA_ERR_CRASHED);
                  DBUG_RETURN(HA_ERR_CRASHED););

  MYSQL_TABLE_IO_WAIT(PSI_TABLE_WRITE_ROW, MAX_KEY, error,
    { error= write_row(buf); })

  MYSQL_INSERT_ROW_DONE(error);
  if (unlikely(error))
    DBUG_RETURN(error);

  /* NULL before-image: binlog this as an insert of a new row. */
  if (unlikely((error= binlog_log_row(table, 0, buf, log_func))))
    DBUG_RETURN(error); /* purecov: inspected */

  DEBUG_SYNC_C("ha_write_row_end");
  DBUG_RETURN(0);
}
8186
8187
ha_update_row(const uchar * old_data,uchar * new_data)8188 int handler::ha_update_row(const uchar *old_data, uchar *new_data)
8189 {
8190 int error;
8191 assert(table_share->tmp_table != NO_TMP_TABLE ||
8192 m_lock_type == F_WRLCK);
8193 Log_func *log_func= Update_rows_log_event::binlog_row_logging_function;
8194
8195 /*
8196 Some storage engines require that the new record is in record[0]
8197 (and the old record is in record[1]).
8198 */
8199 assert(new_data == table->record[0]);
8200 assert(old_data == table->record[1]);
8201
8202 DBUG_ENTER("hanlder::ha_update_row");
8203 DBUG_EXECUTE_IF("inject_error_ha_update_row",
8204 DBUG_RETURN(HA_ERR_INTERNAL_ERROR); );
8205
8206 MYSQL_UPDATE_ROW_START(table_share->db.str, table_share->table_name.str);
8207 mark_trx_read_write();
8208
8209 DBUG_EXECUTE_IF("handler_crashed_table_on_usage",
8210 my_error(HA_ERR_CRASHED, MYF(ME_ERRORLOG), table_share->table_name.str);
8211 set_my_errno(HA_ERR_CRASHED);
8212 return(HA_ERR_CRASHED););
8213
8214 MYSQL_TABLE_IO_WAIT(PSI_TABLE_UPDATE_ROW, active_index, error,
8215 { error= update_row(old_data, new_data);})
8216
8217 MYSQL_UPDATE_ROW_DONE(error);
8218 if (unlikely(error))
8219 DBUG_RETURN(error);
8220 if (unlikely((error= binlog_log_row(table, old_data, new_data, log_func))))
8221 DBUG_RETURN(error);
8222 DBUG_RETURN(0);
8223 }
8224
/**
  Delete a row from the table: calls the engine's delete_row() and then
  logs the delete to the binary log in row format when applicable.

  @param buf  image of the row to delete

  @return 0 on success, otherwise a handler error code

  @note Plain returns are used here (not DBUG_RETURN) since this
        function has no DBUG_ENTER.
*/
int handler::ha_delete_row(const uchar *buf)
{
  int error;
  assert(table_share->tmp_table != NO_TMP_TABLE ||
         m_lock_type == F_WRLCK);
  Log_func *log_func= Delete_rows_log_event::binlog_row_logging_function;
  /*
    Normally table->record[0] is used, but sometimes table->record[1] is used.
  */
  assert(buf == table->record[0] ||
         buf == table->record[1]);
  DBUG_EXECUTE_IF("inject_error_ha_delete_row",
                  return HA_ERR_INTERNAL_ERROR; );

  DBUG_EXECUTE_IF("handler_crashed_table_on_usage",
                  my_error(HA_ERR_CRASHED, MYF(ME_ERRORLOG), table_share->table_name.str);
                  set_my_errno(HA_ERR_CRASHED);
                  return(HA_ERR_CRASHED););

  MYSQL_DELETE_ROW_START(table_share->db.str, table_share->table_name.str);
  /* A delete makes the transaction read-write. */
  mark_trx_read_write();

  MYSQL_TABLE_IO_WAIT(PSI_TABLE_DELETE_ROW, active_index, error,
    { error= delete_row(buf);})

  MYSQL_DELETE_ROW_DONE(error);
  if (unlikely(error))
    return error;
  /* NULL after-image: binlog this as a delete of an existing row. */
  if (unlikely((error= binlog_log_row(table, buf, 0, log_func))))
    return error;
  return 0;
}
8257
8258
8259
8260 /** @brief
8261 use_hidden_primary_key() is called in case of an update/delete when
8262 (table_flags() and HA_PRIMARY_KEY_REQUIRED_FOR_DELETE) is defined
8263 but we don't have a primary key
8264 */
void handler::use_hidden_primary_key()
{
  /*
    Fall back to using all columns in the table to identify the row:
    mark every column as used for both reading and writing.
  */
  table->use_all_columns();
}
8270
8271
8272 /**
8273 Get an initialized ha_share.
8274
8275 @return Initialized ha_share
8276 @retval NULL ha_share is not yet initialized.
8277 @retval != NULL previous initialized ha_share.
8278
8279 @note
8280 If not a temp table, then LOCK_ha_data must be held.
8281 */
8282
Handler_share *handler::get_ha_share_ptr()
{
  DBUG_ENTER("handler::get_ha_share_ptr");
  assert(ha_share && table_share);

#ifndef NDEBUG
  /* Non-temporary tables are shared: LOCK_ha_data must be held. */
  if (table_share->tmp_table == NO_TMP_TABLE)
    mysql_mutex_assert_owner(&table_share->LOCK_ha_data);
#endif

  DBUG_RETURN(*ha_share);
}
8295
8296
8297 /**
8298 Set ha_share to be used by all instances of the same table/partition.
8299
8300 @param ha_share Handler_share to be shared.
8301
8302 @note
8303 If not a temp table, then LOCK_ha_data must be held.
8304 */
8305
void handler::set_ha_share_ptr(Handler_share *arg_ha_share)
{
  DBUG_ENTER("handler::set_ha_share_ptr");
  assert(ha_share);
#ifndef NDEBUG
  /* Non-temporary tables are shared: LOCK_ha_data must be held. */
  if (table_share->tmp_table == NO_TMP_TABLE)
    mysql_mutex_assert_owner(&table_share->LOCK_ha_data);
#endif

  *ha_share= arg_ha_share;
  DBUG_VOID_RETURN;
}
8318
8319
8320 /**
8321 Take a lock for protecting shared handler data.
8322 */
8323
void handler::lock_shared_ha_data()
{
  assert(table_share);
  /* Temporary tables are private to one connection: no locking needed. */
  if (table_share->tmp_table == NO_TMP_TABLE)
    mysql_mutex_lock(&table_share->LOCK_ha_data);
}
8330
8331
8332 /**
8333 Release lock for protecting ha_share.
8334 */
8335
void handler::unlock_shared_ha_data()
{
  assert(table_share);
  /* Temporary tables are private to one connection: no locking needed. */
  if (table_share->tmp_table == NO_TMP_TABLE)
    mysql_mutex_unlock(&table_share->LOCK_ha_data);
}
8342
8343
8344 /**
8345 This structure is a helper structure for passing the length and pointer of
8346 blob space allocated by storage engine.
8347 */
struct blob_len_ptr{
  uint length;  // size of the space the storage engine allocated for the blob
  uchar *ptr;   // pointer to that engine-allocated space
};
8352
8353
8354 /**
8355 Get the blob length and pointer of allocated space from the record buffer.
8356
8357 During evaluating the blob virtual generated columns, the blob space will
8358 be allocated by server. In order to keep the blob data after the table is
8359 closed, we need write the data into a specified space allocated by storage
8360 engine. Here, we have to extract the space pointer and length from the
8361 record buffer.
8362 After we get the value of virtual generated columns, copy the data into
8363 the specified space and store it in the record buffer (@see copy_blob_data()).
8364
8365 @param table the pointer of table
8366 @param fields bitmap of field index of evaluated
8367 generated column
8368 @param[out] blob_len_ptr_array an array to record the length and pointer
8369 of allocated space by storage engine.
8370 @note The caller should provide the blob_len_ptr_array with a size of
8371 MAX_FIELDS.
8372 */
8373
extract_blob_space_and_length_from_record_buff(const TABLE * table,const MY_BITMAP * const fields,blob_len_ptr * blob_len_ptr_array)8374 static void extract_blob_space_and_length_from_record_buff(const TABLE *table,
8375 const MY_BITMAP *const fields,
8376 blob_len_ptr *blob_len_ptr_array)
8377 {
8378 int num= 0;
8379 for (Field **vfield= table->vfield; *vfield; vfield++)
8380 {
8381 // Check if this field should be included
8382 if (bitmap_is_set(fields, (*vfield)->field_index) &&
8383 (*vfield)->is_virtual_gcol() && (*vfield)->type() == MYSQL_TYPE_BLOB)
8384 {
8385 blob_len_ptr_array[num].length= (*vfield)->data_length();
8386 // TODO: The following check is only for Innodb.
8387 assert(blob_len_ptr_array[num].length == 255 ||
8388 blob_len_ptr_array[num].length == 768 ||
8389 blob_len_ptr_array[num].length == 3073);
8390
8391 uchar *ptr;
8392 (*vfield)->get_ptr(&ptr);
8393 blob_len_ptr_array[num].ptr= ptr;
8394
8395 // Let server allocate the space for BLOB virtual generated columns
8396 (*vfield)->reset();
8397
8398 num++;
8399 assert(num <= MAX_FIELDS);
8400 }
8401 }
8402 }
8403
8404
8405 /**
8406 Copy the value of BLOB virtual generated columns into the space allocated
8407 by storage engine.
8408
8409 This is because the table is closed after evaluating the value. In order to
8410 keep the BLOB value after the table is closed, we have to copy the value into
8411 the place where storage engine prepares for.
8412
8413 @param table pointer of the table to be operated on
8414 @param fields bitmap of field index of evaluated generated column
8415 @param blob_len_ptr_array array of length and pointer of allocated space by
8416 storage engine.
8417 */
8418
copy_blob_data(const TABLE * table,const MY_BITMAP * const fields,blob_len_ptr * blob_len_ptr_array)8419 static void copy_blob_data(const TABLE *table,
8420 const MY_BITMAP *const fields,
8421 blob_len_ptr *blob_len_ptr_array)
8422 {
8423 uint num= 0;
8424 for (Field **vfield= table->vfield; *vfield; vfield++)
8425 {
8426 // Check if this field should be included
8427 if (bitmap_is_set(fields, (*vfield)->field_index) &&
8428 (*vfield)->is_virtual_gcol() && (*vfield)->type() == MYSQL_TYPE_BLOB)
8429 {
8430 assert(blob_len_ptr_array[num].length > 0);
8431 assert(blob_len_ptr_array[num].ptr != NULL);
8432
8433 /*
8434 Only copy as much of the blob as the storage engine has
8435 allocated space for. This is sufficient since the only use of the
8436 blob in the storage engine is for using a prefix of it in a
8437 secondary index.
8438 */
8439 uint length= (*vfield)->data_length();
8440 const uint alloc_len= blob_len_ptr_array[num].length;
8441 length= length > alloc_len ? alloc_len : length;
8442
8443 uchar *ptr;
8444 (*vfield)->get_ptr(&ptr);
8445 memcpy(blob_len_ptr_array[num].ptr, ptr, length);
8446 (down_cast<Field_blob *>(*vfield))->store_in_allocated_space(
8447 pointer_cast<char *>(blob_len_ptr_array[num].ptr),
8448 length);
8449 num++;
8450 assert(num <= MAX_FIELDS);
8451 }
8452 }
8453 }
8454
8455
8456 /*
8457 Evaluate generated column's value. This is an internal helper reserved for
8458 handler::my_eval_gcolumn_expr().
8459
8460 @param thd pointer of THD
  @param table           The pointer of table where evaluated generated
                         columns are in
8463 @param fields bitmap of field index of evaluated generated column
8464 @param[in,out] record record buff of base columns generated column depends.
8465 After calling this function, it will be used to return
8466 the value of generated column.
  @param in_purge        whether the function is called by purge thread
8468
8469 @return true in case of error, false otherwise.
8470 */
8471
static bool my_eval_gcolumn_expr_helper(THD *thd, TABLE *table,
                                        const MY_BITMAP *const fields,
                                        uchar *record,
                                        bool in_purge)
{
  DBUG_ENTER("my_eval_gcolumn_expr_helper");
  assert(table && table->vfield);
  assert(!thd->is_error());

  /* Temporarily point the table's fields at the caller-supplied buffer. */
  uchar *old_buf= table->record[0];
  repoint_field_to_record(table, old_buf, record);

  blob_len_ptr blob_len_ptr_array[MAX_FIELDS];

  /*
    If it's purge thread, we need get the space allocated by storage engine
    for blob.
  */
  if (in_purge)
    extract_blob_space_and_length_from_record_buff(table, fields,
                                                   blob_len_ptr_array);

  bool res= false;
  /* Stack-allocated working bitmap: requested fields plus dependencies. */
  MY_BITMAP fields_to_evaluate;
  my_bitmap_map bitbuf[bitmap_buffer_size(MAX_FIELDS) / sizeof(my_bitmap_map)];
  bitmap_init(&fields_to_evaluate, bitbuf, table->s->fields, 0);
  bitmap_set_all(&fields_to_evaluate);
  bitmap_intersect(&fields_to_evaluate, fields);
  /*
    In addition to evaluating the value for the columns requested by
    the caller we also need to evaluate any virtual columns that these
    depend on.
    This loop goes through the columns that should be evaluated and
    adds all the base columns. If the base column is virtual, it has
    to be evaluated.
  */
  for (Field **vfield_ptr= table->vfield; *vfield_ptr; vfield_ptr++)
  {
    Field *field= *vfield_ptr;
    // Validate that the field number is less than the bit map size
    assert(field->field_index < fields->n_bits);

    if (bitmap_is_set(fields, field->field_index))
      bitmap_union(&fields_to_evaluate, &field->gcol_info->base_columns_map);
  }

  /*
    Evaluate all requested columns and all base columns these depends
    on that are virtual.

    This function is called by the storage engine, which may request to
    evaluate more generated columns than read_set/write_set says.
    For example, InnoDB's row_sel_sec_rec_is_for_clust_rec() reads the full
    record from the clustered index and asks us to compute generated columns
    that match key fields in the used secondary index. So we trust that the
    engine has filled all base columns necessary to requested computations,
    and we ignore read_set/write_set.
  */

  my_bitmap_map *old_maps[2];
  dbug_tmp_use_all_columns(table, old_maps,
                           table->read_set, table->write_set);

  for (Field **vfield_ptr= table->vfield; *vfield_ptr; vfield_ptr++)
  {
    Field *field= *vfield_ptr;

    // Check if we should evaluate this field
    if (bitmap_is_set(&fields_to_evaluate, field->field_index) &&
        field->is_virtual_gcol())
    {
      assert(field->gcol_info && field->gcol_info->expr_item->fixed);

      const type_conversion_status save_in_field_status=
        field->gcol_info->expr_item->save_in_field(field, 0);
      assert(!thd->is_error() || save_in_field_status != TYPE_OK);

      /*
        save_in_field() may return non-zero even if there was no
        error. This happens if a warning is raised, such as an
        out-of-range warning when converting the result to the target
        type of the virtual column. We should stop only if the
        non-zero return value was caused by an actual error.
      */
      if (save_in_field_status != TYPE_OK && thd->is_error())
      {
        res= true;
        break;
      }
    }
  }

  dbug_tmp_restore_column_maps(table->read_set, table->write_set, old_maps);

  /*
    If it's a purge thread, we need copy the blob data into specified place
    allocated by storage engine so that the blob data still can be accessed
    after table is closed.
  */
  if (in_purge)
    copy_blob_data(table, fields, blob_len_ptr_array);

  /* Point the fields back at the table's own record buffer. */
  repoint_field_to_record(table, record, old_buf);
  DBUG_RETURN(res);
}
8577
8578
8579 /**
8580 Callback to allow InnoDB to prepare a template for generated
8581 column processing. This function will open the table without
8582 opening in the engine and call the provided function with
8583 the TABLE object made. The function will then close the TABLE.
8584
8585 @param thd Thread handle
8586 @param db_name Name of database containing the table
8587 @param table_name Name of table to open
8588 @param myc InnoDB function to call for processing TABLE
8589 @param ib_table Argument for InnoDB function
8590
8591 @return true in case of error, false otherwise.
8592 */
8593
bool handler::my_prepare_gcolumn_template(THD *thd,
                                          const char *db_name,
                                          const char *table_name,
                                          my_gcolumn_template_callback_t myc,
                                          void* ib_table)
{
  char path[FN_REFLEN + 1];
  bool was_truncated;
  build_table_filename(path, sizeof(path) - 1 - reg_ext_length,
                       db_name, table_name, "", 0, &was_truncated);
  assert(!was_truncated);
  /* A LEX must be active while opening the table; paired with lex_end(). */
  lex_start(thd);
  bool rc= true;

  // Note! The last argument to open_table_uncached() must be false,
  // since the table already exists in the TDC. Allowing the table to
  // be opened in the SE in this case is dangerous as the two shares
  // could get conflicting SE private data.
  TABLE *table= open_table_uncached(thd, path, db_name, table_name,
                                    false, false);
  if (table)
  {
    /* Hand the server-side TABLE to InnoDB, then dispose of it. */
    myc(table, ib_table);
    intern_close_table(table);
    rc= false;
  }
  lex_end(thd->lex);
  return rc;
}
8623
8624
8625 /**
8626 Callback for generated columns processing. Will open the table, in the
8627 server *only*, and call my_eval_gcolumn_expr_helper() to do the actual
8628 processing. This function is a variant of the other
8629 handler::my_eval_gcolumn_expr() but is intended for use when no TABLE
8630 object already exists - e.g. from purge threads.
8631
8632 Note! The call to open_table_uncached() must be made with the last
8633 argument (open_in_engine) set to false. Failing to do so will cause
8634 deadlocks and incorrect behavior.
8635
8636 @param thd Thread handle
8637 @param db_name Database containing the table to open
8638 @param table_name Name of table to open
8639 @param fields Bitmap of field index of evaluated generated column
8640 @param record Record buffer
8641
8642 @return true in case of error, false otherwise.
8643 */
8644
my_eval_gcolumn_expr_with_open(THD * thd,const char * db_name,const char * table_name,const MY_BITMAP * const fields,uchar * record)8645 bool handler::my_eval_gcolumn_expr_with_open(THD *thd,
8646 const char *db_name,
8647 const char *table_name,
8648 const MY_BITMAP *const fields,
8649 uchar *record)
8650 {
8651 bool retval= true;
8652 lex_start(thd);
8653
8654 char path[FN_REFLEN + 1];
8655 bool was_truncated;
8656 build_table_filename(path, sizeof(path) - 1 - reg_ext_length,
8657 db_name, table_name, "", 0, &was_truncated);
8658 assert(!was_truncated);
8659
8660 TABLE *table= open_table_uncached(thd, path, db_name, table_name,
8661 false, false);
8662 if (table)
8663 {
8664 retval= my_eval_gcolumn_expr_helper(thd, table, fields, record, true);
8665 intern_close_table(table);
8666 }
8667
8668 lex_end(thd->lex);
8669 return retval;
8670 }
8671
8672
8673 /**
8674 Evaluate generated Column's value. If the engine has to write an index entry
8675 to its UNDO log (in a DELETE or UPDATE), and the index is on a virtual
8676 generated column, engine needs to calculate the column's value. This variant
8677 of handler::my_eval_gcolumn_expr() is used by client threads which have a
8678 TABLE.
8679
8680 @param thd Thread handle
8681 @param table mysql table object
8682 @param fields bitmap of field index of evaluated
8683 generated column
8684 @param record buff of base columns generated column depends.
8685 After calling this function, it will be used to
8686 return the value of generated column.
8687
8688 @retval true in case of error
8689 @retval false on success.
8690 */
8691
my_eval_gcolumn_expr(THD * thd,TABLE * table,const MY_BITMAP * const fields,uchar * record)8692 bool handler::my_eval_gcolumn_expr(THD *thd, TABLE *table,
8693 const MY_BITMAP *const fields,
8694 uchar *record)
8695 {
8696 DBUG_ENTER("my_eval_gcolumn_expr");
8697
8698 const bool res=
8699 my_eval_gcolumn_expr_helper(thd, table, fields, record, false);
8700 DBUG_RETURN(res);
8701 }
8702
8703
8704 /**
8705 Auxiliary structure for passing information to notify_*_helper()
8706 functions.
8707 */
8708
8709 struct HTON_NOTIFY_PARAMS
8710 {
HTON_NOTIFY_PARAMSHTON_NOTIFY_PARAMS8711 HTON_NOTIFY_PARAMS(const MDL_key *mdl_key,
8712 ha_notification_type mdl_type)
8713 : key(mdl_key), notification_type(mdl_type),
8714 some_htons_were_notified(false),
8715 victimized(false)
8716 {}
8717
8718 const MDL_key *key;
8719 const ha_notification_type notification_type;
8720 bool some_htons_were_notified;
8721 bool victimized;
8722 };
8723
8724
8725 static my_bool
notify_exclusive_mdl_helper(THD * thd,plugin_ref plugin,void * arg)8726 notify_exclusive_mdl_helper(THD *thd, plugin_ref plugin, void *arg)
8727 {
8728 handlerton *hton= plugin_data<handlerton*>(plugin);
8729 if (hton->state == SHOW_OPTION_YES && hton->notify_exclusive_mdl)
8730 {
8731 HTON_NOTIFY_PARAMS *params= reinterpret_cast<HTON_NOTIFY_PARAMS*>(arg);
8732
8733 if (hton->notify_exclusive_mdl(thd, params->key,
8734 params->notification_type,
8735 ¶ms->victimized))
8736 {
8737 // Ignore failures from post event notification.
8738 if (params->notification_type == HA_NOTIFY_PRE_EVENT)
8739 return TRUE;
8740 }
8741 else
8742 params->some_htons_were_notified= true;
8743 }
8744 return FALSE;
8745 }
8746
8747
8748 /**
8749 Notify/get permission from all interested storage engines before
8750 acquisition or after release of exclusive metadata lock on object
8751 represented by key.
8752
8753 @param thd Thread context.
8754 @param mdl_key MDL key identifying object on which exclusive
8755 lock is to be acquired/was released.
8756 @param notification_type Indicates whether this is pre-acquire or
8757 post-release notification.
8758 @param victimized 'true' if locking failed as we were selected
8759 as a victim in order to avoid possible deadlocks.
8760
8761 @note @see handlerton::notify_exclusive_mdl for details about
8762 calling convention and error reporting.
8763
8764 @return False - if notification was successful/lock can be acquired,
8765 True - if it has failed/lock should not be acquired.
8766 */
8767
ha_notify_exclusive_mdl(THD * thd,const MDL_key * mdl_key,ha_notification_type notification_type,bool * victimized)8768 bool ha_notify_exclusive_mdl(THD *thd, const MDL_key *mdl_key,
8769 ha_notification_type notification_type,
8770 bool *victimized)
8771 {
8772 HTON_NOTIFY_PARAMS params(mdl_key, notification_type);
8773 *victimized = false;
8774 if (plugin_foreach(thd, notify_exclusive_mdl_helper,
8775 MYSQL_STORAGE_ENGINE_PLUGIN, ¶ms))
8776 {
8777 *victimized = params.victimized;
8778 /*
8779 If some SE hasn't given its permission to acquire lock and some SEs
8780 has given their permissions, we need to notify the latter group about
8781 failed lock acquisition. We do this by calling post-release notification
8782 for all interested SEs unconditionally.
8783 */
8784 if (notification_type == HA_NOTIFY_PRE_EVENT &&
8785 params.some_htons_were_notified)
8786 {
8787 HTON_NOTIFY_PARAMS rollback_params(mdl_key, HA_NOTIFY_POST_EVENT);
8788 (void) plugin_foreach(thd, notify_exclusive_mdl_helper,
8789 MYSQL_STORAGE_ENGINE_PLUGIN, &rollback_params);
8790 }
8791 return true;
8792 }
8793 return false;
8794 }
8795
8796
8797 static my_bool
notify_alter_table_helper(THD * thd,plugin_ref plugin,void * arg)8798 notify_alter_table_helper(THD *thd, plugin_ref plugin, void *arg)
8799 {
8800 handlerton *hton= plugin_data<handlerton*>(plugin);
8801 if (hton->state == SHOW_OPTION_YES && hton->notify_alter_table)
8802 {
8803 HTON_NOTIFY_PARAMS *params= reinterpret_cast<HTON_NOTIFY_PARAMS*>(arg);
8804
8805 if (hton->notify_alter_table(thd, params->key, params->notification_type))
8806 {
8807 // Ignore failures from post event notification.
8808 if (params->notification_type == HA_NOTIFY_PRE_EVENT)
8809 return TRUE;
8810 }
8811 else
8812 params->some_htons_were_notified= true;
8813 }
8814 return FALSE;
8815 }
8816
8817
8818 /**
8819 Notify/get permission from all interested storage engines before
8820 or after executed ALTER TABLE on the table identified by key.
8821
8822 @param thd Thread context.
8823 @param mdl_key MDL key identifying table.
8824 @param notification_type Indicates whether this is pre-ALTER or
8825 post-ALTER notification.
8826
8827 @note @see handlerton::notify_alter_table for rationale,
8828 details about calling convention and error reporting.
8829
8830 @return False - if notification was successful/ALTER TABLE can
8831 proceed.
8832 True - if it has failed/ALTER TABLE should fail.
8833 */
8834
ha_notify_alter_table(THD * thd,const MDL_key * mdl_key,ha_notification_type notification_type)8835 bool ha_notify_alter_table(THD *thd, const MDL_key *mdl_key,
8836 ha_notification_type notification_type)
8837 {
8838 HTON_NOTIFY_PARAMS params(mdl_key, notification_type);
8839
8840 if (plugin_foreach(thd, notify_alter_table_helper,
8841 MYSQL_STORAGE_ENGINE_PLUGIN, ¶ms))
8842 {
8843 /*
8844 If some SE hasn't given its permission to do ALTER TABLE and some SEs
8845 has given their permissions, we need to notify the latter group about
8846 failed attemopt. We do this by calling post-ALTER TABLE notification
8847 for all interested SEs unconditionally.
8848 */
8849 if (notification_type == HA_NOTIFY_PRE_EVENT &&
8850 params.some_htons_were_notified)
8851 {
8852 HTON_NOTIFY_PARAMS rollback_params(mdl_key, HA_NOTIFY_POST_EVENT);
8853 (void) plugin_foreach(thd, notify_alter_table_helper,
8854 MYSQL_STORAGE_ENGINE_PLUGIN, &rollback_params);
8855 }
8856 return true;
8857 }
8858 return false;
8859 }
8860
8861 /**
8862 Set the transaction isolation level for the next transaction and update
8863 session tracker information about the transaction isolation level.
8864
8865 @param thd THD session setting the tx_isolation.
8866 @param tx_isolation The isolation level to be set.
8867 @param one_shot True if the isolation level should be restored to
8868 session default after finishing the transaction.
8869 */
set_tx_isolation(THD * thd,enum_tx_isolation tx_isolation,bool one_shot)8870 bool set_tx_isolation(THD *thd,
8871 enum_tx_isolation tx_isolation,
8872 bool one_shot)
8873 {
8874 Transaction_state_tracker *tst= NULL;
8875
8876 if (thd->variables.session_track_transaction_info > TX_TRACK_NONE)
8877 tst= (Transaction_state_tracker *)
8878 thd->session_tracker.get_tracker(TRANSACTION_INFO_TRACKER);
8879
8880 thd->tx_isolation= tx_isolation;
8881
8882 if (one_shot)
8883 {
8884 assert(!thd->in_active_multi_stmt_transaction());
8885 assert(!thd->in_sub_stmt);
8886 enum enum_tx_isol_level l;
8887 switch (thd->tx_isolation) {
8888 case ISO_READ_UNCOMMITTED:
8889 l= TX_ISOL_UNCOMMITTED;
8890 break;
8891 case ISO_READ_COMMITTED:
8892 l= TX_ISOL_COMMITTED;
8893 break;
8894 case ISO_REPEATABLE_READ:
8895 l= TX_ISOL_REPEATABLE;
8896 break;
8897 case ISO_SERIALIZABLE:
8898 l= TX_ISOL_SERIALIZABLE;
8899 break;
8900 default:
8901 assert(0);
8902 return true;
8903 }
8904 if (tst)
8905 tst->set_isol_level(thd, l);
8906 }
8907 else if (tst)
8908 {
8909 tst->set_isol_level(thd, TX_ISOL_INHERIT);
8910 }
8911 return false;
8912 }
8913
8914
8915 /**
8916 Checks if the file name is reserved word used by SE by invoking
8917 the handlerton method.
8918
8919 @param unused1 thread handler which is unused.
8920 @param plugin SE plugin.
8921 @param name Database name.
8922
8923 @retval true If the name is reserved word.
8924 @retval false If the name is not reserved word.
8925 */
is_reserved_db_name_handlerton(THD * unused1,plugin_ref plugin,void * name)8926 static my_bool is_reserved_db_name_handlerton(THD *unused1, plugin_ref plugin,
8927 void *name)
8928 {
8929 handlerton *hton= plugin_data<handlerton*>(plugin);
8930 if (hton->state == SHOW_OPTION_YES && hton->is_reserved_db_name)
8931 return (hton->is_reserved_db_name(hton, (const char *)name));
8932 return false;
8933 }
8934
8935
8936 /**
8937 Check if the file name is reserved word used by SE.
8938
8939 @param name Database name.
8940
8941 @retval true If the name is a reserved word.
8942 @retval false If the name is not a reserved word.
8943 */
ha_check_reserved_db_name(const char * name)8944 bool ha_check_reserved_db_name(const char* name)
8945 {
8946 return (plugin_foreach(NULL, is_reserved_db_name_handlerton,
8947 MYSQL_STORAGE_ENGINE_PLUGIN, (char *)name));
8948 }
8949